xref: /openbmc/qemu/tcg/tcg.c (revision 8109598b683ad2b6b02cd9c79dc15b7fc0b685aa)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions defined in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* gen-code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions defined in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
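
/*
 * An illustrative note, not from the original: on a host where
 * tcg_insn_unit is one byte (e.g. x86), tcg_out32() above takes the
 * memcpy path and advances code_ptr by four units, while on a
 * fixed-width host such as AArch64 (four-byte units) the same call
 * stores a single unit.  The bytes emitted are identical either way.
 */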

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
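
/*
 * Typical use, sketched: a backend emitting a branch to an as-yet
 * unbound label calls tcg_out_reloc() to record the patch site and
 * emits a placeholder; tcg_out_label() later binds the label to the
 * current output position, and tcg_resolve_relocs() walks each
 * recorded site, letting the backend's patch_reloc() rewrite it.
 */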

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
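
/*
 * The value returned is negative: CPUNegativeOffsetState sits
 * immediately below CPUArchState, at which env (TCG_AREG0) points,
 * so the fast-path TLB data is reachable with a negative displacement
 * from the env register.  The build-time check against
 * MIN_TLB_MASK_TABLE_OFS below validates that this fits the
 * backend's addressing modes.
 */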

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
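
/*
 * An illustrative call (not from this file):
 *   tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, a)
 * sign-extends the low 16 bits of register a into the 64-bit
 * register d via tcg_out_ext16s().
 */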

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
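
/*
 * E.g. when i1 and i2 exchange two registers (i1->dst == i2->src and
 * i2->dst == i1->src), the values are swapped with tcg_out_xchg()
 * when the host supports it, else i1's source is parked in @scratch.
 */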

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}
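
/*
 * Sketch of the intended use: a backend calls new_ldst_label() while
 * emitting the fast path of a qemu_ld/st, fills in oi, type, the
 * registers, raddr and the label_ptr[] patch sites, and
 * tcg_out_ldst_finalize() below later emits the out-of-line slow
 * paths via tcg_out_qemu_ld/st_slow_path().
 */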

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
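
/*
 * Keeping the pool ordered (wider entries first, then by descending
 * value) serves two purposes: the widest, most strictly aligned data
 * is emitted first, minimizing padding, and identical values end up
 * adjacent so that tcg_out_pool_finalize() can merge duplicates.
 */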

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

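/*
 * For example, C_O1_I2(r, r, ri) expands here to the enumerator
 * c_o1_i2_r_r_ri.
 */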
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
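
/*
 * Continuing the example, C_O1_I2(r, r, ri) now expands to
 * { 1, 2, { "r", "r", "ri" } }: one output, two inputs, and their
 * constraint strings.
 */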

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
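
/*
 * A backend provides instances of these structures; as a sketch
 * (names and constraints illustrative, real definitions live in each
 * tcg-target.c.inc):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 */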

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
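
/*
 * The fast path is the inline tcg_malloc() in "tcg/tcg.h", which bumps
 * pool_cur and only calls tcg_malloc_internal() when the current chunk
 * is exhausted.  Nothing is freed piecemeal; tcg_pool_reset() below
 * reclaims everything between translations.
 */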

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
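
/*
 * Worked example for the nargs computation above, assuming the 3-bit
 * typecode encoding of "exec/helper-head.h": for the ld64 helper the
 * arguments are (env, i64 addr, i32 oi, ptr ra), so once the 3-bit
 * return code is shifted out the last nonzero field is the fourth,
 * and nargs = DIV_ROUND_UP(32 - clz32(typemask >> 3), 3) = 4.
 */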

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
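
/*
 * For instance, on an x86-64 host with six integer argument
 * registers, arg_slot 0..5 name those registers and arg_slot 6 is
 * the first stack slot, at offset TCG_TARGET_CALL_STACK_OFFSET.
 */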

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
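
/*
 * E.g. an odd arg_slot of 1 becomes 2, as required by ABIs that pass
 * 64-bit values in aligned register pairs (TCG_CALL_ARG_EVEN).
 */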

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * a structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
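
/*
 * Concretely, for an Int128 argument on a 64-bit host: n = 2, one
 * normal argument slot carries the pointer, and two ref_slot words
 * are reserved for the callee-clobberable copy of the value.
 */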
1398 
1399 static void init_call_layout(TCGHelperInfo *info)
1400 {
1401     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1402     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1403     unsigned typemask = info->typemask;
1404     unsigned typecode;
1405     TCGCumulativeArgs cum = { };
1406 
1407     /*
1408      * Parse and place any function return value.
1409      */
1410     typecode = typemask & 7;
1411     switch (typecode) {
1412     case dh_typecode_void:
1413         info->nr_out = 0;
1414         break;
1415     case dh_typecode_i32:
1416     case dh_typecode_s32:
1417     case dh_typecode_ptr:
1418         info->nr_out = 1;
1419         info->out_kind = TCG_CALL_RET_NORMAL;
1420         break;
1421     case dh_typecode_i64:
1422     case dh_typecode_s64:
1423         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1424         info->out_kind = TCG_CALL_RET_NORMAL;
1425         /* Query the last register now to trigger any assert early. */
1426         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1427         break;
1428     case dh_typecode_i128:
1429         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1430         info->out_kind = TCG_TARGET_CALL_RET_I128;
1431         switch (TCG_TARGET_CALL_RET_I128) {
1432         case TCG_CALL_RET_NORMAL:
1433             /* Query the last register now to trigger any assert early. */
1434             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1435             break;
1436         case TCG_CALL_RET_BY_VEC:
1437             /* Query the single register now to trigger any assert early. */
1438             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1439             break;
1440         case TCG_CALL_RET_BY_REF:
1441             /*
1442              * Allocate the first argument to the output.
1443              * We don't need to store this anywhere, just make it
1444              * unavailable for use in the input loop below.
1445              */
1446             cum.arg_slot = 1;
1447             break;
1448         default:
1449             qemu_build_not_reached();
1450         }
1451         break;
1452     default:
1453         g_assert_not_reached();
1454     }
1455 
1456     /*
1457      * Parse and place function arguments.
1458      */
1459     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1460         TCGCallArgumentKind kind;
1461         TCGType type;
1462 
1463         typecode = typemask & 7;
1464         switch (typecode) {
1465         case dh_typecode_i32:
1466         case dh_typecode_s32:
1467             type = TCG_TYPE_I32;
1468             break;
1469         case dh_typecode_i64:
1470         case dh_typecode_s64:
1471             type = TCG_TYPE_I64;
1472             break;
1473         case dh_typecode_ptr:
1474             type = TCG_TYPE_PTR;
1475             break;
1476         case dh_typecode_i128:
1477             type = TCG_TYPE_I128;
1478             break;
1479         default:
1480             g_assert_not_reached();
1481         }
1482 
1483         switch (type) {
1484         case TCG_TYPE_I32:
1485             switch (TCG_TARGET_CALL_ARG_I32) {
1486             case TCG_CALL_ARG_EVEN:
1487                 layout_arg_even(&cum);
1488                 /* fall through */
1489             case TCG_CALL_ARG_NORMAL:
1490                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1491                 break;
1492             case TCG_CALL_ARG_EXTEND:
1493                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1494                 layout_arg_1(&cum, info, kind);
1495                 break;
1496             default:
1497                 qemu_build_not_reached();
1498             }
1499             break;
1500 
1501         case TCG_TYPE_I64:
1502             switch (TCG_TARGET_CALL_ARG_I64) {
1503             case TCG_CALL_ARG_EVEN:
1504                 layout_arg_even(&cum);
1505                 /* fall through */
1506             case TCG_CALL_ARG_NORMAL:
1507                 if (TCG_TARGET_REG_BITS == 32) {
1508                     layout_arg_normal_n(&cum, info, 2);
1509                 } else {
1510                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1511                 }
1512                 break;
1513             default:
1514                 qemu_build_not_reached();
1515             }
1516             break;
1517 
1518         case TCG_TYPE_I128:
1519             switch (TCG_TARGET_CALL_ARG_I128) {
1520             case TCG_CALL_ARG_EVEN:
1521                 layout_arg_even(&cum);
1522                 /* fall through */
1523             case TCG_CALL_ARG_NORMAL:
1524                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1525                 break;
1526             case TCG_CALL_ARG_BY_REF:
1527                 layout_arg_by_ref(&cum, info);
1528                 break;
1529             default:
1530                 qemu_build_not_reached();
1531             }
1532             break;
1533 
1534         default:
1535             g_assert_not_reached();
1536         }
1537     }
1538     info->nr_in = cum.info_in_idx;
1539 
1540     /* Validate that we didn't overrun the input array. */
1541     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1542     /* Validate the backend has enough argument space. */
1543     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1544 
1545     /*
1546      * Relocate the "ref_slot" area to the end of the parameters.
1547      * Minimizing this stack offset helps code size for x86,
1548      * which has a signed 8-bit offset encoding.
1549      */
1550     if (cum.ref_slot != 0) {
1551         int ref_base = 0;
1552 
1553         if (cum.arg_slot > max_reg_slots) {
1554             int align = __alignof(Int128) / sizeof(tcg_target_long);
1555 
1556             ref_base = cum.arg_slot - max_reg_slots;
1557             if (align > 1) {
1558                 ref_base = ROUND_UP(ref_base, align);
1559             }
1560         }
1561         assert(ref_base + cum.ref_slot <= max_stk_slots);
1562         ref_base += max_reg_slots;
1563 
1564         if (ref_base != 0) {
1565             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1566                 TCGCallArgumentLoc *loc = &info->in[i];
1567                 switch (loc->kind) {
1568                 case TCG_CALL_ARG_BY_REF:
1569                 case TCG_CALL_ARG_BY_REF_N:
1570                     loc->ref_slot += ref_base;
1571                     break;
1572                 default:
1573                     break;
1574                 }
1575             }
1576         }
1577     }
1578 }
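/*
 * A worked example of the relocation above, with hypothetical numbers:
 * assume max_reg_slots == 6 and one by-reference I128 argument
 * (cum.ref_slot == 2).  If direct arguments consumed 8 slots
 * (cum.arg_slot == 8), ref_base starts at 8 - 6 == 2, is rounded up to
 * Int128 alignment, and must still fit below max_stk_slots; the
 * register-slot bias is added afterwards, so the by-ref temporaries
 * land just past the direct stack arguments.
 */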
1579 
1580 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1581 static void process_constraint_sets(void);
1582 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1583                                             TCGReg reg, const char *name);
1584 
1585 static void tcg_context_init(unsigned max_threads)
1586 {
1587     TCGContext *s = &tcg_init_ctx;
1588     int n, i;
1589     TCGTemp *ts;
1590 
1591     memset(s, 0, sizeof(*s));
1592     s->nb_globals = 0;
1593 
1594     init_call_layout(&info_helper_ld32_mmu);
1595     init_call_layout(&info_helper_ld64_mmu);
1596     init_call_layout(&info_helper_ld128_mmu);
1597     init_call_layout(&info_helper_st32_mmu);
1598     init_call_layout(&info_helper_st64_mmu);
1599     init_call_layout(&info_helper_st128_mmu);
1600 
1601     tcg_target_init(s);
1602     process_constraint_sets();
1603 
1604     /* Reverse the order of the saved registers, assuming they're all at
1605        the start of tcg_target_reg_alloc_order.  */
1606     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1607         int r = tcg_target_reg_alloc_order[n];
1608         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1609             break;
1610         }
1611     }
1612     for (i = 0; i < n; ++i) {
1613         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1614     }
1615     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1616         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1617     }
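    /*
     * Worked example with a hypothetical order { s0, s1, s2, c0, c1 },
     * where s* are call-saved and c* call-clobbered: the scan above
     * stops at n == 3, and indirect_reg_alloc_order becomes
     * { s2, s1, s0, c0, c1 } -- call-saved registers reversed, the
     * clobbered tail unchanged.
     */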
1618 
1619     tcg_ctx = s;
1620     /*
1621      * In user-mode we simply share the init context among threads, since we
1622      * use a single region. See the documentation of tcg_region_init() for the
1623      * reasoning behind this.
1624      * In system-mode we will have at most max_threads TCG threads.
1625      */
1626 #ifdef CONFIG_USER_ONLY
1627     tcg_ctxs = &tcg_ctx;
1628     tcg_cur_ctxs = 1;
1629     tcg_max_ctxs = 1;
1630 #else
1631     tcg_max_ctxs = max_threads;
1632     tcg_ctxs = g_new0(TCGContext *, max_threads);
1633 #endif
1634 
1635     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1636     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1637     tcg_env = temp_tcgv_ptr(ts);
1638 }
1639 
1640 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1641 {
1642     tcg_context_init(max_threads);
1643     tcg_region_init(tb_size, splitwx, max_threads);
1644 }
1645 
1646 /*
1647  * Allocate TBs right before their corresponding translated code, making
1648  * sure that TBs and code are on different cache lines.
1649  */
1650 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1651 {
1652     uintptr_t align = qemu_icache_linesize;
1653     TranslationBlock *tb;
1654     void *next;
1655 
1656  retry:
1657     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1658     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1659 
1660     if (unlikely(next > s->code_gen_highwater)) {
1661         if (tcg_region_alloc(s)) {
1662             return NULL;
1663         }
1664         goto retry;
1665     }
1666     qatomic_set(&s->code_gen_ptr, next);
1667     return tb;
1668 }
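/*
 * Worked example with hypothetical numbers: if qemu_icache_linesize is
 * 64, code_gen_ptr is 0x1010 and sizeof(TranslationBlock) is 0xb0,
 * then tb = ROUND_UP(0x1010, 64) = 0x1040 and
 * next = ROUND_UP(0x1040 + 0xb0, 64) = 0x1100, so the TB header and
 * the translated code that follows never share a cache line.
 */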
1669 
1670 void tcg_prologue_init(void)
1671 {
1672     TCGContext *s = tcg_ctx;
1673     size_t prologue_size;
1674 
1675     s->code_ptr = s->code_gen_ptr;
1676     s->code_buf = s->code_gen_ptr;
1677     s->data_gen_ptr = NULL;
1678 
1679 #ifndef CONFIG_TCG_INTERPRETER
1680     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1681 #endif
1682 
1683     s->pool_labels = NULL;
1684 
1685     qemu_thread_jit_write();
1686     /* Generate the prologue.  */
1687     tcg_target_qemu_prologue(s);
1688 
1689     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1690     {
1691         int result = tcg_out_pool_finalize(s);
1692         tcg_debug_assert(result == 0);
1693     }
1694 
1695     prologue_size = tcg_current_code_size(s);
1696     perf_report_prologue(s->code_gen_ptr, prologue_size);
1697 
1698 #ifndef CONFIG_TCG_INTERPRETER
1699     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1700                         (uintptr_t)s->code_buf, prologue_size);
1701 #endif
1702 
1703     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1704         FILE *logfile = qemu_log_trylock();
1705         if (logfile) {
1706             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1707             if (s->data_gen_ptr) {
1708                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1709                 size_t data_size = prologue_size - code_size;
1710                 size_t i;
1711 
1712                 disas(logfile, s->code_gen_ptr, code_size);
1713 
1714                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1715                     if (sizeof(tcg_target_ulong) == 8) {
1716                         fprintf(logfile,
1717                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1718                                 (uintptr_t)s->data_gen_ptr + i,
1719                                 *(uint64_t *)(s->data_gen_ptr + i));
1720                     } else {
1721                         fprintf(logfile,
1722                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1723                                 (uintptr_t)s->data_gen_ptr + i,
1724                                 *(uint32_t *)(s->data_gen_ptr + i));
1725                     }
1726                 }
1727             } else {
1728                 disas(logfile, s->code_gen_ptr, prologue_size);
1729             }
1730             fprintf(logfile, "\n");
1731             qemu_log_unlock(logfile);
1732         }
1733     }
1734 
1735 #ifndef CONFIG_TCG_INTERPRETER
1736     /*
1737      * Assert that goto_ptr is implemented completely, setting an epilogue.
1738      * For tci, we use NULL as the signal to return from the interpreter,
1739      * so skip this check.
1740      */
1741     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1742 #endif
1743 
1744     tcg_region_prologue_set(s);
1745 }
1746 
1747 void tcg_func_start(TCGContext *s)
1748 {
1749     tcg_pool_reset(s);
1750     s->nb_temps = s->nb_globals;
1751 
1752     /* No temps have been previously allocated for size or locality.  */
1753     tcg_temp_ebb_reset_freed(s);
1754 
1755     /* No constant temps have been previously allocated. */
1756     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1757         if (s->const_table[i]) {
1758             g_hash_table_remove_all(s->const_table[i]);
1759         }
1760     }
1761 
1762     s->nb_ops = 0;
1763     s->nb_labels = 0;
1764     s->current_frame_offset = s->frame_start;
1765 
1766 #ifdef CONFIG_DEBUG_TCG
1767     s->goto_tb_issue_mask = 0;
1768 #endif
1769 
1770     QTAILQ_INIT(&s->ops);
1771     QTAILQ_INIT(&s->free_ops);
1772     s->emit_before_op = NULL;
1773     QSIMPLEQ_INIT(&s->labels);
1774 
1775     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1776     tcg_debug_assert(s->insn_start_words > 0);
1777 }
1778 
1779 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1780 {
1781     int n = s->nb_temps++;
1782 
1783     if (n >= TCG_MAX_TEMPS) {
1784         tcg_raise_tb_overflow(s);
1785     }
1786     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1787 }
1788 
1789 static TCGTemp *tcg_global_alloc(TCGContext *s)
1790 {
1791     TCGTemp *ts;
1792 
1793     tcg_debug_assert(s->nb_globals == s->nb_temps);
1794     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1795     s->nb_globals++;
1796     ts = tcg_temp_alloc(s);
1797     ts->kind = TEMP_GLOBAL;
1798 
1799     return ts;
1800 }
1801 
1802 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1803                                             TCGReg reg, const char *name)
1804 {
1805     TCGTemp *ts;
1806 
1807     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1808 
1809     ts = tcg_global_alloc(s);
1810     ts->base_type = type;
1811     ts->type = type;
1812     ts->kind = TEMP_FIXED;
1813     ts->reg = reg;
1814     ts->name = name;
1815     tcg_regset_set_reg(s->reserved_regs, reg);
1816 
1817     return ts;
1818 }
1819 
1820 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1821 {
1822     s->frame_start = start;
1823     s->frame_end = start + size;
1824     s->frame_temp
1825         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1826 }
1827 
1828 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1829                                             const char *name, TCGType type)
1830 {
1831     TCGContext *s = tcg_ctx;
1832     TCGTemp *base_ts = tcgv_ptr_temp(base);
1833     TCGTemp *ts = tcg_global_alloc(s);
1834     int indirect_reg = 0;
1835 
1836     switch (base_ts->kind) {
1837     case TEMP_FIXED:
1838         break;
1839     case TEMP_GLOBAL:
1840         /* We do not support double-indirect registers.  */
1841         tcg_debug_assert(!base_ts->indirect_reg);
1842         base_ts->indirect_base = 1;
1843         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1844                             ? 2 : 1);
1845         indirect_reg = 1;
1846         break;
1847     default:
1848         g_assert_not_reached();
1849     }
1850 
1851     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1852         TCGTemp *ts2 = tcg_global_alloc(s);
1853         char buf[64];
1854 
1855         ts->base_type = TCG_TYPE_I64;
1856         ts->type = TCG_TYPE_I32;
1857         ts->indirect_reg = indirect_reg;
1858         ts->mem_allocated = 1;
1859         ts->mem_base = base_ts;
1860         ts->mem_offset = offset;
1861         pstrcpy(buf, sizeof(buf), name);
1862         pstrcat(buf, sizeof(buf), "_0");
1863         ts->name = strdup(buf);
1864 
1865         tcg_debug_assert(ts2 == ts + 1);
1866         ts2->base_type = TCG_TYPE_I64;
1867         ts2->type = TCG_TYPE_I32;
1868         ts2->indirect_reg = indirect_reg;
1869         ts2->mem_allocated = 1;
1870         ts2->mem_base = base_ts;
1871         ts2->mem_offset = offset + 4;
1872         ts2->temp_subindex = 1;
1873         pstrcpy(buf, sizeof(buf), name);
1874         pstrcat(buf, sizeof(buf), "_1");
1875         ts2->name = strdup(buf);
1876     } else {
1877         ts->base_type = type;
1878         ts->type = type;
1879         ts->indirect_reg = indirect_reg;
1880         ts->mem_allocated = 1;
1881         ts->mem_base = base_ts;
1882         ts->mem_offset = offset;
1883         ts->name = name;
1884     }
1885     return ts;
1886 }
1887 
1888 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1889 {
1890     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1891     return temp_tcgv_i32(ts);
1892 }
1893 
1894 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1895 {
1896     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1897     return temp_tcgv_i64(ts);
1898 }
1899 
1900 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1901 {
1902     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1903     return temp_tcgv_ptr(ts);
1904 }
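/*
 * A minimal usage sketch, modelled on how targets create their
 * CPU-state globals at translator init (CPUMyState and its pc field
 * are hypothetical):
 *
 *     static TCGv_i64 cpu_pc;
 *
 *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                     offsetof(CPUMyState, pc), "pc");
 *
 * The handle is then valid in every TB; the register allocator keeps
 * the value synchronized with the backing (env, offset) slot.
 */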
1905 
1906 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1907 {
1908     TCGContext *s = tcg_ctx;
1909     TCGTemp *ts;
1910     int n;
1911 
1912     if (kind == TEMP_EBB) {
1913         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1914 
1915         if (idx < TCG_MAX_TEMPS) {
1916             /* There is already an available temp with the right type.  */
1917             clear_bit(idx, s->free_temps[type].l);
1918 
1919             ts = &s->temps[idx];
1920             ts->temp_allocated = 1;
1921             tcg_debug_assert(ts->base_type == type);
1922             tcg_debug_assert(ts->kind == kind);
1923             return ts;
1924         }
1925     } else {
1926         tcg_debug_assert(kind == TEMP_TB);
1927     }
1928 
1929     switch (type) {
1930     case TCG_TYPE_I32:
1931     case TCG_TYPE_V64:
1932     case TCG_TYPE_V128:
1933     case TCG_TYPE_V256:
1934         n = 1;
1935         break;
1936     case TCG_TYPE_I64:
1937         n = 64 / TCG_TARGET_REG_BITS;
1938         break;
1939     case TCG_TYPE_I128:
1940         n = 128 / TCG_TARGET_REG_BITS;
1941         break;
1942     default:
1943         g_assert_not_reached();
1944     }
1945 
1946     ts = tcg_temp_alloc(s);
1947     ts->base_type = type;
1948     ts->temp_allocated = 1;
1949     ts->kind = kind;
1950 
1951     if (n == 1) {
1952         ts->type = type;
1953     } else {
1954         ts->type = TCG_TYPE_REG;
1955 
1956         for (int i = 1; i < n; ++i) {
1957             TCGTemp *ts2 = tcg_temp_alloc(s);
1958 
1959             tcg_debug_assert(ts2 == ts + i);
1960             ts2->base_type = type;
1961             ts2->type = TCG_TYPE_REG;
1962             ts2->temp_allocated = 1;
1963             ts2->temp_subindex = i;
1964             ts2->kind = kind;
1965         }
1966     }
1967     return ts;
1968 }
1969 
1970 TCGv_i32 tcg_temp_new_i32(void)
1971 {
1972     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1973 }
1974 
1975 TCGv_i32 tcg_temp_ebb_new_i32(void)
1976 {
1977     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1978 }
1979 
1980 TCGv_i64 tcg_temp_new_i64(void)
1981 {
1982     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1983 }
1984 
1985 TCGv_i64 tcg_temp_ebb_new_i64(void)
1986 {
1987     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1988 }
1989 
1990 TCGv_ptr tcg_temp_new_ptr(void)
1991 {
1992     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1993 }
1994 
1995 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1996 {
1997     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1998 }
1999 
2000 TCGv_i128 tcg_temp_new_i128(void)
2001 {
2002     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2003 }
2004 
2005 TCGv_i128 tcg_temp_ebb_new_i128(void)
2006 {
2007     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2008 }
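/*
 * A minimal sketch of the two lifetimes: TEMP_TB temps stay valid to
 * the end of the translation block, while TEMP_EBB temps are only
 * valid until the next label or branch and may be recycled via an
 * explicit free (a and b are assumed to be defined elsewhere):
 *
 *     TCGv_i64 sum = tcg_temp_new_i64();        TEMP_TB
 *     TCGv_i64 tmp = tcg_temp_ebb_new_i64();    TEMP_EBB
 *
 *     tcg_gen_add_i64(sum, a, b);
 *     tcg_gen_shli_i64(tmp, sum, 1);
 *     tcg_temp_free_i64(tmp);    returns tmp to free_temps for reuse
 *
 * Freeing a TEMP_TB temp is silently ignored, as seen in
 * tcg_temp_free_internal() below.
 */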
2009 
2010 TCGv_vec tcg_temp_new_vec(TCGType type)
2011 {
2012     TCGTemp *t;
2013 
2014 #ifdef CONFIG_DEBUG_TCG
2015     switch (type) {
2016     case TCG_TYPE_V64:
2017         assert(TCG_TARGET_HAS_v64);
2018         break;
2019     case TCG_TYPE_V128:
2020         assert(TCG_TARGET_HAS_v128);
2021         break;
2022     case TCG_TYPE_V256:
2023         assert(TCG_TARGET_HAS_v256);
2024         break;
2025     default:
2026         g_assert_not_reached();
2027     }
2028 #endif
2029 
2030     t = tcg_temp_new_internal(type, TEMP_EBB);
2031     return temp_tcgv_vec(t);
2032 }
2033 
2034 /* Create a new temp of the same type as an existing temp.  */
2035 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2036 {
2037     TCGTemp *t = tcgv_vec_temp(match);
2038 
2039     tcg_debug_assert(t->temp_allocated != 0);
2040 
2041     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2042     return temp_tcgv_vec(t);
2043 }
2044 
2045 void tcg_temp_free_internal(TCGTemp *ts)
2046 {
2047     TCGContext *s = tcg_ctx;
2048 
2049     switch (ts->kind) {
2050     case TEMP_CONST:
2051     case TEMP_TB:
2052         /* Silently ignore free. */
2053         break;
2054     case TEMP_EBB:
2055         tcg_debug_assert(ts->temp_allocated != 0);
2056         ts->temp_allocated = 0;
2057         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2058         break;
2059     default:
2060         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2061         g_assert_not_reached();
2062     }
2063 }
2064 
2065 void tcg_temp_free_i32(TCGv_i32 arg)
2066 {
2067     tcg_temp_free_internal(tcgv_i32_temp(arg));
2068 }
2069 
2070 void tcg_temp_free_i64(TCGv_i64 arg)
2071 {
2072     tcg_temp_free_internal(tcgv_i64_temp(arg));
2073 }
2074 
2075 void tcg_temp_free_i128(TCGv_i128 arg)
2076 {
2077     tcg_temp_free_internal(tcgv_i128_temp(arg));
2078 }
2079 
2080 void tcg_temp_free_ptr(TCGv_ptr arg)
2081 {
2082     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2083 }
2084 
2085 void tcg_temp_free_vec(TCGv_vec arg)
2086 {
2087     tcg_temp_free_internal(tcgv_vec_temp(arg));
2088 }
2089 
2090 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2091 {
2092     TCGContext *s = tcg_ctx;
2093     GHashTable *h = s->const_table[type];
2094     TCGTemp *ts;
2095 
2096     if (h == NULL) {
2097         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2098         s->const_table[type] = h;
2099     }
2100 
2101     ts = g_hash_table_lookup(h, &val);
2102     if (ts == NULL) {
2103         int64_t *val_ptr;
2104 
2105         ts = tcg_temp_alloc(s);
2106 
2107         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2108             TCGTemp *ts2 = tcg_temp_alloc(s);
2109 
2110             tcg_debug_assert(ts2 == ts + 1);
2111 
2112             ts->base_type = TCG_TYPE_I64;
2113             ts->type = TCG_TYPE_I32;
2114             ts->kind = TEMP_CONST;
2115             ts->temp_allocated = 1;
2116 
2117             ts2->base_type = TCG_TYPE_I64;
2118             ts2->type = TCG_TYPE_I32;
2119             ts2->kind = TEMP_CONST;
2120             ts2->temp_allocated = 1;
2121             ts2->temp_subindex = 1;
2122 
2123             /*
2124              * Retain the full value of the 64-bit constant in the low
2125              * part, so that the hash table works.  Actual uses will
2126              * truncate the value to the low part.
2127              */
2128             ts[HOST_BIG_ENDIAN].val = val;
2129             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2130             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2131         } else {
2132             ts->base_type = type;
2133             ts->type = type;
2134             ts->kind = TEMP_CONST;
2135             ts->temp_allocated = 1;
2136             ts->val = val;
2137             val_ptr = &ts->val;
2138         }
2139         g_hash_table_insert(h, val_ptr, ts);
2140     }
2141 
2142     return ts;
2143 }
2144 
2145 TCGv_i32 tcg_constant_i32(int32_t val)
2146 {
2147     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2148 }
2149 
2150 TCGv_i64 tcg_constant_i64(int64_t val)
2151 {
2152     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2153 }
2154 
2155 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2156 {
2157     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2158 }
2159 
2160 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2161 {
2162     val = dup_const(vece, val);
2163     return temp_tcgv_vec(tcg_constant_internal(type, val));
2164 }
2165 
2166 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2167 {
2168     TCGTemp *t = tcgv_vec_temp(match);
2169 
2170     tcg_debug_assert(t->temp_allocated != 0);
2171     return tcg_constant_vec(t->base_type, vece, val);
2172 }
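/*
 * Constants are interned: asking for the same (type, value) twice
 * yields the same temp, so they are cheap to create at point of use
 * and must never be written or freed (tcg_temp_free_internal above
 * silently ignores TEMP_CONST).  A minimal sketch, with dst and src
 * assumed to be defined elsewhere:
 *
 *     TCGv_i32 one = tcg_constant_i32(1);
 *     tcg_gen_add_i32(dst, src, one);
 */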
2173 
2174 #ifdef CONFIG_DEBUG_TCG
2175 size_t temp_idx(TCGTemp *ts)
2176 {
2177     ptrdiff_t n = ts - tcg_ctx->temps;
2178     assert(n >= 0 && n < tcg_ctx->nb_temps);
2179     return n;
2180 }
2181 
2182 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2183 {
2184     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2185 
2186     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2187     assert(o % sizeof(TCGTemp) == 0);
2188 
2189     return (void *)tcg_ctx + (uintptr_t)v;
2190 }
2191 #endif /* CONFIG_DEBUG_TCG */
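/*
 * In both builds a TCGv_* handle is not a real pointer: it encodes the
 * byte offset of its TCGTemp from the start of TCGContext, so
 * conversion in either direction is pure arithmetic, sketched as:
 *
 *     handle = (TCGv_i32)((void *)ts - (void *)tcg_ctx);
 *     ts     = (TCGTemp *)((void *)tcg_ctx + (uintptr_t)handle);
 *
 * The debug variants above additionally check that the offset lands
 * on a TCGTemp boundary within the allocated part of temps[].
 */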
2192 
2193 /*
2194  * Return true if OP may appear in the opcode stream with TYPE.
2195  * Test the runtime variable that controls each opcode.
2196  */
2197 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2198 {
2199     bool has_type;
2200 
2201     switch (type) {
2202     case TCG_TYPE_I32:
2203         has_type = true;
2204         break;
2205     case TCG_TYPE_I64:
2206         has_type = TCG_TARGET_REG_BITS == 64;
2207         break;
2208     case TCG_TYPE_V64:
2209         has_type = TCG_TARGET_HAS_v64;
2210         break;
2211     case TCG_TYPE_V128:
2212         has_type = TCG_TARGET_HAS_v128;
2213         break;
2214     case TCG_TYPE_V256:
2215         has_type = TCG_TARGET_HAS_v256;
2216         break;
2217     default:
2218         has_type = false;
2219         break;
2220     }
2221 
2222     switch (op) {
2223     case INDEX_op_discard:
2224     case INDEX_op_set_label:
2225     case INDEX_op_call:
2226     case INDEX_op_br:
2227     case INDEX_op_mb:
2228     case INDEX_op_insn_start:
2229     case INDEX_op_exit_tb:
2230     case INDEX_op_goto_tb:
2231     case INDEX_op_goto_ptr:
2232     case INDEX_op_qemu_ld_i32:
2233     case INDEX_op_qemu_st_i32:
2234     case INDEX_op_qemu_ld_i64:
2235     case INDEX_op_qemu_st_i64:
2236         return true;
2237 
2238     case INDEX_op_qemu_st8_i32:
2239         return TCG_TARGET_HAS_qemu_st8_i32;
2240 
2241     case INDEX_op_qemu_ld_i128:
2242     case INDEX_op_qemu_st_i128:
2243         return TCG_TARGET_HAS_qemu_ldst_i128;
2244 
2245     case INDEX_op_add:
2246     case INDEX_op_and:
2247     case INDEX_op_mov:
2248     case INDEX_op_or:
2249     case INDEX_op_xor:
2250         return has_type;
2251 
2252     case INDEX_op_setcond_i32:
2253     case INDEX_op_brcond_i32:
2254     case INDEX_op_movcond_i32:
2255     case INDEX_op_ld8u_i32:
2256     case INDEX_op_ld8s_i32:
2257     case INDEX_op_ld16u_i32:
2258     case INDEX_op_ld16s_i32:
2259     case INDEX_op_ld_i32:
2260     case INDEX_op_st8_i32:
2261     case INDEX_op_st16_i32:
2262     case INDEX_op_st_i32:
2263     case INDEX_op_shl_i32:
2264     case INDEX_op_shr_i32:
2265     case INDEX_op_sar_i32:
2266     case INDEX_op_extract_i32:
2267     case INDEX_op_sextract_i32:
2268     case INDEX_op_deposit_i32:
2269         return true;
2270 
2271     case INDEX_op_negsetcond_i32:
2272         return TCG_TARGET_HAS_negsetcond_i32;
2273     case INDEX_op_rem_i32:
2274     case INDEX_op_remu_i32:
2275         return TCG_TARGET_HAS_rem_i32;
2276     case INDEX_op_rotl_i32:
2277     case INDEX_op_rotr_i32:
2278         return TCG_TARGET_HAS_rot_i32;
2279     case INDEX_op_extract2_i32:
2280         return TCG_TARGET_HAS_extract2_i32;
2281     case INDEX_op_add2_i32:
2282         return TCG_TARGET_HAS_add2_i32;
2283     case INDEX_op_sub2_i32:
2284         return TCG_TARGET_HAS_sub2_i32;
2285     case INDEX_op_mulu2_i32:
2286         return TCG_TARGET_HAS_mulu2_i32;
2287     case INDEX_op_muls2_i32:
2288         return TCG_TARGET_HAS_muls2_i32;
2289     case INDEX_op_bswap16_i32:
2290         return TCG_TARGET_HAS_bswap16_i32;
2291     case INDEX_op_bswap32_i32:
2292         return TCG_TARGET_HAS_bswap32_i32;
2293     case INDEX_op_clz_i32:
2294         return TCG_TARGET_HAS_clz_i32;
2295     case INDEX_op_ctz_i32:
2296         return TCG_TARGET_HAS_ctz_i32;
2297     case INDEX_op_ctpop_i32:
2298         return TCG_TARGET_HAS_ctpop_i32;
2299 
2300     case INDEX_op_brcond2_i32:
2301     case INDEX_op_setcond2_i32:
2302         return TCG_TARGET_REG_BITS == 32;
2303 
2304     case INDEX_op_setcond_i64:
2305     case INDEX_op_brcond_i64:
2306     case INDEX_op_movcond_i64:
2307     case INDEX_op_ld8u_i64:
2308     case INDEX_op_ld8s_i64:
2309     case INDEX_op_ld16u_i64:
2310     case INDEX_op_ld16s_i64:
2311     case INDEX_op_ld32u_i64:
2312     case INDEX_op_ld32s_i64:
2313     case INDEX_op_ld_i64:
2314     case INDEX_op_st8_i64:
2315     case INDEX_op_st16_i64:
2316     case INDEX_op_st32_i64:
2317     case INDEX_op_st_i64:
2318     case INDEX_op_shl_i64:
2319     case INDEX_op_shr_i64:
2320     case INDEX_op_sar_i64:
2321     case INDEX_op_ext_i32_i64:
2322     case INDEX_op_extu_i32_i64:
2323     case INDEX_op_extract_i64:
2324     case INDEX_op_sextract_i64:
2325     case INDEX_op_deposit_i64:
2326         return TCG_TARGET_REG_BITS == 64;
2327 
2328     case INDEX_op_negsetcond_i64:
2329         return TCG_TARGET_HAS_negsetcond_i64;
2330     case INDEX_op_rem_i64:
2331     case INDEX_op_remu_i64:
2332         return TCG_TARGET_HAS_rem_i64;
2333     case INDEX_op_rotl_i64:
2334     case INDEX_op_rotr_i64:
2335         return TCG_TARGET_HAS_rot_i64;
2336     case INDEX_op_extract2_i64:
2337         return TCG_TARGET_HAS_extract2_i64;
2338     case INDEX_op_extrl_i64_i32:
2339     case INDEX_op_extrh_i64_i32:
2340         return TCG_TARGET_HAS_extr_i64_i32;
2341     case INDEX_op_bswap16_i64:
2342         return TCG_TARGET_HAS_bswap16_i64;
2343     case INDEX_op_bswap32_i64:
2344         return TCG_TARGET_HAS_bswap32_i64;
2345     case INDEX_op_bswap64_i64:
2346         return TCG_TARGET_HAS_bswap64_i64;
2347     case INDEX_op_clz_i64:
2348         return TCG_TARGET_HAS_clz_i64;
2349     case INDEX_op_ctz_i64:
2350         return TCG_TARGET_HAS_ctz_i64;
2351     case INDEX_op_ctpop_i64:
2352         return TCG_TARGET_HAS_ctpop_i64;
2353     case INDEX_op_add2_i64:
2354         return TCG_TARGET_HAS_add2_i64;
2355     case INDEX_op_sub2_i64:
2356         return TCG_TARGET_HAS_sub2_i64;
2357     case INDEX_op_mulu2_i64:
2358         return TCG_TARGET_HAS_mulu2_i64;
2359     case INDEX_op_muls2_i64:
2360         return TCG_TARGET_HAS_muls2_i64;
2361 
2362     case INDEX_op_mov_vec:
2363     case INDEX_op_dup_vec:
2364     case INDEX_op_dupm_vec:
2365     case INDEX_op_ld_vec:
2366     case INDEX_op_st_vec:
2367     case INDEX_op_add_vec:
2368     case INDEX_op_sub_vec:
2369     case INDEX_op_and_vec:
2370     case INDEX_op_or_vec:
2371     case INDEX_op_xor_vec:
2372     case INDEX_op_cmp_vec:
2373         return has_type;
2374     case INDEX_op_dup2_vec:
2375         return has_type && TCG_TARGET_REG_BITS == 32;
2376     case INDEX_op_not_vec:
2377         return has_type && TCG_TARGET_HAS_not_vec;
2378     case INDEX_op_neg_vec:
2379         return has_type && TCG_TARGET_HAS_neg_vec;
2380     case INDEX_op_abs_vec:
2381         return has_type && TCG_TARGET_HAS_abs_vec;
2382     case INDEX_op_andc_vec:
2383         return has_type && TCG_TARGET_HAS_andc_vec;
2384     case INDEX_op_orc_vec:
2385         return has_type && TCG_TARGET_HAS_orc_vec;
2386     case INDEX_op_nand_vec:
2387         return has_type && TCG_TARGET_HAS_nand_vec;
2388     case INDEX_op_nor_vec:
2389         return has_type && TCG_TARGET_HAS_nor_vec;
2390     case INDEX_op_eqv_vec:
2391         return has_type && TCG_TARGET_HAS_eqv_vec;
2392     case INDEX_op_mul_vec:
2393         return has_type && TCG_TARGET_HAS_mul_vec;
2394     case INDEX_op_shli_vec:
2395     case INDEX_op_shri_vec:
2396     case INDEX_op_sari_vec:
2397         return has_type && TCG_TARGET_HAS_shi_vec;
2398     case INDEX_op_shls_vec:
2399     case INDEX_op_shrs_vec:
2400     case INDEX_op_sars_vec:
2401         return has_type && TCG_TARGET_HAS_shs_vec;
2402     case INDEX_op_shlv_vec:
2403     case INDEX_op_shrv_vec:
2404     case INDEX_op_sarv_vec:
2405         return has_type && TCG_TARGET_HAS_shv_vec;
2406     case INDEX_op_rotli_vec:
2407         return has_type && TCG_TARGET_HAS_roti_vec;
2408     case INDEX_op_rotls_vec:
2409         return has_type && TCG_TARGET_HAS_rots_vec;
2410     case INDEX_op_rotlv_vec:
2411     case INDEX_op_rotrv_vec:
2412         return has_type && TCG_TARGET_HAS_rotv_vec;
2413     case INDEX_op_ssadd_vec:
2414     case INDEX_op_usadd_vec:
2415     case INDEX_op_sssub_vec:
2416     case INDEX_op_ussub_vec:
2417         return has_type && TCG_TARGET_HAS_sat_vec;
2418     case INDEX_op_smin_vec:
2419     case INDEX_op_umin_vec:
2420     case INDEX_op_smax_vec:
2421     case INDEX_op_umax_vec:
2422         return has_type && TCG_TARGET_HAS_minmax_vec;
2423     case INDEX_op_bitsel_vec:
2424         return has_type && TCG_TARGET_HAS_bitsel_vec;
2425     case INDEX_op_cmpsel_vec:
2426         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2427 
2428     default:
2429         if (op < INDEX_op_last_generic) {
2430             const TCGOutOp *outop;
2431             TCGConstraintSetIndex con_set;
2432 
2433             if (!has_type) {
2434                 return false;
2435             }
2436 
2437             outop = all_outop[op];
2438             tcg_debug_assert(outop != NULL);
2439 
2440             con_set = outop->static_constraint;
2441             if (con_set == C_Dynamic) {
2442                 con_set = outop->dynamic_constraint(type, flags);
2443             }
2444             if (con_set >= 0) {
2445                 return true;
2446             }
2447             tcg_debug_assert(con_set == C_NotImplemented);
2448             return false;
2449         }
2450         tcg_debug_assert(op < NB_OPS);
2451         return true;
2452 
2453     case INDEX_op_last_generic:
2454         g_assert_not_reached();
2455     }
2456 }
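/*
 * A minimal usage sketch: expansion code queries support before
 * emitting an optional opcode and otherwise falls back to an
 * open-coded sequence (the fallback here is hypothetical):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         emit the ctpop opcode directly
 *     } else {
 *         expand with shifts, masks and adds
 *     }
 */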
2457 
2458 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2459 {
2460     unsigned width;
2461 
2462     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2463     width = (type == TCG_TYPE_I32 ? 32 : 64);
2464 
2465     tcg_debug_assert(ofs < width);
2466     tcg_debug_assert(len > 0);
2467     tcg_debug_assert(len <= width - ofs);
2468 
2469     return TCG_TARGET_deposit_valid(type, ofs, len);
2470 }
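/*
 * For example, depositing an 8-bit field at bit 8 of an I32 value
 * (ofs == 8, len == 8) passes the assertions above; whether the
 * backend can actually encode that field is then left to
 * TCG_TARGET_deposit_valid.
 */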
2471 
2472 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2473 
2474 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2475                           TCGTemp *ret, TCGTemp **args)
2476 {
2477     TCGv_i64 extend_free[MAX_CALL_IARGS];
2478     int n_extend = 0;
2479     TCGOp *op;
2480     int i, n, pi = 0, total_args;
2481 
2482     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2483         init_call_layout(info);
2484         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2485     }
2486 
2487     total_args = info->nr_out + info->nr_in + 2;
2488     op = tcg_op_alloc(INDEX_op_call, total_args);
2489 
2490 #ifdef CONFIG_PLUGIN
2491     /* Flag helpers that may affect guest state */
2492     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2493         tcg_ctx->plugin_insn->calls_helpers = true;
2494     }
2495 #endif
2496 
2497     TCGOP_CALLO(op) = n = info->nr_out;
2498     switch (n) {
2499     case 0:
2500         tcg_debug_assert(ret == NULL);
2501         break;
2502     case 1:
2503         tcg_debug_assert(ret != NULL);
2504         op->args[pi++] = temp_arg(ret);
2505         break;
2506     case 2:
2507     case 4:
2508         tcg_debug_assert(ret != NULL);
2509         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2510         tcg_debug_assert(ret->temp_subindex == 0);
2511         for (i = 0; i < n; ++i) {
2512             op->args[pi++] = temp_arg(ret + i);
2513         }
2514         break;
2515     default:
2516         g_assert_not_reached();
2517     }
2518 
2519     TCGOP_CALLI(op) = n = info->nr_in;
2520     for (i = 0; i < n; i++) {
2521         const TCGCallArgumentLoc *loc = &info->in[i];
2522         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2523 
2524         switch (loc->kind) {
2525         case TCG_CALL_ARG_NORMAL:
2526         case TCG_CALL_ARG_BY_REF:
2527         case TCG_CALL_ARG_BY_REF_N:
2528             op->args[pi++] = temp_arg(ts);
2529             break;
2530 
2531         case TCG_CALL_ARG_EXTEND_U:
2532         case TCG_CALL_ARG_EXTEND_S:
2533             {
2534                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2535                 TCGv_i32 orig = temp_tcgv_i32(ts);
2536 
2537                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2538                     tcg_gen_ext_i32_i64(temp, orig);
2539                 } else {
2540                     tcg_gen_extu_i32_i64(temp, orig);
2541                 }
2542                 op->args[pi++] = tcgv_i64_arg(temp);
2543                 extend_free[n_extend++] = temp;
2544             }
2545             break;
2546 
2547         default:
2548             g_assert_not_reached();
2549         }
2550     }
2551     op->args[pi++] = (uintptr_t)func;
2552     op->args[pi++] = (uintptr_t)info;
2553     tcg_debug_assert(pi == total_args);
2554 
2555     if (tcg_ctx->emit_before_op) {
2556         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2557     } else {
2558         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2559     }
2560 
2561     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2562     for (i = 0; i < n_extend; ++i) {
2563         tcg_temp_free_i64(extend_free[i]);
2564     }
2565 }
2566 
2567 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2568 {
2569     tcg_gen_callN(func, info, ret, NULL);
2570 }
2571 
2572 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2573 {
2574     tcg_gen_callN(func, info, ret, &t1);
2575 }
2576 
2577 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2578                    TCGTemp *t1, TCGTemp *t2)
2579 {
2580     TCGTemp *args[2] = { t1, t2 };
2581     tcg_gen_callN(func, info, ret, args);
2582 }
2583 
2584 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2585                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2586 {
2587     TCGTemp *args[3] = { t1, t2, t3 };
2588     tcg_gen_callN(func, info, ret, args);
2589 }
2590 
2591 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2592                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2593 {
2594     TCGTemp *args[4] = { t1, t2, t3, t4 };
2595     tcg_gen_callN(func, info, ret, args);
2596 }
2597 
2598 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2599                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2600 {
2601     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2602     tcg_gen_callN(func, info, ret, args);
2603 }
2604 
2605 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2606                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2607                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2608 {
2609     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2610     tcg_gen_callN(func, info, ret, args);
2611 }
2612 
2613 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2614                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2615                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2616 {
2617     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2618     tcg_gen_callN(func, info, ret, args);
2619 }
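/*
 * A sketch of the op layout these wrappers produce, for a helper with
 * one register-sized return and two register-sized arguments (all
 * names hypothetical):
 *
 *     tcg_gen_call2(helper_foo, &info_foo, ret, a1, a2);
 *
 * yields an INDEX_op_call with TCGOP_CALLO == 1, TCGOP_CALLI == 2 and
 * args[] = { ret, a1, a2, (uintptr_t)helper_foo, (uintptr_t)&info_foo }.
 */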
2620 
2621 static void tcg_reg_alloc_start(TCGContext *s)
2622 {
2623     int i, n;
2624 
2625     for (i = 0, n = s->nb_temps; i < n; i++) {
2626         TCGTemp *ts = &s->temps[i];
2627         TCGTempVal val = TEMP_VAL_MEM;
2628 
2629         switch (ts->kind) {
2630         case TEMP_CONST:
2631             val = TEMP_VAL_CONST;
2632             break;
2633         case TEMP_FIXED:
2634             val = TEMP_VAL_REG;
2635             break;
2636         case TEMP_GLOBAL:
2637             break;
2638         case TEMP_EBB:
2639             val = TEMP_VAL_DEAD;
2640             /* fall through */
2641         case TEMP_TB:
2642             ts->mem_allocated = 0;
2643             break;
2644         default:
2645             g_assert_not_reached();
2646         }
2647         ts->val_type = val;
2648     }
2649 
2650     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2651 }
2652 
2653 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2654                                  TCGTemp *ts)
2655 {
2656     int idx = temp_idx(ts);
2657 
2658     switch (ts->kind) {
2659     case TEMP_FIXED:
2660     case TEMP_GLOBAL:
2661         pstrcpy(buf, buf_size, ts->name);
2662         break;
2663     case TEMP_TB:
2664         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2665         break;
2666     case TEMP_EBB:
2667         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2668         break;
2669     case TEMP_CONST:
2670         switch (ts->type) {
2671         case TCG_TYPE_I32:
2672             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2673             break;
2674 #if TCG_TARGET_REG_BITS > 32
2675         case TCG_TYPE_I64:
2676             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2677             break;
2678 #endif
2679         case TCG_TYPE_V64:
2680         case TCG_TYPE_V128:
2681         case TCG_TYPE_V256:
2682             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2683                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2684             break;
2685         default:
2686             g_assert_not_reached();
2687         }
2688         break;
2689     }
2690     return buf;
2691 }
2692 
2693 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2694                              int buf_size, TCGArg arg)
2695 {
2696     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2697 }
2698 
2699 static const char * const cond_name[] =
2700 {
2701     [TCG_COND_NEVER] = "never",
2702     [TCG_COND_ALWAYS] = "always",
2703     [TCG_COND_EQ] = "eq",
2704     [TCG_COND_NE] = "ne",
2705     [TCG_COND_LT] = "lt",
2706     [TCG_COND_GE] = "ge",
2707     [TCG_COND_LE] = "le",
2708     [TCG_COND_GT] = "gt",
2709     [TCG_COND_LTU] = "ltu",
2710     [TCG_COND_GEU] = "geu",
2711     [TCG_COND_LEU] = "leu",
2712     [TCG_COND_GTU] = "gtu",
2713     [TCG_COND_TSTEQ] = "tsteq",
2714     [TCG_COND_TSTNE] = "tstne",
2715 };
2716 
2717 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2718 {
2719     [MO_UB]   = "ub",
2720     [MO_SB]   = "sb",
2721     [MO_LEUW] = "leuw",
2722     [MO_LESW] = "lesw",
2723     [MO_LEUL] = "leul",
2724     [MO_LESL] = "lesl",
2725     [MO_LEUQ] = "leq",
2726     [MO_BEUW] = "beuw",
2727     [MO_BESW] = "besw",
2728     [MO_BEUL] = "beul",
2729     [MO_BESL] = "besl",
2730     [MO_BEUQ] = "beq",
2731     [MO_128 + MO_BE] = "beo",
2732     [MO_128 + MO_LE] = "leo",
2733 };
2734 
2735 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2736     [MO_UNALN >> MO_ASHIFT]    = "un+",
2737     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2738     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2739     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2740     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2741     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2742     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2743     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2744 };
2745 
2746 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2747     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2748     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2749     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2750     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2751     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2752     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2753 };
2754 
2755 static const char bswap_flag_name[][6] = {
2756     [TCG_BSWAP_IZ] = "iz",
2757     [TCG_BSWAP_OZ] = "oz",
2758     [TCG_BSWAP_OS] = "os",
2759     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2760     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2761 };
2762 
2763 #ifdef CONFIG_PLUGIN
2764 static const char * const plugin_from_name[] = {
2765     "from-tb",
2766     "from-insn",
2767     "after-insn",
2768     "after-tb",
2769 };
2770 #endif
2771 
2772 static inline bool tcg_regset_single(TCGRegSet d)
2773 {
2774     return (d & (d - 1)) == 0;
2775 }
2776 
2777 static inline TCGReg tcg_regset_first(TCGRegSet d)
2778 {
2779     if (TCG_TARGET_NB_REGS <= 32) {
2780         return ctz32(d);
2781     } else {
2782         return ctz64(d);
2783     }
2784 }
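/*
 * tcg_regset_single uses the usual power-of-two test: d & (d - 1)
 * clears the lowest set bit, so the result is zero iff at most one
 * bit was set.  E.g. 0b01000 & 0b00111 == 0 (single register), while
 * 0b01100 & 0b01011 == 0b01000 != 0 (more than one).
 */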
2785 
2786 /* Return only the number of characters output -- no error return. */
2787 #define ne_fprintf(...) \
2788     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2789 
2790 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2791 {
2792     char buf[128];
2793     TCGOp *op;
2794 
2795     QTAILQ_FOREACH(op, &s->ops, link) {
2796         int i, k, nb_oargs, nb_iargs, nb_cargs;
2797         const TCGOpDef *def;
2798         TCGOpcode c;
2799         int col = 0;
2800 
2801         c = op->opc;
2802         def = &tcg_op_defs[c];
2803 
2804         if (c == INDEX_op_insn_start) {
2805             nb_oargs = 0;
2806             col += ne_fprintf(f, "\n ----");
2807 
2808             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2809                 col += ne_fprintf(f, " %016" PRIx64,
2810                                   tcg_get_insn_start_param(op, i));
2811             }
2812         } else if (c == INDEX_op_call) {
2813             const TCGHelperInfo *info = tcg_call_info(op);
2814             void *func = tcg_call_func(op);
2815 
2816             /* variable number of arguments */
2817             nb_oargs = TCGOP_CALLO(op);
2818             nb_iargs = TCGOP_CALLI(op);
2819             nb_cargs = def->nb_cargs;
2820 
2821             col += ne_fprintf(f, " %s ", def->name);
2822 
2823             /*
2824              * Print the function name from TCGHelperInfo, if available.
2825              * Note that plugins have a template function for the info,
2826              * but the actual function pointer comes from the plugin.
2827              */
2828             if (func == info->func) {
2829                 col += ne_fprintf(f, "%s", info->name);
2830             } else {
2831                 col += ne_fprintf(f, "plugin(%p)", func);
2832             }
2833 
2834             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2835             for (i = 0; i < nb_oargs; i++) {
2836                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2837                                                             op->args[i]));
2838             }
2839             for (i = 0; i < nb_iargs; i++) {
2840                 TCGArg arg = op->args[nb_oargs + i];
2841                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2842                 col += ne_fprintf(f, ",%s", t);
2843             }
2844         } else {
2845             if (def->flags & TCG_OPF_INT) {
2846                 col += ne_fprintf(f, " %s_i%d ",
2847                                   def->name,
2848                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2849             } else if (def->flags & TCG_OPF_VECTOR) {
2850                 col += ne_fprintf(f, "%s v%d,e%d,",
2851                                   def->name,
2852                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2853                                   8 << TCGOP_VECE(op));
2854             } else {
2855                 col += ne_fprintf(f, " %s ", def->name);
2856             }
2857 
2858             nb_oargs = def->nb_oargs;
2859             nb_iargs = def->nb_iargs;
2860             nb_cargs = def->nb_cargs;
2861 
2862             k = 0;
2863             for (i = 0; i < nb_oargs; i++) {
2864                 const char *sep =  k ? "," : "";
2865                 col += ne_fprintf(f, "%s%s", sep,
2866                                   tcg_get_arg_str(s, buf, sizeof(buf),
2867                                                   op->args[k++]));
2868             }
2869             for (i = 0; i < nb_iargs; i++) {
2870                 const char *sep =  k ? "," : "";
2871                 col += ne_fprintf(f, "%s%s", sep,
2872                                   tcg_get_arg_str(s, buf, sizeof(buf),
2873                                                   op->args[k++]));
2874             }
2875             switch (c) {
2876             case INDEX_op_brcond_i32:
2877             case INDEX_op_setcond_i32:
2878             case INDEX_op_negsetcond_i32:
2879             case INDEX_op_movcond_i32:
2880             case INDEX_op_brcond2_i32:
2881             case INDEX_op_setcond2_i32:
2882             case INDEX_op_brcond_i64:
2883             case INDEX_op_setcond_i64:
2884             case INDEX_op_negsetcond_i64:
2885             case INDEX_op_movcond_i64:
2886             case INDEX_op_cmp_vec:
2887             case INDEX_op_cmpsel_vec:
2888                 if (op->args[k] < ARRAY_SIZE(cond_name)
2889                     && cond_name[op->args[k]]) {
2890                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2891                 } else {
2892                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2893                 }
2894                 i = 1;
2895                 break;
2896             case INDEX_op_qemu_ld_i32:
2897             case INDEX_op_qemu_st_i32:
2898             case INDEX_op_qemu_st8_i32:
2899             case INDEX_op_qemu_ld_i64:
2900             case INDEX_op_qemu_st_i64:
2901             case INDEX_op_qemu_ld_i128:
2902             case INDEX_op_qemu_st_i128:
2903                 {
2904                     const char *s_al, *s_op, *s_at;
2905                     MemOpIdx oi = op->args[k++];
2906                     MemOp mop = get_memop(oi);
2907                     unsigned ix = get_mmuidx(oi);
2908 
2909                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2910                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2911                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2912                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2913 
2914                     /* If all fields are accounted for, print symbolically. */
2915                     if (!mop && s_al && s_op && s_at) {
2916                         col += ne_fprintf(f, ",%s%s%s,%u",
2917                                           s_at, s_al, s_op, ix);
2918                     } else {
2919                         mop = get_memop(oi);
2920                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2921                     }
2922                     i = 1;
2923                 }
2924                 break;
2925             case INDEX_op_bswap16_i32:
2926             case INDEX_op_bswap16_i64:
2927             case INDEX_op_bswap32_i32:
2928             case INDEX_op_bswap32_i64:
2929             case INDEX_op_bswap64_i64:
2930                 {
2931                     TCGArg flags = op->args[k];
2932                     const char *name = NULL;
2933 
2934                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2935                         name = bswap_flag_name[flags];
2936                     }
2937                     if (name) {
2938                         col += ne_fprintf(f, ",%s", name);
2939                     } else {
2940                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2941                     }
2942                     i = k = 1;
2943                 }
2944                 break;
2945 #ifdef CONFIG_PLUGIN
2946             case INDEX_op_plugin_cb:
2947                 {
2948                     TCGArg from = op->args[k++];
2949                     const char *name = NULL;
2950 
2951                     if (from < ARRAY_SIZE(plugin_from_name)) {
2952                         name = plugin_from_name[from];
2953                     }
2954                     if (name) {
2955                         col += ne_fprintf(f, "%s", name);
2956                     } else {
2957                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2958                     }
2959                     i = 1;
2960                 }
2961                 break;
2962 #endif
2963             default:
2964                 i = 0;
2965                 break;
2966             }
2967             switch (c) {
2968             case INDEX_op_set_label:
2969             case INDEX_op_br:
2970             case INDEX_op_brcond_i32:
2971             case INDEX_op_brcond_i64:
2972             case INDEX_op_brcond2_i32:
2973                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2974                                   arg_label(op->args[k])->id);
2975                 i++, k++;
2976                 break;
2977             case INDEX_op_mb:
2978                 {
2979                     TCGBar membar = op->args[k];
2980                     const char *b_op, *m_op;
2981 
2982                     switch (membar & TCG_BAR_SC) {
2983                     case 0:
2984                         b_op = "none";
2985                         break;
2986                     case TCG_BAR_LDAQ:
2987                         b_op = "acq";
2988                         break;
2989                     case TCG_BAR_STRL:
2990                         b_op = "rel";
2991                         break;
2992                     case TCG_BAR_SC:
2993                         b_op = "seq";
2994                         break;
2995                     default:
2996                         g_assert_not_reached();
2997                     }
2998 
2999                     switch (membar & TCG_MO_ALL) {
3000                     case 0:
3001                         m_op = "none";
3002                         break;
3003                     case TCG_MO_LD_LD:
3004                         m_op = "rr";
3005                         break;
3006                     case TCG_MO_LD_ST:
3007                         m_op = "rw";
3008                         break;
3009                     case TCG_MO_ST_LD:
3010                         m_op = "wr";
3011                         break;
3012                     case TCG_MO_ST_ST:
3013                         m_op = "ww";
3014                         break;
3015                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3016                         m_op = "rr+rw";
3017                         break;
3018                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3019                         m_op = "rr+wr";
3020                         break;
3021                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3022                         m_op = "rr+ww";
3023                         break;
3024                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3025                         m_op = "rw+wr";
3026                         break;
3027                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3028                         m_op = "rw+ww";
3029                         break;
3030                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3031                         m_op = "wr+ww";
3032                         break;
3033                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3034                         m_op = "rr+rw+wr";
3035                         break;
3036                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3037                         m_op = "rr+rw+ww";
3038                         break;
3039                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3040                         m_op = "rr+wr+ww";
3041                         break;
3042                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3043                         m_op = "rw+wr+ww";
3044                         break;
3045                     case TCG_MO_ALL:
3046                         m_op = "all";
3047                         break;
3048                     default:
3049                         g_assert_not_reached();
3050                     }
3051 
3052                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3053                     i++, k++;
3054                 }
3055                 break;
3056             default:
3057                 break;
3058             }
3059             for (; i < nb_cargs; i++, k++) {
3060                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3061                                   op->args[k]);
3062             }
3063         }
3064 
3065         if (have_prefs || op->life) {
3066             for (; col < 40; ++col) {
3067                 putc(' ', f);
3068             }
3069         }
3070 
3071         if (op->life) {
3072             unsigned life = op->life;
3073 
3074             if (life & (SYNC_ARG * 3)) {
3075                 ne_fprintf(f, "  sync:");
3076                 for (i = 0; i < 2; ++i) {
3077                     if (life & (SYNC_ARG << i)) {
3078                         ne_fprintf(f, " %d", i);
3079                     }
3080                 }
3081             }
3082             life /= DEAD_ARG;
3083             if (life) {
3084                 ne_fprintf(f, "  dead:");
3085                 for (i = 0; life; ++i, life >>= 1) {
3086                     if (life & 1) {
3087                         ne_fprintf(f, " %d", i);
3088                     }
3089                 }
3090             }
3091         }
3092 
3093         if (have_prefs) {
3094             for (i = 0; i < nb_oargs; ++i) {
3095                 TCGRegSet set = output_pref(op, i);
3096 
3097                 if (i == 0) {
3098                     ne_fprintf(f, "  pref=");
3099                 } else {
3100                     ne_fprintf(f, ",");
3101                 }
3102                 if (set == 0) {
3103                     ne_fprintf(f, "none");
3104                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3105                     ne_fprintf(f, "all");
3106 #ifdef CONFIG_DEBUG_TCG
3107                 } else if (tcg_regset_single(set)) {
3108                     TCGReg reg = tcg_regset_first(set);
3109                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3110 #endif
3111                 } else if (TCG_TARGET_NB_REGS <= 32) {
3112                     ne_fprintf(f, "0x%x", (uint32_t)set);
3113                 } else {
3114                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3115                 }
3116             }
3117         }
3118 
3119         putc('\n', f);
3120     }
3121 }
3122 
3123 /* We give higher priority to constraints with fewer registers. */
3124 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3125 {
3126     int n;
3127 
3128     arg_ct += k;
3129     n = ctpop64(arg_ct->regs);
3130 
3131     /*
3132      * Sort constraints of a single register first, which includes output
3133      * aliases (which must exactly match the input already allocated).
3134      */
3135     if (n == 1 || arg_ct->oalias) {
3136         return INT_MAX;
3137     }
3138 
3139     /*
3140      * Sort register pairs next, first then second immediately after.
3141      * Arbitrarily sort multiple pairs by the index of the first reg;
3142      * there shouldn't be many pairs.
3143      */
3144     switch (arg_ct->pair) {
3145     case 1:
3146     case 3:
3147         return (k + 1) * 2;
3148     case 2:
3149         return (arg_ct->pair_index + 1) * 2 - 1;
3150     }
3151 
3152     /* Finally, sort by decreasing register count. */
3153     assert(n > 1);
3154     return -n;
3155 }
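/*
 * Worked example of the resulting priorities: an output alias or a
 * single-register constraint returns INT_MAX and sorts first; the two
 * halves of a pair at indexes k and k+1 return 2k+2 and 2k+1, keeping
 * the pair adjacent and ahead of ordinary constraints; everything
 * else returns -n, so wider register sets sort later.
 */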
3156 
3157 /* sort from highest priority to lowest */
3158 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3159 {
3160     int i, j;
3161 
3162     for (i = 0; i < n; i++) {
3163         a[start + i].sort_index = start + i;
3164     }
3165     if (n <= 1) {
3166         return;
3167     }
3168     for (i = 0; i < n - 1; i++) {
3169         for (j = i + 1; j < n; j++) {
3170             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3171             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3172             if (p1 < p2) {
3173                 int tmp = a[start + i].sort_index;
3174                 a[start + i].sort_index = a[start + j].sort_index;
3175                 a[start + j].sort_index = tmp;
3176             }
3177         }
3178     }
3179 }
3180 
3181 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3182 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3183 
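/*
 * Each constraint set lists short per-operand strings, outputs first.
 * A sketch of what the parser below accepts, using the common C_O1_I2
 * shape (the register letters themselves come from the per-target
 * REGS/CONST tables included further down):
 *
 *     C_O1_I2(r, 0, ri)  ->  args_ct_str = { "r", "0", "ri" }
 *
 *     "r"  - output in any allocatable register
 *     "0"  - input must alias output 0 (sets oalias/ialias)
 *     "ri" - input may be a register or an immediate (TCG_CT_CONST)
 */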
3184 static void process_constraint_sets(void)
3185 {
3186     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3187         const TCGConstraintSet *tdefs = &constraint_sets[c];
3188         TCGArgConstraint *args_ct = all_cts[c];
3189         int nb_oargs = tdefs->nb_oargs;
3190         int nb_iargs = tdefs->nb_iargs;
3191         int nb_args = nb_oargs + nb_iargs;
3192         bool saw_alias_pair = false;
3193 
3194         for (int i = 0; i < nb_args; i++) {
3195             const char *ct_str = tdefs->args_ct_str[i];
3196             bool input_p = i >= nb_oargs;
3197             int o;
3198 
3199             switch (*ct_str) {
3200             case '0' ... '9':
3201                 o = *ct_str - '0';
3202                 tcg_debug_assert(input_p);
3203                 tcg_debug_assert(o < nb_oargs);
3204                 tcg_debug_assert(args_ct[o].regs != 0);
3205                 tcg_debug_assert(!args_ct[o].oalias);
3206                 args_ct[i] = args_ct[o];
3207                 /* The output sets oalias.  */
3208                 args_ct[o].oalias = 1;
3209                 args_ct[o].alias_index = i;
3210                 /* The input sets ialias. */
3211                 args_ct[i].ialias = 1;
3212                 args_ct[i].alias_index = o;
3213                 if (args_ct[i].pair) {
3214                     saw_alias_pair = true;
3215                 }
3216                 tcg_debug_assert(ct_str[1] == '\0');
3217                 continue;
3218 
3219             case '&':
3220                 tcg_debug_assert(!input_p);
3221                 args_ct[i].newreg = true;
3222                 ct_str++;
3223                 break;
3224 
3225             case 'p': /* plus */
3226                 /* Allocate to the register after the previous. */
3227                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3228                 o = i - 1;
3229                 tcg_debug_assert(!args_ct[o].pair);
3230                 tcg_debug_assert(!args_ct[o].ct);
3231                 args_ct[i] = (TCGArgConstraint){
3232                     .pair = 2,
3233                     .pair_index = o,
3234                     .regs = args_ct[o].regs << 1,
3235                     .newreg = args_ct[o].newreg,
3236                 };
3237                 args_ct[o].pair = 1;
3238                 args_ct[o].pair_index = i;
3239                 tcg_debug_assert(ct_str[1] == '\0');
3240                 continue;
3241 
3242             case 'm': /* minus */
3243                 /* Allocate to the register before the previous. */
3244                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3245                 o = i - 1;
3246                 tcg_debug_assert(!args_ct[o].pair);
3247                 tcg_debug_assert(!args_ct[o].ct);
3248                 args_ct[i] = (TCGArgConstraint){
3249                     .pair = 1,
3250                     .pair_index = o,
3251                     .regs = args_ct[o].regs >> 1,
3252                     .newreg = args_ct[o].newreg,
3253                 };
3254                 args_ct[o].pair = 2;
3255                 args_ct[o].pair_index = i;
3256                 tcg_debug_assert(ct_str[1] == '\0');
3257                 continue;
3258             }
3259 
3260             do {
3261                 switch (*ct_str) {
3262                 case 'i':
3263                     args_ct[i].ct |= TCG_CT_CONST;
3264                     break;
3265 #ifdef TCG_REG_ZERO
3266                 case 'z':
3267                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3268                     break;
3269 #endif
3270 
3271                 /* Include all of the target-specific constraints. */
3272 
3273 #undef CONST
3274 #define CONST(CASE, MASK) \
3275     case CASE: args_ct[i].ct |= MASK; break;
3276 #define REGS(CASE, MASK) \
3277     case CASE: args_ct[i].regs |= MASK; break;
3278 
3279 #include "tcg-target-con-str.h"
3280 
3281 #undef REGS
3282 #undef CONST
3283                 default:
3284                 case '0' ... '9':
3285                 case '&':
3286                 case 'p':
3287                 case 'm':
3288                     /* Typo in TCGConstraintSet constraint. */
3289                     g_assert_not_reached();
3290                 }
3291             } while (*++ct_str != '\0');
3292         }
3293 
3294         /*
3295          * Fix up output pairs that are aliased with inputs.
3296          * When we created the alias, we copied pair from the output.
3297          * There are three cases:
3298          *    (1a) Pairs of inputs alias pairs of outputs.
3299          *    (1b) One input aliases the first of a pair of outputs.
3300          *    (2)  One input aliases the second of a pair of outputs.
3301          *
3302          * Case 1a is handled by making sure that the pair_index'es are
3303          * properly updated so that they appear the same as a pair of inputs.
3304          *
3305          * Case 1b is handled by setting the pair_index of the input to
3306          * itself, simply so it doesn't point to an unrelated argument.
3307          * Since we don't encounter the "second" during the input allocation
3308          * phase, nothing happens with the second half of the input pair.
3309          *
3310          * Case 2 is handled by setting the second input to pair=3, the
3311          * first output to pair=3, and the pair_index'es to match.
3312          */
3313         if (saw_alias_pair) {
3314             for (int i = nb_oargs; i < nb_args; i++) {
3315                 int o, o2, i2;
3316 
3317                 /*
3318                  * Since [0-9pm] must be alone in the constraint string,
3319                  * the only way they can both be set is if the pair comes
3320                  * from the output alias.
3321                  */
3322                 if (!args_ct[i].ialias) {
3323                     continue;
3324                 }
3325                 switch (args_ct[i].pair) {
3326                 case 0:
3327                     break;
3328                 case 1:
3329                     o = args_ct[i].alias_index;
3330                     o2 = args_ct[o].pair_index;
3331                     tcg_debug_assert(args_ct[o].pair == 1);
3332                     tcg_debug_assert(args_ct[o2].pair == 2);
3333                     if (args_ct[o2].oalias) {
3334                         /* Case 1a */
3335                         i2 = args_ct[o2].alias_index;
3336                         tcg_debug_assert(args_ct[i2].pair == 2);
3337                         args_ct[i2].pair_index = i;
3338                         args_ct[i].pair_index = i2;
3339                     } else {
3340                         /* Case 1b */
3341                         args_ct[i].pair_index = i;
3342                     }
3343                     break;
3344                 case 2:
3345                     o = args_ct[i].alias_index;
3346                     o2 = args_ct[o].pair_index;
3347                     tcg_debug_assert(args_ct[o].pair == 2);
3348                     tcg_debug_assert(args_ct[o2].pair == 1);
3349                     if (args_ct[o2].oalias) {
3350                         /* Case 1a */
3351                         i2 = args_ct[o2].alias_index;
3352                         tcg_debug_assert(args_ct[i2].pair == 1);
3353                         args_ct[i2].pair_index = i;
3354                         args_ct[i].pair_index = i2;
3355                     } else {
3356                         /* Case 2 */
3357                         args_ct[i].pair = 3;
3358                         args_ct[o2].pair = 3;
3359                         args_ct[i].pair_index = o2;
3360                         args_ct[o2].pair_index = i;
3361                     }
3362                     break;
3363                 default:
3364                     g_assert_not_reached();
3365                 }
3366             }
3367         }
3368 
3369         /* sort the constraints (XXX: this is just a heuristic) */
3370         sort_constraints(args_ct, 0, nb_oargs);
3371         sort_constraints(args_ct, nb_oargs, nb_iargs);
3372     }
3373 }
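
/*
 * For illustration: a constraint set like { "r", "r", "ri" } describes
 * one output allowed in any general register and two inputs, the second
 * of which may instead be a constant ('i').  A digit string such as "0"
 * aliases an input to the numbered output; "&r" marks an output that
 * must not overlap the inputs; 'p' and 'm' tie an operand to the
 * register after/before the previous operand, forming a pair.  All
 * other letters, 'r' included, are backend-defined via
 * tcg-target-con-str.h.
 */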
3374 
3375 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3376 {
3377     TCGOpcode opc = op->opc;
3378     TCGType type = TCGOP_TYPE(op);
3379     unsigned flags = TCGOP_FLAGS(op);
3380     const TCGOpDef *def = &tcg_op_defs[opc];
3381     const TCGOutOp *outop = all_outop[opc];
3382     TCGConstraintSetIndex con_set;
3383 
3384     if (def->flags & TCG_OPF_NOT_PRESENT) {
3385         return empty_cts;
3386     }
3387 
3388     if (outop) {
3389         con_set = outop->static_constraint;
3390         if (con_set == C_Dynamic) {
3391             con_set = outop->dynamic_constraint(type, flags);
3392         }
3393     } else {
3394         con_set = tcg_target_op_def(opc, type, flags);
3395     }
3396     tcg_debug_assert(con_set >= 0);
3397     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3398 
3399     /* The constraint arguments must match TCGOpcode arguments. */
3400     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3401     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3402 
3403     return all_cts[con_set];
3404 }
3405 
3406 static void remove_label_use(TCGOp *op, int idx)
3407 {
3408     TCGLabel *label = arg_label(op->args[idx]);
3409     TCGLabelUse *use;
3410 
3411     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3412         if (use->op == op) {
3413             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3414             return;
3415         }
3416     }
3417     g_assert_not_reached();
3418 }
3419 
3420 void tcg_op_remove(TCGContext *s, TCGOp *op)
3421 {
3422     switch (op->opc) {
3423     case INDEX_op_br:
3424         remove_label_use(op, 0);
3425         break;
3426     case INDEX_op_brcond_i32:
3427     case INDEX_op_brcond_i64:
3428         remove_label_use(op, 3);
3429         break;
3430     case INDEX_op_brcond2_i32:
3431         remove_label_use(op, 5);
3432         break;
3433     default:
3434         break;
3435     }
3436 
3437     QTAILQ_REMOVE(&s->ops, op, link);
3438     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3439     s->nb_ops--;
3440 }
3441 
3442 void tcg_remove_ops_after(TCGOp *op)
3443 {
3444     TCGContext *s = tcg_ctx;
3445 
3446     while (true) {
3447         TCGOp *last = tcg_last_op();
3448         if (last == op) {
3449             return;
3450         }
3451         tcg_op_remove(s, last);
3452     }
3453 }
3454 
3455 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3456 {
3457     TCGContext *s = tcg_ctx;
3458     TCGOp *op = NULL;
3459 
3460     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3461         QTAILQ_FOREACH(op, &s->free_ops, link) {
3462             if (nargs <= op->nargs) {
3463                 QTAILQ_REMOVE(&s->free_ops, op, link);
3464                 nargs = op->nargs;  /* keep the entry's full capacity */
3465                 goto found;
3466             }
3467         }
3468     }
3469 
3470     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3471     nargs = MAX(4, nargs);
3472     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3473 
3474  found:
3475     memset(op, 0, offsetof(TCGOp, link));
3476     op->opc = opc;
3477     op->nargs = nargs;
3478 
3479     /* Check for bitfield overflow. */
3480     tcg_debug_assert(op->nargs == nargs);
3481 
3482     s->nb_ops++;
3483     return op;
3484 }
3485 
3486 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3487 {
3488     TCGOp *op = tcg_op_alloc(opc, nargs);
3489 
3490     if (tcg_ctx->emit_before_op) {
3491         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3492     } else {
3493         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3494     }
3495     return op;
3496 }
3497 
3498 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3499                             TCGOpcode opc, TCGType type, unsigned nargs)
3500 {
3501     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3502 
3503     TCGOP_TYPE(new_op) = type;
3504     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3505     return new_op;
3506 }
3507 
3508 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3509                            TCGOpcode opc, TCGType type, unsigned nargs)
3510 {
3511     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3512 
3513     TCGOP_TYPE(new_op) = type;
3514     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3515     return new_op;
3516 }
3517 
3518 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3519 {
3520     TCGLabelUse *u;
3521 
3522     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3523         TCGOp *op = u->op;
3524         switch (op->opc) {
3525         case INDEX_op_br:
3526             op->args[0] = label_arg(to);
3527             break;
3528         case INDEX_op_brcond_i32:
3529         case INDEX_op_brcond_i64:
3530             op->args[3] = label_arg(to);
3531             break;
3532         case INDEX_op_brcond2_i32:
3533             op->args[5] = label_arg(to);
3534             break;
3535         default:
3536             g_assert_not_reached();
3537         }
3538     }
3539 
3540     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3541 }
3542 
3543 /* Reachability analysis: remove unreachable code.  */
3544 static void __attribute__((noinline))
3545 reachable_code_pass(TCGContext *s)
3546 {
3547     TCGOp *op, *op_next, *op_prev;
3548     bool dead = false;
3549 
3550     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3551         bool remove = dead;
3552         TCGLabel *label;
3553 
3554         switch (op->opc) {
3555         case INDEX_op_set_label:
3556             label = arg_label(op->args[0]);
3557 
3558             /*
3559              * Note that the first op in the TB is always a load,
3560              * so there is always something before a label.
3561              */
3562             op_prev = QTAILQ_PREV(op, link);
3563 
3564             /*
3565              * If we find two sequential labels, move all branches to
3566              * reference the second label and remove the first label.
3567              * Do this before the branch-to-next optimization, so that the
3568              * middle label is out of the way.
3569              */
3570             if (op_prev->opc == INDEX_op_set_label) {
3571                 move_label_uses(label, arg_label(op_prev->args[0]));
3572                 tcg_op_remove(s, op_prev);
3573                 op_prev = QTAILQ_PREV(op, link);
3574             }
3575 
3576             /*
3577              * Optimization can fold conditional branches to unconditional.
3578              * If we find a label which is preceded by an unconditional
3579              * branch to next, remove the branch.  We couldn't do this when
3580              * processing the branch because any dead code between the branch
3581              * and label had not yet been removed.
3582              */
3583             if (op_prev->opc == INDEX_op_br &&
3584                 label == arg_label(op_prev->args[0])) {
3585                 tcg_op_remove(s, op_prev);
3586                 /* Fall through means insns become live again.  */
3587                 dead = false;
3588             }
3589 
3590             if (QSIMPLEQ_EMPTY(&label->branches)) {
3591                 /*
3592                  * While there is an occasional backward branch, virtually
3593                  * all branches generated by the translators are forward.
3594                  * Which means that generally we will have already removed
3595              * all references to the label by the time we reach it, and
3596              * there is little to be gained by iterating.
3597                  */
3598                 remove = true;
3599             } else {
3600                 /* Once we see a label, insns become live again.  */
3601                 dead = false;
3602                 remove = false;
3603             }
3604             break;
3605 
3606         case INDEX_op_br:
3607         case INDEX_op_exit_tb:
3608         case INDEX_op_goto_ptr:
3609             /* Unconditional branches; everything following is dead.  */
3610             dead = true;
3611             break;
3612 
3613         case INDEX_op_call:
3614             /* Notice noreturn helper calls, such as those raising exceptions.  */
3615             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3616                 dead = true;
3617             }
3618             break;
3619 
3620         case INDEX_op_insn_start:
3621             /* Never remove -- we need to keep these for unwind.  */
3622             remove = false;
3623             break;
3624 
3625         default:
3626             break;
3627         }
3628 
3629         if (remove) {
3630             tcg_op_remove(s, op);
3631         }
3632     }
3633 }
3634 
3635 #define TS_DEAD  1
3636 #define TS_MEM   2
3637 
3638 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3639 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
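
/*
 * During the backward liveness walk, ts->state is a tiny bitmask:
 * 0 means the value has a later use; TS_DEAD means it does not;
 * TS_MEM means the canonical memory slot must hold the value at this
 * point.  Globals therefore finish the TB as TS_DEAD | TS_MEM --
 * dead in registers but synced to memory.
 */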
3640 
3641 /* For liveness_pass_1, the register preferences for a given temp.  */
3642 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3643 {
3644     return ts->state_ptr;
3645 }
3646 
3647 /* For liveness_pass_1, reset the preferences for a given temp to the
3648  * maximal regset for its type.
3649  */
3650 static inline void la_reset_pref(TCGTemp *ts)
3651 {
3652     *la_temp_pref(ts)
3653         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3654 }
3655 
3656 /* liveness analysis: end of function: all temps are dead, and globals
3657    should be in memory. */
3658 static void la_func_end(TCGContext *s, int ng, int nt)
3659 {
3660     int i;
3661 
3662     for (i = 0; i < ng; ++i) {
3663         s->temps[i].state = TS_DEAD | TS_MEM;
3664         la_reset_pref(&s->temps[i]);
3665     }
3666     for (i = ng; i < nt; ++i) {
3667         s->temps[i].state = TS_DEAD;
3668         la_reset_pref(&s->temps[i]);
3669     }
3670 }
3671 
3672 /* liveness analysis: end of basic block: all temps are dead, globals
3673    and local temps should be in memory. */
3674 static void la_bb_end(TCGContext *s, int ng, int nt)
3675 {
3676     int i;
3677 
3678     for (i = 0; i < nt; ++i) {
3679         TCGTemp *ts = &s->temps[i];
3680         int state;
3681 
3682         switch (ts->kind) {
3683         case TEMP_FIXED:
3684         case TEMP_GLOBAL:
3685         case TEMP_TB:
3686             state = TS_DEAD | TS_MEM;
3687             break;
3688         case TEMP_EBB:
3689         case TEMP_CONST:
3690             state = TS_DEAD;
3691             break;
3692         default:
3693             g_assert_not_reached();
3694         }
3695         ts->state = state;
3696         la_reset_pref(ts);
3697     }
3698 }
3699 
3700 /* liveness analysis: sync globals back to memory.  */
3701 static void la_global_sync(TCGContext *s, int ng)
3702 {
3703     int i;
3704 
3705     for (i = 0; i < ng; ++i) {
3706         int state = s->temps[i].state;
3707         s->temps[i].state = state | TS_MEM;
3708         if (state == TS_DEAD) {
3709             /* If the global was previously dead, reset prefs.  */
3710             la_reset_pref(&s->temps[i]);
3711         }
3712     }
3713 }
3714 
3715 /*
3716  * liveness analysis: conditional branch: all temps are dead unless
3717  * explicitly live-across-conditional-branch, globals and local temps
3718  * should be synced.
3719  */
3720 static void la_bb_sync(TCGContext *s, int ng, int nt)
3721 {
3722     la_global_sync(s, ng);
3723 
3724     for (int i = ng; i < nt; ++i) {
3725         TCGTemp *ts = &s->temps[i];
3726         int state;
3727 
3728         switch (ts->kind) {
3729         case TEMP_TB:
3730             state = ts->state;
3731             ts->state = state | TS_MEM;
3732             if (state != TS_DEAD) {
3733                 continue;
3734             }
3735             break;
3736         case TEMP_EBB:
3737         case TEMP_CONST:
3738             continue;
3739         default:
3740             g_assert_not_reached();
3741         }
3742         la_reset_pref(&s->temps[i]);
3743     }
3744 }
3745 
3746 /* liveness analysis: sync globals back to memory and kill.  */
3747 static void la_global_kill(TCGContext *s, int ng)
3748 {
3749     int i;
3750 
3751     for (i = 0; i < ng; i++) {
3752         s->temps[i].state = TS_DEAD | TS_MEM;
3753         la_reset_pref(&s->temps[i]);
3754     }
3755 }
3756 
3757 /* liveness analysis: note live temps crossing calls.  */
3758 static void la_cross_call(TCGContext *s, int nt)
3759 {
3760     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3761     int i;
3762 
3763     for (i = 0; i < nt; i++) {
3764         TCGTemp *ts = &s->temps[i];
3765         if (!(ts->state & TS_DEAD)) {
3766             TCGRegSet *pset = la_temp_pref(ts);
3767             TCGRegSet set = *pset;
3768 
3769             set &= mask;
3770             /* If the combination is not possible, restart.  */
3771             if (set == 0) {
3772                 set = tcg_target_available_regs[ts->type] & mask;
3773             }
3774             *pset = set;
3775         }
3776     }
3777 }
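
/*
 * Example: if a live temp's preference set contains only
 * call-clobbered registers, the intersection above becomes empty and
 * we restart from all call-saved registers available for its type;
 * otherwise the surviving call-saved preferences are kept.
 */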
3778 
3779 /*
3780  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3781  * to TEMP_EBB, if possible.
3782  */
3783 static void __attribute__((noinline))
3784 liveness_pass_0(TCGContext *s)
3785 {
3786     void * const multiple_ebb = (void *)(uintptr_t)-1;
3787     int nb_temps = s->nb_temps;
3788     TCGOp *op, *ebb;
3789 
3790     for (int i = s->nb_globals; i < nb_temps; ++i) {
3791         s->temps[i].state_ptr = NULL;
3792     }
3793 
3794     /*
3795      * Represent each EBB by the op at which it begins.  In the case of
3796      * the first EBB, this is the first op, otherwise it is a label.
3797      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3798      * within a single EBB, else MULTIPLE_EBB.
3799      */
3800     ebb = QTAILQ_FIRST(&s->ops);
3801     QTAILQ_FOREACH(op, &s->ops, link) {
3802         const TCGOpDef *def;
3803         int nb_oargs, nb_iargs;
3804 
3805         switch (op->opc) {
3806         case INDEX_op_set_label:
3807             ebb = op;
3808             continue;
3809         case INDEX_op_discard:
3810             continue;
3811         case INDEX_op_call:
3812             nb_oargs = TCGOP_CALLO(op);
3813             nb_iargs = TCGOP_CALLI(op);
3814             break;
3815         default:
3816             def = &tcg_op_defs[op->opc];
3817             nb_oargs = def->nb_oargs;
3818             nb_iargs = def->nb_iargs;
3819             break;
3820         }
3821 
3822         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3823             TCGTemp *ts = arg_temp(op->args[i]);
3824 
3825             if (ts->kind != TEMP_TB) {
3826                 continue;
3827             }
3828             if (ts->state_ptr == NULL) {
3829                 ts->state_ptr = ebb;
3830             } else if (ts->state_ptr != ebb) {
3831                 ts->state_ptr = multiple_ebb;
3832             }
3833         }
3834     }
3835 
3836     /*
3837      * For TEMP_TB that turned out not to be used beyond one EBB,
3838      * reduce the liveness to TEMP_EBB.
3839      */
3840     for (int i = s->nb_globals; i < nb_temps; ++i) {
3841         TCGTemp *ts = &s->temps[i];
3842         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3843             ts->kind = TEMP_EBB;
3844         }
3845     }
3846 }
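
/*
 * Concretely: a TEMP_TB temp whose every use falls between the same
 * pair of labels never carries a value across an EBB boundary, so it
 * is demoted to TEMP_EBB here.  liveness_pass_1 may then treat it as
 * simply dead at the end of the EBB, instead of forcing a sync to its
 * memory slot.
 */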
3847 
3848 /* Liveness analysis: update the opc_arg_life array to tell if a
3849    given input argument is dead. Instructions updating dead
3850    temporaries are removed. */
3851 static void __attribute__((noinline))
3852 liveness_pass_1(TCGContext *s)
3853 {
3854     int nb_globals = s->nb_globals;
3855     int nb_temps = s->nb_temps;
3856     TCGOp *op, *op_prev;
3857     TCGRegSet *prefs;
3858     int i;
3859 
3860     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3861     for (i = 0; i < nb_temps; ++i) {
3862         s->temps[i].state_ptr = prefs + i;
3863     }
3864 
3865     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3866     la_func_end(s, nb_globals, nb_temps);
3867 
3868     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3869         int nb_iargs, nb_oargs;
3870         TCGOpcode opc_new, opc_new2;
3871         TCGLifeData arg_life = 0;
3872         TCGTemp *ts;
3873         TCGOpcode opc = op->opc;
3874         const TCGOpDef *def = &tcg_op_defs[opc];
3875         const TCGArgConstraint *args_ct;
3876 
3877         switch (opc) {
3878         case INDEX_op_call:
3879             {
3880                 const TCGHelperInfo *info = tcg_call_info(op);
3881                 int call_flags = tcg_call_flags(op);
3882 
3883                 nb_oargs = TCGOP_CALLO(op);
3884                 nb_iargs = TCGOP_CALLI(op);
3885 
3886                 /* pure functions can be removed if their result is unused */
3887                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3888                     for (i = 0; i < nb_oargs; i++) {
3889                         ts = arg_temp(op->args[i]);
3890                         if (ts->state != TS_DEAD) {
3891                             goto do_not_remove_call;
3892                         }
3893                     }
3894                     goto do_remove;
3895                 }
3896             do_not_remove_call:
3897 
3898                 /* Output args are dead.  */
3899                 for (i = 0; i < nb_oargs; i++) {
3900                     ts = arg_temp(op->args[i]);
3901                     if (ts->state & TS_DEAD) {
3902                         arg_life |= DEAD_ARG << i;
3903                     }
3904                     if (ts->state & TS_MEM) {
3905                         arg_life |= SYNC_ARG << i;
3906                     }
3907                     ts->state = TS_DEAD;
3908                     la_reset_pref(ts);
3909                 }
3910 
3911                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3912                 memset(op->output_pref, 0, sizeof(op->output_pref));
3913 
3914                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3915                                     TCG_CALL_NO_READ_GLOBALS))) {
3916                     la_global_kill(s, nb_globals);
3917                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3918                     la_global_sync(s, nb_globals);
3919                 }
3920 
3921                 /* Record arguments that die in this helper.  */
3922                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3923                     ts = arg_temp(op->args[i]);
3924                     if (ts->state & TS_DEAD) {
3925                         arg_life |= DEAD_ARG << i;
3926                     }
3927                 }
3928 
3929                 /* For all live registers, remove call-clobbered prefs.  */
3930                 la_cross_call(s, nb_temps);
3931 
3932                 /*
3933                  * Input arguments are live for preceding opcodes.
3934                  *
3935                  * For those arguments that die, and will be allocated in
3936                  * registers, clear the register set for that arg, to be
3937                  * filled in below.  For args that will be on the stack,
3938                  * reset to any available reg.  Process arguments in reverse
3939                  * order so that if a temp is used more than once, the stack
3940                  * reset to max happens before the register reset to 0.
3941                  */
3942                 for (i = nb_iargs - 1; i >= 0; i--) {
3943                     const TCGCallArgumentLoc *loc = &info->in[i];
3944                     ts = arg_temp(op->args[nb_oargs + i]);
3945 
3946                     if (ts->state & TS_DEAD) {
3947                         switch (loc->kind) {
3948                         case TCG_CALL_ARG_NORMAL:
3949                         case TCG_CALL_ARG_EXTEND_U:
3950                         case TCG_CALL_ARG_EXTEND_S:
3951                             if (arg_slot_reg_p(loc->arg_slot)) {
3952                                 *la_temp_pref(ts) = 0;
3953                                 break;
3954                             }
3955                             /* fall through */
3956                         default:
3957                             *la_temp_pref(ts) =
3958                                 tcg_target_available_regs[ts->type];
3959                             break;
3960                         }
3961                         ts->state &= ~TS_DEAD;
3962                     }
3963                 }
3964 
3965                 /*
3966                  * For each input argument, add its input register to prefs.
3967                  * If a temp is used once, this produces a single set bit;
3968                  * if a temp is used multiple times, this produces a set.
3969                  */
3970                 for (i = 0; i < nb_iargs; i++) {
3971                     const TCGCallArgumentLoc *loc = &info->in[i];
3972                     ts = arg_temp(op->args[nb_oargs + i]);
3973 
3974                     switch (loc->kind) {
3975                     case TCG_CALL_ARG_NORMAL:
3976                     case TCG_CALL_ARG_EXTEND_U:
3977                     case TCG_CALL_ARG_EXTEND_S:
3978                         if (arg_slot_reg_p(loc->arg_slot)) {
3979                             tcg_regset_set_reg(*la_temp_pref(ts),
3980                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3981                         }
3982                         break;
3983                     default:
3984                         break;
3985                     }
3986                 }
3987             }
3988             break;
3989         case INDEX_op_insn_start:
3990             break;
3991         case INDEX_op_discard:
3992             /* mark the temporary as dead */
3993             ts = arg_temp(op->args[0]);
3994             ts->state = TS_DEAD;
3995             la_reset_pref(ts);
3996             break;
3997 
3998         case INDEX_op_add2_i32:
3999         case INDEX_op_add2_i64:
4000             opc_new = INDEX_op_add;
4001             goto do_addsub2;
4002         case INDEX_op_sub2_i32:
4003         case INDEX_op_sub2_i64:
4004             opc_new = INDEX_op_sub;
4005         do_addsub2:
4006             nb_iargs = 4;
4007             nb_oargs = 2;
4008             /* Test if the high part of the operation is dead, but not
4009                the low part.  The result can be optimized to a simple
4010                add or sub.  This often happens for an x86_64 guest when
4011                the cpu mode is set to 32-bit.  */
4012             if (arg_temp(op->args[1])->state == TS_DEAD) {
4013                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4014                     goto do_remove;
4015                 }
4016                 /* Replace the opcode and adjust the args in place,
4017                    leaving 3 unused args at the end.  */
4018                 op->opc = opc = opc_new;
4019                 op->args[1] = op->args[2];
4020                 op->args[2] = op->args[4];
4021                 /* Fall through and mark the single-word operation live.  */
4022                 nb_iargs = 2;
4023                 nb_oargs = 1;
4024             }
4025             goto do_not_remove;
4026 
4027         case INDEX_op_muls2_i32:
4028         case INDEX_op_muls2_i64:
4029             opc_new = INDEX_op_mul;
4030             opc_new2 = INDEX_op_mulsh;
4031             goto do_mul2;
4032         case INDEX_op_mulu2_i32:
4033         case INDEX_op_mulu2_i64:
4034             opc_new = INDEX_op_mul;
4035             opc_new2 = INDEX_op_muluh;
4036         do_mul2:
4037             nb_iargs = 2;
4038             nb_oargs = 2;
4039             if (arg_temp(op->args[1])->state == TS_DEAD) {
4040                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4041                     /* Both parts of the operation are dead.  */
4042                     goto do_remove;
4043                 }
4044                 /* The high part of the operation is dead; generate the low. */
4045                 op->opc = opc = opc_new;
4046                 op->args[1] = op->args[2];
4047                 op->args[2] = op->args[3];
4048             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4049                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4050                 /* The low part of the operation is dead; generate the high. */
4051                 op->opc = opc = opc_new2;
4052                 op->args[0] = op->args[1];
4053                 op->args[1] = op->args[2];
4054                 op->args[2] = op->args[3];
4055             } else {
4056                 goto do_not_remove;
4057             }
4058             /* Mark the single-word operation live.  */
4059             nb_oargs = 1;
4060             goto do_not_remove;
4061 
4062         default:
4063             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4064             nb_iargs = def->nb_iargs;
4065             nb_oargs = def->nb_oargs;
4066 
4067             /* Test if the operation can be removed because all
4068                its outputs are dead. We assume that nb_oargs == 0
4069                implies side effects */
4070             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4071                 for (i = 0; i < nb_oargs; i++) {
4072                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4073                         goto do_not_remove;
4074                     }
4075                 }
4076                 goto do_remove;
4077             }
4078             goto do_not_remove;
4079 
4080         do_remove:
4081             tcg_op_remove(s, op);
4082             break;
4083 
4084         do_not_remove:
4085             for (i = 0; i < nb_oargs; i++) {
4086                 ts = arg_temp(op->args[i]);
4087 
4088                 /* Remember the preference of the uses that followed.  */
4089                 if (i < ARRAY_SIZE(op->output_pref)) {
4090                     op->output_pref[i] = *la_temp_pref(ts);
4091                 }
4092 
4093                 /* Output args are dead.  */
4094                 if (ts->state & TS_DEAD) {
4095                     arg_life |= DEAD_ARG << i;
4096                 }
4097                 if (ts->state & TS_MEM) {
4098                     arg_life |= SYNC_ARG << i;
4099                 }
4100                 ts->state = TS_DEAD;
4101                 la_reset_pref(ts);
4102             }
4103 
4104             /* If end of basic block, update.  */
4105             if (def->flags & TCG_OPF_BB_EXIT) {
4106                 la_func_end(s, nb_globals, nb_temps);
4107             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4108                 la_bb_sync(s, nb_globals, nb_temps);
4109             } else if (def->flags & TCG_OPF_BB_END) {
4110                 la_bb_end(s, nb_globals, nb_temps);
4111             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4112                 la_global_sync(s, nb_globals);
4113                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4114                     la_cross_call(s, nb_temps);
4115                 }
4116             }
4117 
4118             /* Record arguments that die in this opcode.  */
4119             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4120                 ts = arg_temp(op->args[i]);
4121                 if (ts->state & TS_DEAD) {
4122                     arg_life |= DEAD_ARG << i;
4123                 }
4124             }
4125 
4126             /* Input arguments are live for preceding opcodes.  */
4127             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4128                 ts = arg_temp(op->args[i]);
4129                 if (ts->state & TS_DEAD) {
4130                     /* For operands that were dead, initially allow
4131                        all regs for the type.  */
4132                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4133                     ts->state &= ~TS_DEAD;
4134                 }
4135             }
4136 
4137             /* Incorporate constraints for this operand.  */
4138             switch (opc) {
4139             case INDEX_op_mov:
4140                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4141                    have proper constraints.  That said, we special-case
4142                    moves to propagate preferences backward.  */
4143                 if (IS_DEAD_ARG(1)) {
4144                     *la_temp_pref(arg_temp(op->args[0]))
4145                         = *la_temp_pref(arg_temp(op->args[1]));
4146                 }
4147                 break;
4148 
4149             default:
4150                 args_ct = opcode_args_ct(op);
4151                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4152                     const TCGArgConstraint *ct = &args_ct[i];
4153                     TCGRegSet set, *pset;
4154 
4155                     ts = arg_temp(op->args[i]);
4156                     pset = la_temp_pref(ts);
4157                     set = *pset;
4158 
4159                     set &= ct->regs;
4160                     if (ct->ialias) {
4161                         set &= output_pref(op, ct->alias_index);
4162                     }
4163                     /* If the combination is not possible, restart.  */
4164                     if (set == 0) {
4165                         set = ct->regs;
4166                     }
4167                     *pset = set;
4168                 }
4169                 break;
4170             }
4171             break;
4172         }
4173         op->life = arg_life;
4174     }
4175 }
4176 
4177 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4178 static bool __attribute__((noinline))
4179 liveness_pass_2(TCGContext *s)
4180 {
4181     int nb_globals = s->nb_globals;
4182     int nb_temps, i;
4183     bool changes = false;
4184     TCGOp *op, *op_next;
4185 
4186     /* Create a temporary for each indirect global.  */
4187     for (i = 0; i < nb_globals; ++i) {
4188         TCGTemp *its = &s->temps[i];
4189         if (its->indirect_reg) {
4190             TCGTemp *dts = tcg_temp_alloc(s);
4191             dts->type = its->type;
4192             dts->base_type = its->base_type;
4193             dts->temp_subindex = its->temp_subindex;
4194             dts->kind = TEMP_EBB;
4195             its->state_ptr = dts;
4196         } else {
4197             its->state_ptr = NULL;
4198         }
4199         /* All globals begin dead.  */
4200         its->state = TS_DEAD;
4201     }
4202     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4203         TCGTemp *its = &s->temps[i];
4204         its->state_ptr = NULL;
4205         its->state = TS_DEAD;
4206     }
4207 
4208     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4209         TCGOpcode opc = op->opc;
4210         const TCGOpDef *def = &tcg_op_defs[opc];
4211         TCGLifeData arg_life = op->life;
4212         int nb_iargs, nb_oargs, call_flags;
4213         TCGTemp *arg_ts, *dir_ts;
4214 
4215         if (opc == INDEX_op_call) {
4216             nb_oargs = TCGOP_CALLO(op);
4217             nb_iargs = TCGOP_CALLI(op);
4218             call_flags = tcg_call_flags(op);
4219         } else {
4220             nb_iargs = def->nb_iargs;
4221             nb_oargs = def->nb_oargs;
4222 
4223             /* Set flags similar to those that calls require.  */
4224             if (def->flags & TCG_OPF_COND_BRANCH) {
4225                 /* Like reading globals: sync_globals */
4226                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4227             } else if (def->flags & TCG_OPF_BB_END) {
4228                 /* Like writing globals: save_globals */
4229                 call_flags = 0;
4230             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4231                 /* Like reading globals: sync_globals */
4232                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4233             } else {
4234                 /* No effect on globals.  */
4235                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4236                               TCG_CALL_NO_WRITE_GLOBALS);
4237             }
4238         }
4239 
4240         /* Make sure that input arguments are available.  */
4241         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4242             arg_ts = arg_temp(op->args[i]);
4243             dir_ts = arg_ts->state_ptr;
4244             if (dir_ts && arg_ts->state == TS_DEAD) {
4245                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4246                                   ? INDEX_op_ld_i32
4247                                   : INDEX_op_ld_i64);
4248                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4249                                                   arg_ts->type, 3);
4250 
4251                 lop->args[0] = temp_arg(dir_ts);
4252                 lop->args[1] = temp_arg(arg_ts->mem_base);
4253                 lop->args[2] = arg_ts->mem_offset;
4254 
4255                 /* Loaded, but synced with memory.  */
4256                 arg_ts->state = TS_MEM;
4257             }
4258         }
4259 
4260         /* Perform input replacement, and mark inputs that became dead.
4261            No action is required except keeping temp_state up to date
4262            so that we reload when needed.  */
4263         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4264             arg_ts = arg_temp(op->args[i]);
4265             dir_ts = arg_ts->state_ptr;
4266             if (dir_ts) {
4267                 op->args[i] = temp_arg(dir_ts);
4268                 changes = true;
4269                 if (IS_DEAD_ARG(i)) {
4270                     arg_ts->state = TS_DEAD;
4271                 }
4272             }
4273         }
4274 
4275         /* Liveness analysis should ensure that the following are
4276            all correct, for call sites and basic block end points.  */
4277         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4278             /* Nothing to do */
4279         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4280             for (i = 0; i < nb_globals; ++i) {
4281                 /* Liveness should see that globals are synced back,
4282                    that is, either TS_DEAD or TS_MEM.  */
4283                 arg_ts = &s->temps[i];
4284                 tcg_debug_assert(arg_ts->state_ptr == 0
4285                                  || arg_ts->state != 0);
4286             }
4287         } else {
4288             for (i = 0; i < nb_globals; ++i) {
4289                 /* Liveness should see that globals are saved back,
4290                    that is, TS_DEAD, waiting to be reloaded.  */
4291                 arg_ts = &s->temps[i];
4292                 tcg_debug_assert(arg_ts->state_ptr == 0
4293                                  || arg_ts->state == TS_DEAD);
4294             }
4295         }
4296 
4297         /* Outputs become available.  */
4298         if (opc == INDEX_op_mov) {
4299             arg_ts = arg_temp(op->args[0]);
4300             dir_ts = arg_ts->state_ptr;
4301             if (dir_ts) {
4302                 op->args[0] = temp_arg(dir_ts);
4303                 changes = true;
4304 
4305                 /* The output is now live and modified.  */
4306                 arg_ts->state = 0;
4307 
4308                 if (NEED_SYNC_ARG(0)) {
4309                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4310                                       ? INDEX_op_st_i32
4311                                       : INDEX_op_st_i64);
4312                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4313                                                      arg_ts->type, 3);
4314                     TCGTemp *out_ts = dir_ts;
4315 
4316                     if (IS_DEAD_ARG(0)) {
4317                         out_ts = arg_temp(op->args[1]);
4318                         arg_ts->state = TS_DEAD;
4319                         tcg_op_remove(s, op);
4320                     } else {
4321                         arg_ts->state = TS_MEM;
4322                     }
4323 
4324                     sop->args[0] = temp_arg(out_ts);
4325                     sop->args[1] = temp_arg(arg_ts->mem_base);
4326                     sop->args[2] = arg_ts->mem_offset;
4327                 } else {
4328                     tcg_debug_assert(!IS_DEAD_ARG(0));
4329                 }
4330             }
4331         } else {
4332             for (i = 0; i < nb_oargs; i++) {
4333                 arg_ts = arg_temp(op->args[i]);
4334                 dir_ts = arg_ts->state_ptr;
4335                 if (!dir_ts) {
4336                     continue;
4337                 }
4338                 op->args[i] = temp_arg(dir_ts);
4339                 changes = true;
4340 
4341                 /* The output is now live and modified.  */
4342                 arg_ts->state = 0;
4343 
4344                 /* Sync outputs upon their last write.  */
4345                 if (NEED_SYNC_ARG(i)) {
4346                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4347                                       ? INDEX_op_st_i32
4348                                       : INDEX_op_st_i64);
4349                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4350                                                      arg_ts->type, 3);
4351 
4352                     sop->args[0] = temp_arg(dir_ts);
4353                     sop->args[1] = temp_arg(arg_ts->mem_base);
4354                     sop->args[2] = arg_ts->mem_offset;
4355 
4356                     arg_ts->state = TS_MEM;
4357                 }
4358                 /* Drop outputs that are dead.  */
4359                 if (IS_DEAD_ARG(i)) {
4360                     arg_ts->state = TS_DEAD;
4361                 }
4362             }
4363         }
4364     }
4365 
4366     return changes;
4367 }
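
/*
 * The net effect on a single indirect global G with direct temp D,
 * sketched in pseudo-listing form (the base register is shown as env
 * for concreteness; it is really G's mem_base):
 *
 *     add G, G, X      ==>    ld   D, env, off(G)
 *                             add  D, D, X
 *                             st   D, env, off(G)
 *
 * The load is emitted only when G's value is not already live in D,
 * and the store only where liveness marked the last write as needing
 * a sync.
 */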
4368 
4369 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4370 {
4371     intptr_t off;
4372     int size, align;
4373 
4374     /* When allocating an object, look at the full type. */
4375     size = tcg_type_size(ts->base_type);
4376     switch (ts->base_type) {
4377     case TCG_TYPE_I32:
4378         align = 4;
4379         break;
4380     case TCG_TYPE_I64:
4381     case TCG_TYPE_V64:
4382         align = 8;
4383         break;
4384     case TCG_TYPE_I128:
4385     case TCG_TYPE_V128:
4386     case TCG_TYPE_V256:
4387         /*
4388          * Note that we do not require aligned storage for V256,
4389          * and that we provide alignment for I128 to match V128,
4390          * even if that's above what the host ABI requires.
4391          */
4392         align = 16;
4393         break;
4394     default:
4395         g_assert_not_reached();
4396     }
4397 
4398     /*
4399      * Assume the stack is sufficiently aligned.
4400      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4401      * and do not require 16 byte vector alignment.  This seems slightly
4402      * easier than fully parameterizing the above switch statement.
4403      */
4404     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4405     off = ROUND_UP(s->current_frame_offset, align);
4406 
4407     /* If we've exhausted the stack frame, restart with a smaller TB. */
4408     if (off + size > s->frame_end) {
4409         tcg_raise_tb_overflow(s);
4410     }
4411     s->current_frame_offset = off + size;
4412 #if defined(__sparc__)
4413     off += TCG_TARGET_STACK_BIAS;
4414 #endif
4415 
4416     /* If the object was subdivided, assign memory to all the parts. */
4417     if (ts->base_type != ts->type) {
4418         int part_size = tcg_type_size(ts->type);
4419         int part_count = size / part_size;
4420 
4421         /*
4422          * Each part is allocated sequentially in tcg_temp_new_internal.
4423          * Jump back to the first part by subtracting the current index.
4424          */
4425         ts -= ts->temp_subindex;
4426         for (int i = 0; i < part_count; ++i) {
4427             ts[i].mem_offset = off + i * part_size;
4428             ts[i].mem_base = s->frame_temp;
4429             ts[i].mem_allocated = 1;
4430         }
4431     } else {
4432         ts->mem_offset = off;
4433         ts->mem_base = s->frame_temp;
4434         ts->mem_allocated = 1;
4435     }
4436 }
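
/*
 * Alignment by example: with current_frame_offset == 4, allocating a
 * TCG_TYPE_I64 temp rounds the offset up to 8 and advances it to 16.
 * An I128 temp that was subdivided into two I64 parts (base_type !=
 * type) receives a single 16-byte slot (assuming a 16-byte aligned
 * stack), with part i placed at off + i * 8.
 */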
4437 
4438 /* Assign @reg to @ts, and update reg_to_temp[]. */
4439 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4440 {
4441     if (ts->val_type == TEMP_VAL_REG) {
4442         TCGReg old = ts->reg;
4443         tcg_debug_assert(s->reg_to_temp[old] == ts);
4444         if (old == reg) {
4445             return;
4446         }
4447         s->reg_to_temp[old] = NULL;
4448     }
4449     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4450     s->reg_to_temp[reg] = ts;
4451     ts->val_type = TEMP_VAL_REG;
4452     ts->reg = reg;
4453 }
4454 
4455 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4456 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4457 {
4458     tcg_debug_assert(type != TEMP_VAL_REG);
4459     if (ts->val_type == TEMP_VAL_REG) {
4460         TCGReg reg = ts->reg;
4461         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4462         s->reg_to_temp[reg] = NULL;
4463     }
4464     ts->val_type = type;
4465 }
4466 
4467 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4468 
4469 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4470    mark it free; otherwise mark it dead.  */
4471 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4472 {
4473     TCGTempVal new_type;
4474 
4475     switch (ts->kind) {
4476     case TEMP_FIXED:
4477         return;
4478     case TEMP_GLOBAL:
4479     case TEMP_TB:
4480         new_type = TEMP_VAL_MEM;
4481         break;
4482     case TEMP_EBB:
4483         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4484         break;
4485     case TEMP_CONST:
4486         new_type = TEMP_VAL_CONST;
4487         break;
4488     default:
4489         g_assert_not_reached();
4490     }
4491     set_temp_val_nonreg(s, ts, new_type);
4492 }
4493 
4494 /* Mark a temporary as dead.  */
4495 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4496 {
4497     temp_free_or_dead(s, ts, 1);
4498 }
4499 
4500 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4501    register needs to be allocated to store a constant.  If 'free_or_dead'
4502    is non-zero, subsequently release the temporary; if it is positive, the
4503    temp is dead; if it is negative, the temp is free.  */
4504 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4505                       TCGRegSet preferred_regs, int free_or_dead)
4506 {
4507     if (!temp_readonly(ts) && !ts->mem_coherent) {
4508         if (!ts->mem_allocated) {
4509             temp_allocate_frame(s, ts);
4510         }
4511         switch (ts->val_type) {
4512         case TEMP_VAL_CONST:
4513             /* If we're going to free the temp immediately, then we won't
4514                require it later in a register, so attempt to store the
4515                constant to memory directly.  */
4516             if (free_or_dead
4517                 && tcg_out_sti(s, ts->type, ts->val,
4518                                ts->mem_base->reg, ts->mem_offset)) {
4519                 break;
4520             }
4521             temp_load(s, ts, tcg_target_available_regs[ts->type],
4522                       allocated_regs, preferred_regs);
4523             /* fallthrough */
4524 
4525         case TEMP_VAL_REG:
4526             tcg_out_st(s, ts->type, ts->reg,
4527                        ts->mem_base->reg, ts->mem_offset);
4528             break;
4529 
4530         case TEMP_VAL_MEM:
4531             break;
4532 
4533         case TEMP_VAL_DEAD:
4534         default:
4535             g_assert_not_reached();
4536         }
4537         ts->mem_coherent = 1;
4538     }
4539     if (free_or_dead) {
4540         temp_free_or_dead(s, ts, free_or_dead);
4541     }
4542 }
4543 
4544 /* free register 'reg' by spilling the corresponding temporary if necessary */
4545 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4546 {
4547     TCGTemp *ts = s->reg_to_temp[reg];
4548     if (ts != NULL) {
4549         temp_sync(s, ts, allocated_regs, 0, -1);
4550     }
4551 }
4552 
4553 /**
4554  * tcg_reg_alloc:
4555  * @required_regs: Set of registers in which we must allocate.
4556  * @allocated_regs: Set of registers which must be avoided.
4557  * @preferred_regs: Set of registers we should prefer.
4558  * @rev: True if we search the registers in "indirect" order.
4559  *
4560  * The allocated register must be in @required_regs & ~@allocated_regs,
4561  * but if we can put it in @preferred_regs we may save a move later.
4562  */
4563 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4564                             TCGRegSet allocated_regs,
4565                             TCGRegSet preferred_regs, bool rev)
4566 {
4567     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4568     TCGRegSet reg_ct[2];
4569     const int *order;
4570 
4571     reg_ct[1] = required_regs & ~allocated_regs;
4572     tcg_debug_assert(reg_ct[1] != 0);
4573     reg_ct[0] = reg_ct[1] & preferred_regs;
4574 
4575     /* Skip the preferred_regs option if it cannot be satisfied,
4576        or if the preference made no difference.  */
4577     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4578 
4579     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4580 
4581     /* Try free registers, preferences first.  */
4582     for (j = f; j < 2; j++) {
4583         TCGRegSet set = reg_ct[j];
4584 
4585         if (tcg_regset_single(set)) {
4586             /* One register in the set.  */
4587             TCGReg reg = tcg_regset_first(set);
4588             if (s->reg_to_temp[reg] == NULL) {
4589                 return reg;
4590             }
4591         } else {
4592             for (i = 0; i < n; i++) {
4593                 TCGReg reg = order[i];
4594                 if (s->reg_to_temp[reg] == NULL &&
4595                     tcg_regset_test_reg(set, reg)) {
4596                     return reg;
4597                 }
4598             }
4599         }
4600     }
4601 
4602     /* We must spill something.  */
4603     for (j = f; j < 2; j++) {
4604         TCGRegSet set = reg_ct[j];
4605 
4606         if (tcg_regset_single(set)) {
4607             /* One register in the set.  */
4608             TCGReg reg = tcg_regset_first(set);
4609             tcg_reg_free(s, reg, allocated_regs);
4610             return reg;
4611         } else {
4612             for (i = 0; i < n; i++) {
4613                 TCGReg reg = order[i];
4614                 if (tcg_regset_test_reg(set, reg)) {
4615                     tcg_reg_free(s, reg, allocated_regs);
4616                     return reg;
4617                 }
4618             }
4619         }
4620     }
4621 
4622     g_assert_not_reached();
4623 }
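
/*
 * Overall search order: a free register satisfying the preference,
 * then any free register from the required set, and only then
 * spilling -- again trying preferred registers first -- so a spill
 * happens only when no acceptable register is free.
 */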
4624 
4625 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4626                                  TCGRegSet allocated_regs,
4627                                  TCGRegSet preferred_regs, bool rev)
4628 {
4629     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4630     TCGRegSet reg_ct[2];
4631     const int *order;
4632 
4633     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4634     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4635     tcg_debug_assert(reg_ct[1] != 0);
4636     reg_ct[0] = reg_ct[1] & preferred_regs;
4637 
4638     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4639 
4640     /*
4641      * Skip the preferred_regs option if it cannot be satisfied,
4642      * or if the preference made no difference.
4643      */
4644     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4645 
4646     /*
4647      * Minimize the number of flushes by looking for 2 free registers first,
4648      * then a single flush, then two flushes.
4649      */
4650     for (fmin = 2; fmin >= 0; fmin--) {
4651         for (j = k; j < 2; j++) {
4652             TCGRegSet set = reg_ct[j];
4653 
4654             for (i = 0; i < n; i++) {
4655                 TCGReg reg = order[i];
4656 
4657                 if (tcg_regset_test_reg(set, reg)) {
4658                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4659                     if (f >= fmin) {
4660                         tcg_reg_free(s, reg, allocated_regs);
4661                         tcg_reg_free(s, reg + 1, allocated_regs);
4662                         return reg;
4663                     }
4664                 }
4665             }
4666         }
4667     }
4668     g_assert_not_reached();
4669 }
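
/*
 * The fmin loop makes up to three passes over the allocation order:
 * first accepting only pairs with both registers free (f == 2), then
 * pairs needing at most one spill, and finally any pair in the set.
 * The unconditional tcg_reg_free() calls are safe because freeing a
 * register that holds no temp is a no-op.
 */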
4670 
4671 /* Make sure the temporary is in a register.  If needed, allocate the register
4672    from DESIRED while avoiding ALLOCATED.  */
4673 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4674                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4675 {
4676     TCGReg reg;
4677 
4678     switch (ts->val_type) {
4679     case TEMP_VAL_REG:
4680         return;
4681     case TEMP_VAL_CONST:
4682         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4683                             preferred_regs, ts->indirect_base);
4684         if (ts->type <= TCG_TYPE_I64) {
4685             tcg_out_movi(s, ts->type, reg, ts->val);
4686         } else {
4687             uint64_t val = ts->val;
4688             MemOp vece = MO_64;
4689 
4690             /*
4691              * Find the minimal vector element that matches the constant.
4692              * The targets will, in general, have to do this search anyway;
4693              * do it generically here.
4694              */
4695             if (val == dup_const(MO_8, val)) {
4696                 vece = MO_8;
4697             } else if (val == dup_const(MO_16, val)) {
4698                 vece = MO_16;
4699             } else if (val == dup_const(MO_32, val)) {
4700                 vece = MO_32;
4701             }
4702 
4703             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4704         }
4705         ts->mem_coherent = 0;
4706         break;
4707     case TEMP_VAL_MEM:
4708         if (!ts->mem_allocated) {
4709             temp_allocate_frame(s, ts);
4710         }
4711         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4712                             preferred_regs, ts->indirect_base);
4713         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4714         ts->mem_coherent = 1;
4715         break;
4716     case TEMP_VAL_DEAD:
4717     default:
4718         g_assert_not_reached();
4719     }
4720     set_temp_val_reg(s, ts, reg);
4721 }
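/*
 * Example of the vece narrowing performed for vector constants above
 * (illustrative values): 0x4242424242424242 equals
 * dup_const(MO_8, 0x42) and is emitted with vece = MO_8;
 * 0x0001000100010001 first matches at MO_16; a value such as
 * 0x0123456789abcdef matches no replication pattern and keeps
 * vece = MO_64.
 */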
4722 
4723 /* Save a temporary to memory. 'allocated_regs' is used in case a
4724    temporary register needs to be allocated to store a constant.  */
4725 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4726 {
4727     /* The liveness analysis already ensures that globals are back
4728        in memory. Keep a tcg_debug_assert for safety. */
4729     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4730 }
4731 
4732 /* save globals to their canonical location and assume they can be
4733    modified by the following code. 'allocated_regs' is used in case a
4734    temporary register needs to be allocated to store a constant. */
4735 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4736 {
4737     int i, n;
4738 
4739     for (i = 0, n = s->nb_globals; i < n; i++) {
4740         temp_save(s, &s->temps[i], allocated_regs);
4741     }
4742 }
4743 
4744 /* sync globals to their canonical location and assume they can be
4745    read by the following code. 'allocated_regs' is used in case a
4746    temporary register needs to be allocated to store a constant. */
4747 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4748 {
4749     int i, n;
4750 
4751     for (i = 0, n = s->nb_globals; i < n; i++) {
4752         TCGTemp *ts = &s->temps[i];
4753         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4754                          || ts->kind == TEMP_FIXED
4755                          || ts->mem_coherent);
4756     }
4757 }
4758 
4759 /* at the end of a basic block, we assume all temporaries are dead and
4760    all globals are stored at their canonical location. */
4761 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4762 {
4763     int i;
4764 
4765     for (i = s->nb_globals; i < s->nb_temps; i++) {
4766         TCGTemp *ts = &s->temps[i];
4767 
4768         switch (ts->kind) {
4769         case TEMP_TB:
4770             temp_save(s, ts, allocated_regs);
4771             break;
4772         case TEMP_EBB:
4773             /* The liveness analysis already ensures that temps are dead.
4774                Keep a tcg_debug_assert for safety. */
4775             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4776             break;
4777         case TEMP_CONST:
4778             /* Similarly, we should have freed any allocated register. */
4779             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4780             break;
4781         default:
4782             g_assert_not_reached();
4783         }
4784     }
4785 
4786     save_globals(s, allocated_regs);
4787 }
4788 
4789 /*
4790  * At a conditional branch, we assume all temporaries are dead unless
4791  * explicitly live-across-conditional-branch; all globals and local
4792  * temps are synced to their location.
4793  */
4794 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4795 {
4796     sync_globals(s, allocated_regs);
4797 
4798     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4799         TCGTemp *ts = &s->temps[i];
4800         /*
4801          * The liveness analysis already ensures that temps are dead.
4802          * Keep tcg_debug_asserts for safety.
4803          */
4804         switch (ts->kind) {
4805         case TEMP_TB:
4806             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4807             break;
4808         case TEMP_EBB:
4809         case TEMP_CONST:
4810             break;
4811         default:
4812             g_assert_not_reached();
4813         }
4814     }
4815 }
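/*
 * Contrast between the two boundaries above: at a basic-block end all
 * non-global temps are dead or saved and globals are written back, so
 * the register state need not be carried across; at a conditional
 * branch, values may remain in registers as long as each is coherent
 * with its memory slot, so the branch target can reload any temp from
 * its canonical location.
 */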
4816 
4817 /*
4818  * Specialized code generation for INDEX_op_mov_* with a constant.
4819  */
4820 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4821                                   tcg_target_ulong val, TCGLifeData arg_life,
4822                                   TCGRegSet preferred_regs)
4823 {
4824     /* ENV should not be modified.  */
4825     tcg_debug_assert(!temp_readonly(ots));
4826 
4827     /* The movi is not explicitly generated here.  */
4828     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4829     ots->val = val;
4830     ots->mem_coherent = 0;
4831     if (NEED_SYNC_ARG(0)) {
4832         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4833     } else if (IS_DEAD_ARG(0)) {
4834         temp_dead(s, ots);
4835     }
4836 }
4837 
4838 /*
4839  * Specialized code generation for INDEX_op_mov_*.
4840  */
4841 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4842 {
4843     const TCGLifeData arg_life = op->life;
4844     TCGRegSet allocated_regs, preferred_regs;
4845     TCGTemp *ts, *ots;
4846     TCGType otype, itype;
4847     TCGReg oreg, ireg;
4848 
4849     allocated_regs = s->reserved_regs;
4850     preferred_regs = output_pref(op, 0);
4851     ots = arg_temp(op->args[0]);
4852     ts = arg_temp(op->args[1]);
4853 
4854     /* ENV should not be modified.  */
4855     tcg_debug_assert(!temp_readonly(ots));
4856 
4857     /* Note that otype != itype for no-op truncation.  */
4858     otype = ots->type;
4859     itype = ts->type;
4860 
4861     if (ts->val_type == TEMP_VAL_CONST) {
4862         /* propagate constant or generate sti */
4863         tcg_target_ulong val = ts->val;
4864         if (IS_DEAD_ARG(1)) {
4865             temp_dead(s, ts);
4866         }
4867         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4868         return;
4869     }
4870 
4871     /* If the source value is in memory, we're going to be forced
4872        to have it in a register in order to perform the copy.  Copy
4873        the SOURCE value into its own register first, so that we
4874        don't have to reload SOURCE the next time it is used. */
4875     if (ts->val_type == TEMP_VAL_MEM) {
4876         temp_load(s, ts, tcg_target_available_regs[itype],
4877                   allocated_regs, preferred_regs);
4878     }
4879     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4880     ireg = ts->reg;
4881 
4882     if (IS_DEAD_ARG(0)) {
4883         /* mov to a non-saved dead register makes no sense (even with
4884            liveness analysis disabled). */
4885         tcg_debug_assert(NEED_SYNC_ARG(0));
4886         if (!ots->mem_allocated) {
4887             temp_allocate_frame(s, ots);
4888         }
4889         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4890         if (IS_DEAD_ARG(1)) {
4891             temp_dead(s, ts);
4892         }
4893         temp_dead(s, ots);
4894         return;
4895     }
4896 
4897     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4898         /*
4899          * The mov can be suppressed.  Kill input first, so that it
4900          * is unlinked from reg_to_temp, then set the output to the
4901          * reg that we saved from the input.
4902          */
4903         temp_dead(s, ts);
4904         oreg = ireg;
4905     } else {
4906         if (ots->val_type == TEMP_VAL_REG) {
4907             oreg = ots->reg;
4908         } else {
4909             /* Make sure to not spill the input register during allocation. */
4910             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4911                                  allocated_regs | ((TCGRegSet)1 << ireg),
4912                                  preferred_regs, ots->indirect_base);
4913         }
4914         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4915             /*
4916              * Cross register class move not supported.
4917              * Store the source register into the destination slot
4918              * and leave the destination temp as TEMP_VAL_MEM.
4919              */
4920             assert(!temp_readonly(ots));
4921             if (!ots->mem_allocated) {
4922                 temp_allocate_frame(s, ots);
4923             }
4924             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4925             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4926             ots->mem_coherent = 1;
4927             return;
4928         }
4929     }
4930     set_temp_val_reg(s, ots, oreg);
4931     ots->mem_coherent = 0;
4932 
4933     if (NEED_SYNC_ARG(0)) {
4934         temp_sync(s, ots, allocated_regs, 0, 0);
4935     }
4936 }
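/*
 * Illustrative outcomes of the mov allocation above (hypothetical
 * operands): a constant input is folded into tcg_reg_alloc_do_movi and
 * usually emits nothing; a dead register input lets the output adopt
 * that register, also emitting nothing; only a live register input
 * produces a real host move, or, if the host cannot move across
 * register classes, a store into the output's stack slot.
 */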
4937 
4938 /*
4939  * Specialized code generation for INDEX_op_dup_vec.
4940  */
4941 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4942 {
4943     const TCGLifeData arg_life = op->life;
4944     TCGRegSet dup_out_regs, dup_in_regs;
4945     const TCGArgConstraint *dup_args_ct;
4946     TCGTemp *its, *ots;
4947     TCGType itype, vtype;
4948     unsigned vece;
4949     int lowpart_ofs;
4950     bool ok;
4951 
4952     ots = arg_temp(op->args[0]);
4953     its = arg_temp(op->args[1]);
4954 
4955     /* ENV should not be modified.  */
4956     tcg_debug_assert(!temp_readonly(ots));
4957 
4958     itype = its->type;
4959     vece = TCGOP_VECE(op);
4960     vtype = TCGOP_TYPE(op);
4961 
4962     if (its->val_type == TEMP_VAL_CONST) {
4963         /* Propagate constant via movi -> dupi.  */
4964         tcg_target_ulong val = its->val;
4965         if (IS_DEAD_ARG(1)) {
4966             temp_dead(s, its);
4967         }
4968         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4969         return;
4970     }
4971 
4972     dup_args_ct = opcode_args_ct(op);
4973     dup_out_regs = dup_args_ct[0].regs;
4974     dup_in_regs = dup_args_ct[1].regs;
4975 
4976     /* Allocate the output register now.  */
4977     if (ots->val_type != TEMP_VAL_REG) {
4978         TCGRegSet allocated_regs = s->reserved_regs;
4979         TCGReg oreg;
4980 
4981         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4982             /* Make sure to not spill the input register. */
4983             tcg_regset_set_reg(allocated_regs, its->reg);
4984         }
4985         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4986                              output_pref(op, 0), ots->indirect_base);
4987         set_temp_val_reg(s, ots, oreg);
4988     }
4989 
4990     switch (its->val_type) {
4991     case TEMP_VAL_REG:
4992         /*
4993          * The dup constraints must be broad, covering all possible VECE.
4994          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4995          * to fail, indicating that extra moves are required for that case.
4996          */
4997         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4998             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4999                 goto done;
5000             }
5001             /* Try again from memory or a vector input register.  */
5002         }
5003         if (!its->mem_coherent) {
5004             /*
5005              * The input register is not synced, and so an extra store
5006              * would be required to use memory.  Attempt an integer-vector
5007              * register move first.  We do not have a TCGRegSet for this.
5008              */
5009             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5010                 break;
5011             }
5012             /* Sync the temp back to its slot and load from there.  */
5013             temp_sync(s, its, s->reserved_regs, 0, 0);
5014         }
5015         /* fall through */
5016 
5017     case TEMP_VAL_MEM:
5018         lowpart_ofs = 0;
5019         if (HOST_BIG_ENDIAN) {
5020             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5021         }
5022         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5023                              its->mem_offset + lowpart_ofs)) {
5024             goto done;
5025         }
5026         /* Load the input into the destination vector register. */
5027         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5028         break;
5029 
5030     default:
5031         g_assert_not_reached();
5032     }
5033 
5034     /* We now have a vector input register, so dup must succeed. */
5035     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5036     tcg_debug_assert(ok);
5037 
5038  done:
5039     ots->mem_coherent = 0;
5040     if (IS_DEAD_ARG(1)) {
5041         temp_dead(s, its);
5042     }
5043     if (NEED_SYNC_ARG(0)) {
5044         temp_sync(s, ots, s->reserved_regs, 0, 0);
5045     }
5046     if (IS_DEAD_ARG(0)) {
5047         temp_dead(s, ots);
5048     }
5049 }
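/*
 * Worked example for lowpart_ofs above (illustrative): an I64 input
 * (tcg_type_size == 8) duplicated with vece = MO_16 (element size
 * 1 << vece == 2) on a big-endian host gives lowpart_ofs = 8 - 2 = 6,
 * the offset of the least significant 16 bits in memory, which is the
 * element tcg_out_dupm_vec must replicate.
 */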
5050 
5051 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5052 {
5053     const TCGLifeData arg_life = op->life;
5054     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5055     TCGRegSet i_allocated_regs;
5056     TCGRegSet o_allocated_regs;
5057     int i, k, nb_iargs, nb_oargs;
5058     TCGReg reg;
5059     TCGArg arg;
5060     const TCGArgConstraint *args_ct;
5061     const TCGArgConstraint *arg_ct;
5062     TCGTemp *ts;
5063     TCGArg new_args[TCG_MAX_OP_ARGS];
5064     int const_args[TCG_MAX_OP_ARGS];
5065     TCGCond op_cond;
5066 
5067     nb_oargs = def->nb_oargs;
5068     nb_iargs = def->nb_iargs;
5069 
5070     /* copy constants */
5071     memcpy(new_args + nb_oargs + nb_iargs,
5072            op->args + nb_oargs + nb_iargs,
5073            sizeof(TCGArg) * def->nb_cargs);
5074 
5075     i_allocated_regs = s->reserved_regs;
5076     o_allocated_regs = s->reserved_regs;
5077 
5078     switch (op->opc) {
5079     case INDEX_op_brcond_i32:
5080     case INDEX_op_brcond_i64:
5081         op_cond = op->args[2];
5082         break;
5083     case INDEX_op_setcond_i32:
5084     case INDEX_op_setcond_i64:
5085     case INDEX_op_negsetcond_i32:
5086     case INDEX_op_negsetcond_i64:
5087     case INDEX_op_cmp_vec:
5088         op_cond = op->args[3];
5089         break;
5090     case INDEX_op_brcond2_i32:
5091         op_cond = op->args[4];
5092         break;
5093     case INDEX_op_movcond_i32:
5094     case INDEX_op_movcond_i64:
5095     case INDEX_op_setcond2_i32:
5096     case INDEX_op_cmpsel_vec:
5097         op_cond = op->args[5];
5098         break;
5099     default:
5100         /* No condition within opcode. */
5101         op_cond = TCG_COND_ALWAYS;
5102         break;
5103     }
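    /*
     * For example, given the operand layouts used by the generators:
     * setcond_i32 dst, a, b, cond carries its condition at args[3],
     * while movcond_i32 dst, c1, c2, v1, v2, cond carries it at
     * args[5].  Extracting op_cond here lets tcg_target_const_match
     * accept constants that are legal only for certain comparisons.
     */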
5104 
5105     args_ct = opcode_args_ct(op);
5106 
5107     /* satisfy input constraints */
5108     for (k = 0; k < nb_iargs; k++) {
5109         TCGRegSet i_preferred_regs, i_required_regs;
5110         bool allocate_new_reg, copyto_new_reg;
5111         TCGTemp *ts2;
5112         int i1, i2;
5113 
5114         i = args_ct[nb_oargs + k].sort_index;
5115         arg = op->args[i];
5116         arg_ct = &args_ct[i];
5117         ts = arg_temp(arg);
5118 
5119         if (ts->val_type == TEMP_VAL_CONST) {
5120 #ifdef TCG_REG_ZERO
5121             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5122                 /* Hardware zero register: indicate register via non-const. */
5123                 const_args[i] = 0;
5124                 new_args[i] = TCG_REG_ZERO;
5125                 continue;
5126             }
5127 #endif
5128 
5129             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5130                                        op_cond, TCGOP_VECE(op))) {
5131                 /* constant is OK for instruction */
5132                 const_args[i] = 1;
5133                 new_args[i] = ts->val;
5134                 continue;
5135             }
5136         }
5137 
5138         reg = ts->reg;
5139         i_preferred_regs = 0;
5140         i_required_regs = arg_ct->regs;
5141         allocate_new_reg = false;
5142         copyto_new_reg = false;
5143 
5144         switch (arg_ct->pair) {
5145         case 0: /* not paired */
5146             if (arg_ct->ialias) {
5147                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5148 
5149                 /*
5150                  * If the input is readonly, then it cannot also be an
5151                  * output and aliased to itself.  If the input is not
5152                  * dead after the instruction, we must allocate a new
5153                  * register and move it.
5154                  */
5155                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5156                     || args_ct[arg_ct->alias_index].newreg) {
5157                     allocate_new_reg = true;
5158                 } else if (ts->val_type == TEMP_VAL_REG) {
5159                     /*
5160                      * Check if the current register has already been
5161                      * allocated for another input.
5162                      */
5163                     allocate_new_reg =
5164                         tcg_regset_test_reg(i_allocated_regs, reg);
5165                 }
5166             }
5167             if (!allocate_new_reg) {
5168                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5169                           i_preferred_regs);
5170                 reg = ts->reg;
5171                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5172             }
5173             if (allocate_new_reg) {
5174                 /*
5175                  * Allocate a new register matching the constraint
5176                  * and move the temporary register into it.
5177                  */
5178                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5179                           i_allocated_regs, 0);
5180                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5181                                     i_preferred_regs, ts->indirect_base);
5182                 copyto_new_reg = true;
5183             }
5184             break;
5185 
5186         case 1:
5187             /* First of an input pair; if i1 == i2, the second is an output. */
5188             i1 = i;
5189             i2 = arg_ct->pair_index;
5190             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5191 
5192             /*
5193              * It is easier to default to allocating a new pair
5194              * and to identify a few cases where it's not required.
5195              */
5196             if (arg_ct->ialias) {
5197                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5198                 if (IS_DEAD_ARG(i1) &&
5199                     IS_DEAD_ARG(i2) &&
5200                     !temp_readonly(ts) &&
5201                     ts->val_type == TEMP_VAL_REG &&
5202                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5203                     tcg_regset_test_reg(i_required_regs, reg) &&
5204                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5205                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5206                     (ts2
5207                      ? ts2->val_type == TEMP_VAL_REG &&
5208                        ts2->reg == reg + 1 &&
5209                        !temp_readonly(ts2)
5210                      : s->reg_to_temp[reg + 1] == NULL)) {
5211                     break;
5212                 }
5213             } else {
5214                 /* Without aliasing, the pair must also be an input. */
5215                 tcg_debug_assert(ts2);
5216                 if (ts->val_type == TEMP_VAL_REG &&
5217                     ts2->val_type == TEMP_VAL_REG &&
5218                     ts2->reg == reg + 1 &&
5219                     tcg_regset_test_reg(i_required_regs, reg)) {
5220                     break;
5221                 }
5222             }
5223             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5224                                      0, ts->indirect_base);
5225             goto do_pair;
5226 
5227         case 2: /* pair second */
5228             reg = new_args[arg_ct->pair_index] + 1;
5229             goto do_pair;
5230 
5231         case 3: /* ialias with second output, no first input */
5232             tcg_debug_assert(arg_ct->ialias);
5233             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5234 
5235             if (IS_DEAD_ARG(i) &&
5236                 !temp_readonly(ts) &&
5237                 ts->val_type == TEMP_VAL_REG &&
5238                 reg > 0 &&
5239                 s->reg_to_temp[reg - 1] == NULL &&
5240                 tcg_regset_test_reg(i_required_regs, reg) &&
5241                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5242                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5243                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5244                 break;
5245             }
5246             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5247                                      i_allocated_regs, 0,
5248                                      ts->indirect_base);
5249             tcg_regset_set_reg(i_allocated_regs, reg);
5250             reg += 1;
5251             goto do_pair;
5252 
5253         do_pair:
5254             /*
5255              * If an aliased input is not dead after the instruction,
5256              * we must allocate a new register and move it.
5257              */
5258             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5259                 TCGRegSet t_allocated_regs = i_allocated_regs;
5260 
5261                 /*
5262                  * Because of the alias, and the continued life, make sure
5263                  * that the temp is somewhere *other* than the reg pair,
5264                  * and we get a copy in reg.
5265                  */
5266                 tcg_regset_set_reg(t_allocated_regs, reg);
5267                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5268                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5269                     /* If ts was already in reg, copy it somewhere else. */
5270                     TCGReg nr;
5271                     bool ok;
5272 
5273                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5274                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5275                                        t_allocated_regs, 0, ts->indirect_base);
5276                     ok = tcg_out_mov(s, ts->type, nr, reg);
5277                     tcg_debug_assert(ok);
5278 
5279                     set_temp_val_reg(s, ts, nr);
5280                 } else {
5281                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5282                               t_allocated_regs, 0);
5283                     copyto_new_reg = true;
5284                 }
5285             } else {
5286                 /* Preferably allocate to reg, otherwise copy. */
5287                 i_required_regs = (TCGRegSet)1 << reg;
5288                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5289                           i_preferred_regs);
5290                 copyto_new_reg = ts->reg != reg;
5291             }
5292             break;
5293 
5294         default:
5295             g_assert_not_reached();
5296         }
5297 
5298         if (copyto_new_reg) {
5299             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5300                 /*
5301                  * Cross register class move not supported.  Sync the
5302                  * temp back to its slot and load from there.
5303                  */
5304                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5305                 tcg_out_ld(s, ts->type, reg,
5306                            ts->mem_base->reg, ts->mem_offset);
5307             }
5308         }
5309         new_args[i] = reg;
5310         const_args[i] = 0;
5311         tcg_regset_set_reg(i_allocated_regs, reg);
5312     }
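    /*
     * Illustrative pair case for the loop above (hypothetical
     * registers): when input T sits in R4 and the constraint demands an
     * adjacent pair, the existing placement is accepted only if R5
     * already holds the partner value (or nothing, for an aliased
     * output); otherwise tcg_reg_alloc_pair selects a fresh adjacent
     * pair and the do_pair path moves the values into place.
     */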
5313 
5314     /* mark dead temporaries and free the associated registers */
5315     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5316         if (IS_DEAD_ARG(i)) {
5317             temp_dead(s, arg_temp(op->args[i]));
5318         }
5319     }
5320 
5321     if (def->flags & TCG_OPF_COND_BRANCH) {
5322         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5323     } else if (def->flags & TCG_OPF_BB_END) {
5324         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5325     } else {
5326         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5327             /* XXX: permit a generic clobber register list? */
5328             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5329                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5330                     tcg_reg_free(s, i, i_allocated_regs);
5331                 }
5332             }
5333         }
5334         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5335             /* sync globals if the op has side effects and might trigger
5336                an exception. */
5337             sync_globals(s, i_allocated_regs);
5338         }
5339 
5340         /* satisfy the output constraints */
5341         for (k = 0; k < nb_oargs; k++) {
5342             i = args_ct[k].sort_index;
5343             arg = op->args[i];
5344             arg_ct = &args_ct[i];
5345             ts = arg_temp(arg);
5346 
5347             /* ENV should not be modified.  */
5348             tcg_debug_assert(!temp_readonly(ts));
5349 
5350             switch (arg_ct->pair) {
5351             case 0: /* not paired */
5352                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5353                     reg = new_args[arg_ct->alias_index];
5354                 } else if (arg_ct->newreg) {
5355                     reg = tcg_reg_alloc(s, arg_ct->regs,
5356                                         i_allocated_regs | o_allocated_regs,
5357                                         output_pref(op, k), ts->indirect_base);
5358                 } else {
5359                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5360                                         output_pref(op, k), ts->indirect_base);
5361                 }
5362                 break;
5363 
5364             case 1: /* first of pair */
5365                 if (arg_ct->oalias) {
5366                     reg = new_args[arg_ct->alias_index];
5367                 } else if (arg_ct->newreg) {
5368                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5369                                              i_allocated_regs | o_allocated_regs,
5370                                              output_pref(op, k),
5371                                              ts->indirect_base);
5372                 } else {
5373                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5374                                              output_pref(op, k),
5375                                              ts->indirect_base);
5376                 }
5377                 break;
5378 
5379             case 2: /* second of pair */
5380                 if (arg_ct->oalias) {
5381                     reg = new_args[arg_ct->alias_index];
5382                 } else {
5383                     reg = new_args[arg_ct->pair_index] + 1;
5384                 }
5385                 break;
5386 
5387             case 3: /* first of pair, aliasing with a second input */
5388                 tcg_debug_assert(!arg_ct->newreg);
5389                 reg = new_args[arg_ct->pair_index] - 1;
5390                 break;
5391 
5392             default:
5393                 g_assert_not_reached();
5394             }
5395             tcg_regset_set_reg(o_allocated_regs, reg);
5396             set_temp_val_reg(s, ts, reg);
5397             ts->mem_coherent = 0;
5398             new_args[i] = reg;
5399         }
5400     }
5401 
5402     /* emit instruction */
5403     TCGType type = TCGOP_TYPE(op);
5404     switch (op->opc) {
5405     case INDEX_op_ext_i32_i64:
5406         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5407         break;
5408     case INDEX_op_extu_i32_i64:
5409         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5410         break;
5411     case INDEX_op_extrl_i64_i32:
5412         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5413         break;
5414 
5415     case INDEX_op_add:
5416     case INDEX_op_and:
5417     case INDEX_op_andc:
5418     case INDEX_op_divs:
5419     case INDEX_op_divu:
5420     case INDEX_op_eqv:
5421     case INDEX_op_mul:
5422     case INDEX_op_mulsh:
5423     case INDEX_op_muluh:
5424     case INDEX_op_nand:
5425     case INDEX_op_nor:
5426     case INDEX_op_or:
5427     case INDEX_op_orc:
5428     case INDEX_op_xor:
5429         {
5430             const TCGOutOpBinary *out =
5431                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5432 
5433             /* Constants should never appear in the first source operand. */
5434             tcg_debug_assert(!const_args[1]);
5435             if (const_args[2]) {
5436                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5437             } else {
5438                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5439             }
5440         }
5441         break;
5442 
5443     case INDEX_op_sub:
5444         {
5445             const TCGOutOpSubtract *out = &outop_sub;
5446 
5447             /*
5448              * Constants should never appear in the second source operand.
5449              * These are folded into an add with a negative constant.
5450              */
5451             tcg_debug_assert(!const_args[2]);
5452             if (const_args[1]) {
5453                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     case INDEX_op_neg:
5461     case INDEX_op_not:
5462         {
5463             const TCGOutOpUnary *out =
5464                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5465 
5466             /* Constants should have been folded. */
5467             tcg_debug_assert(!const_args[1]);
5468             out->out_rr(s, type, new_args[0], new_args[1]);
5469         }
5470         break;
5471 
5472     case INDEX_op_divs2:
5473     case INDEX_op_divu2:
5474         {
5475             const TCGOutOpDivRem *out =
5476                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5477 
5478             /* Only used by x86 and s390x, which use matching constraints. */
5479             tcg_debug_assert(new_args[0] == new_args[2]);
5480             tcg_debug_assert(new_args[1] == new_args[3]);
5481             tcg_debug_assert(!const_args[4]);
5482             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5483         }
5484         break;
5485 
5486     default:
5487         if (def->flags & TCG_OPF_VECTOR) {
5488             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5489                            TCGOP_VECE(op), new_args, const_args);
5490         } else {
5491             tcg_out_op(s, op->opc, type, new_args, const_args);
5492         }
5493         break;
5494     }
5495 
5496     /* move the outputs into the correct registers if needed */
5497     for (i = 0; i < nb_oargs; i++) {
5498         ts = arg_temp(op->args[i]);
5499 
5500         /* ENV should not be modified.  */
5501         tcg_debug_assert(!temp_readonly(ts));
5502 
5503         if (NEED_SYNC_ARG(i)) {
5504             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5505         } else if (IS_DEAD_ARG(i)) {
5506             temp_dead(s, ts);
5507         }
5508     }
5509 }
5510 
5511 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5512 {
5513     const TCGLifeData arg_life = op->life;
5514     TCGTemp *ots, *itsl, *itsh;
5515     TCGType vtype = TCGOP_TYPE(op);
5516 
5517     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5518     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5519     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5520 
5521     ots = arg_temp(op->args[0]);
5522     itsl = arg_temp(op->args[1]);
5523     itsh = arg_temp(op->args[2]);
5524 
5525     /* ENV should not be modified.  */
5526     tcg_debug_assert(!temp_readonly(ots));
5527 
5528     /* Allocate the output register now.  */
5529     if (ots->val_type != TEMP_VAL_REG) {
5530         TCGRegSet allocated_regs = s->reserved_regs;
5531         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5532         TCGReg oreg;
5533 
5534         /* Make sure to not spill the input registers. */
5535         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5536             tcg_regset_set_reg(allocated_regs, itsl->reg);
5537         }
5538         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5539             tcg_regset_set_reg(allocated_regs, itsh->reg);
5540         }
5541 
5542         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5543                              output_pref(op, 0), ots->indirect_base);
5544         set_temp_val_reg(s, ots, oreg);
5545     }
5546 
5547     /* Promote dup2 of immediates to dupi_vec. */
5548     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5549         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5550         MemOp vece = MO_64;
5551 
5552         if (val == dup_const(MO_8, val)) {
5553             vece = MO_8;
5554         } else if (val == dup_const(MO_16, val)) {
5555             vece = MO_16;
5556         } else if (val == dup_const(MO_32, val)) {
5557             vece = MO_32;
5558         }
5559 
5560         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5561         goto done;
5562     }
5563 
5564     /* If the two inputs form one 64-bit value, try dupm_vec. */
5565     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5566         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5567         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5568         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5569 
5570         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5571         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5572 
5573         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5574                              its->mem_base->reg, its->mem_offset)) {
5575             goto done;
5576         }
5577     }
5578 
5579     /* Fall back to generic expansion. */
5580     return false;
5581 
5582  done:
5583     ots->mem_coherent = 0;
5584     if (IS_DEAD_ARG(1)) {
5585         temp_dead(s, itsl);
5586     }
5587     if (IS_DEAD_ARG(2)) {
5588         temp_dead(s, itsh);
5589     }
5590     if (NEED_SYNC_ARG(0)) {
5591         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5592     } else if (IS_DEAD_ARG(0)) {
5593         temp_dead(s, ots);
5594     }
5595     return true;
5596 }
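/*
 * Example of the immediate promotion above (illustrative values): for
 * itsl->val = 0x00000001 and itsh->val = 0x00000002,
 * deposit64(0x00000001, 32, 32, 0x00000002) yields 0x0000000200000001.
 * The halves differ, so no dup_const pattern applies and the constant
 * is emitted with vece = MO_64; had both halves been equal, MO_32 (or
 * narrower) would have been chosen.
 */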
5597 
5598 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5599                          TCGRegSet allocated_regs)
5600 {
5601     if (ts->val_type == TEMP_VAL_REG) {
5602         if (ts->reg != reg) {
5603             tcg_reg_free(s, reg, allocated_regs);
5604             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5605                 /*
5606                  * Cross register class move not supported.  Sync the
5607                  * temp back to its slot and load from there.
5608                  */
5609                 temp_sync(s, ts, allocated_regs, 0, 0);
5610                 tcg_out_ld(s, ts->type, reg,
5611                            ts->mem_base->reg, ts->mem_offset);
5612             }
5613         }
5614     } else {
5615         TCGRegSet arg_set = 0;
5616 
5617         tcg_reg_free(s, reg, allocated_regs);
5618         tcg_regset_set_reg(arg_set, reg);
5619         temp_load(s, ts, arg_set, allocated_regs, 0);
5620     }
5621 }
5622 
5623 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5624                          TCGRegSet allocated_regs)
5625 {
5626     /*
5627      * When the destination is on the stack, load up the temp and store.
5628      * If there are many call-saved registers, the temp might live to
5629      * see another use; otherwise it'll be discarded.
5630      */
5631     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5632     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5633                arg_slot_stk_ofs(arg_slot));
5634 }
5635 
5636 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5637                             TCGTemp *ts, TCGRegSet *allocated_regs)
5638 {
5639     if (arg_slot_reg_p(l->arg_slot)) {
5640         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5641         load_arg_reg(s, reg, ts, *allocated_regs);
5642         tcg_regset_set_reg(*allocated_regs, reg);
5643     } else {
5644         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5645     }
5646 }
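/*
 * Sketch of the slot mapping (host-dependent): on a host whose ABI
 * provides, say, six integer argument registers, arg_slot_reg_p() is
 * true for slots 0..5, which map through tcg_target_call_iarg_regs[];
 * higher slots are stored at arg_slot_stk_ofs(slot) relative to
 * TCG_REG_CALL_STACK.
 */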
5647 
5648 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5649                          intptr_t ref_off, TCGRegSet *allocated_regs)
5650 {
5651     TCGReg reg;
5652 
5653     if (arg_slot_reg_p(arg_slot)) {
5654         reg = tcg_target_call_iarg_regs[arg_slot];
5655         tcg_reg_free(s, reg, *allocated_regs);
5656         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5657         tcg_regset_set_reg(*allocated_regs, reg);
5658     } else {
5659         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5660                             *allocated_regs, 0, false);
5661         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5662         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5663                    arg_slot_stk_ofs(arg_slot));
5664     }
5665 }
5666 
5667 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5668 {
5669     const int nb_oargs = TCGOP_CALLO(op);
5670     const int nb_iargs = TCGOP_CALLI(op);
5671     const TCGLifeData arg_life = op->life;
5672     const TCGHelperInfo *info = tcg_call_info(op);
5673     TCGRegSet allocated_regs = s->reserved_regs;
5674     int i;
5675 
5676     /*
5677      * Move inputs into place in reverse order,
5678      * so that we place stacked arguments first.
5679      */
5680     for (i = nb_iargs - 1; i >= 0; --i) {
5681         const TCGCallArgumentLoc *loc = &info->in[i];
5682         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5683 
5684         switch (loc->kind) {
5685         case TCG_CALL_ARG_NORMAL:
5686         case TCG_CALL_ARG_EXTEND_U:
5687         case TCG_CALL_ARG_EXTEND_S:
5688             load_arg_normal(s, loc, ts, &allocated_regs);
5689             break;
5690         case TCG_CALL_ARG_BY_REF:
5691             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5692             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5693                          arg_slot_stk_ofs(loc->ref_slot),
5694                          &allocated_regs);
5695             break;
5696         case TCG_CALL_ARG_BY_REF_N:
5697             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5698             break;
5699         default:
5700             g_assert_not_reached();
5701         }
5702     }
5703 
5704     /* Mark dead temporaries and free the associated registers.  */
5705     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5706         if (IS_DEAD_ARG(i)) {
5707             temp_dead(s, arg_temp(op->args[i]));
5708         }
5709     }
5710 
5711     /* Clobber call registers.  */
5712     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5713         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5714             tcg_reg_free(s, i, allocated_regs);
5715         }
5716     }
5717 
5718     /*
5719      * Save globals if they might be written by the helper,
5720      * sync them if they might be read.
5721      */
5722     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5723         /* Nothing to do */
5724     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5725         sync_globals(s, allocated_regs);
5726     } else {
5727         save_globals(s, allocated_regs);
5728     }
5729 
5730     /*
5731      * If the ABI passes a pointer to the returned struct as the first
5732      * argument, load that now.  Pass a pointer to the output home slot.
5733      */
5734     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5735         TCGTemp *ts = arg_temp(op->args[0]);
5736 
5737         if (!ts->mem_allocated) {
5738             temp_allocate_frame(s, ts);
5739         }
5740         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5741     }
5742 
5743     tcg_out_call(s, tcg_call_func(op), info);
5744 
5745     /* Assign output registers and emit moves if needed.  */
5746     switch (info->out_kind) {
5747     case TCG_CALL_RET_NORMAL:
5748         for (i = 0; i < nb_oargs; i++) {
5749             TCGTemp *ts = arg_temp(op->args[i]);
5750             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5751 
5752             /* ENV should not be modified.  */
5753             tcg_debug_assert(!temp_readonly(ts));
5754 
5755             set_temp_val_reg(s, ts, reg);
5756             ts->mem_coherent = 0;
5757         }
5758         break;
5759 
5760     case TCG_CALL_RET_BY_VEC:
5761         {
5762             TCGTemp *ts = arg_temp(op->args[0]);
5763 
5764             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5765             tcg_debug_assert(ts->temp_subindex == 0);
5766             if (!ts->mem_allocated) {
5767                 temp_allocate_frame(s, ts);
5768             }
5769             tcg_out_st(s, TCG_TYPE_V128,
5770                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5771                        ts->mem_base->reg, ts->mem_offset);
5772         }
5773         /* fall through to mark all parts in memory */
5774 
5775     case TCG_CALL_RET_BY_REF:
5776         /* The callee has performed a write through the reference. */
5777         for (i = 0; i < nb_oargs; i++) {
5778             TCGTemp *ts = arg_temp(op->args[i]);
5779             ts->val_type = TEMP_VAL_MEM;
5780         }
5781         break;
5782 
5783     default:
5784         g_assert_not_reached();
5785     }
5786 
5787     /* Flush or discard output registers as needed. */
5788     for (i = 0; i < nb_oargs; i++) {
5789         TCGTemp *ts = arg_temp(op->args[i]);
5790         if (NEED_SYNC_ARG(i)) {
5791             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5792         } else if (IS_DEAD_ARG(i)) {
5793             temp_dead(s, ts);
5794         }
5795     }
5796 }
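/*
 * Note on ordering in the function above: inputs are processed in
 * reverse so that stack arguments are stored first, while argument
 * registers are still free to serve as scratch for temp_load; e.g. on
 * a host with six argument registers, a helper taking eight arguments
 * has slots 6 and 7 written to the stack before R0..R5 are pinned.
 */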
5797 
5798 /**
5799  * atom_and_align_for_opc:
5800  * @s: tcg context
5801  * @opc: memory operation code
5802  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5803  * @allow_two_ops: true if we are prepared to issue two operations
5804  *
5805  * Return the alignment and atomicity to use for the inline fast path
5806  * for the given memory operation.  The alignment may be larger than
5807  * that specified in @opc, and the correct alignment will be diagnosed
5808  * by the slow path helper.
5809  *
5810  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5811  * and issue two loads or stores for subalignment.
5812  */
5813 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5814                                            MemOp host_atom, bool allow_two_ops)
5815 {
5816     MemOp align = memop_alignment_bits(opc);
5817     MemOp size = opc & MO_SIZE;
5818     MemOp half = size ? size - 1 : 0;
5819     MemOp atom = opc & MO_ATOM_MASK;
5820     MemOp atmax;
5821 
5822     switch (atom) {
5823     case MO_ATOM_NONE:
5824         /* The operation requires no specific atomicity. */
5825         atmax = MO_8;
5826         break;
5827 
5828     case MO_ATOM_IFALIGN:
5829         atmax = size;
5830         break;
5831 
5832     case MO_ATOM_IFALIGN_PAIR:
5833         atmax = half;
5834         break;
5835 
5836     case MO_ATOM_WITHIN16:
5837         atmax = size;
5838         if (size == MO_128) {
5839             /* Misalignment implies !within16, and therefore no atomicity. */
5840         } else if (host_atom != MO_ATOM_WITHIN16) {
5841             /* The host does not implement within16, so require alignment. */
5842             align = MAX(align, size);
5843         }
5844         break;
5845 
5846     case MO_ATOM_WITHIN16_PAIR:
5847         atmax = size;
5848         /*
5849          * Misalignment implies !within16, and therefore half atomicity.
5850          * Any host prepared for two operations can implement this with
5851          * half alignment.
5852          */
5853         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5854             align = MAX(align, half);
5855         }
5856         break;
5857 
5858     case MO_ATOM_SUBALIGN:
5859         atmax = size;
5860         if (host_atom != MO_ATOM_SUBALIGN) {
5861             /* If unaligned but not odd, there are subobjects up to half. */
5862             if (allow_two_ops) {
5863                 align = MAX(align, half);
5864             } else {
5865                 align = MAX(align, size);
5866             }
5867         }
5868         break;
5869 
5870     default:
5871         g_assert_not_reached();
5872     }
5873 
5874     return (TCGAtomAlign){ .atom = atmax, .align = align };
5875 }
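/*
 * Worked example (illustrative): for opc = MO_64 | MO_ATOM_WITHIN16 on
 * a host that is only atomic for aligned operations (host_atom ==
 * MO_ATOM_IFALIGN), the result is atom = MO_64 with align raised to
 * MO_64: forcing 8-byte alignment in the fast path preserves the
 * requested atomicity without within-16-byte support, and misaligned
 * accesses fall to the slow path.
 */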
5876 
5877 /*
5878  * Similarly for qemu_ld/st slow path helpers.
5879  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5880  * using only the provided backend tcg_out_* functions.
5881  */
5882 
5883 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5884 {
5885     int ofs = arg_slot_stk_ofs(slot);
5886 
5887     /*
5888      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5889      * require extension to uint64_t, adjust the address for uint32_t.
5890      */
5891     if (HOST_BIG_ENDIAN &&
5892         TCG_TARGET_REG_BITS == 64 &&
5893         type == TCG_TYPE_I32) {
5894         ofs += 4;
5895     }
5896     return ofs;
5897 }
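/*
 * Worked example (illustrative): on a 64-bit big-endian host, a
 * TCG_TYPE_I32 argument occupies the high-addressed half of its 8-byte
 * stack slot, so ofs += 4 aims the store at the four bytes the callee
 * will read as a 32-bit value; little-endian hosts need no adjustment.
 */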
5898 
5899 static void tcg_out_helper_load_slots(TCGContext *s,
5900                                       unsigned nmov, TCGMovExtend *mov,
5901                                       const TCGLdstHelperParam *parm)
5902 {
5903     unsigned i;
5904     TCGReg dst3;
5905 
5906     /*
5907      * Start from the end, storing to the stack first.
5908      * This frees those registers, so we need not consider overlap.
5909      */
5910     for (i = nmov; i-- > 0; ) {
5911         unsigned slot = mov[i].dst;
5912 
5913         if (arg_slot_reg_p(slot)) {
5914             goto found_reg;
5915         }
5916 
5917         TCGReg src = mov[i].src;
5918         TCGType dst_type = mov[i].dst_type;
5919         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5920 
5921         /* The argument is going onto the stack; extend into scratch. */
5922         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5923             tcg_debug_assert(parm->ntmp != 0);
5924             mov[i].dst = src = parm->tmp[0];
5925             tcg_out_movext1(s, &mov[i]);
5926         }
5927 
5928         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5929                    tcg_out_helper_stk_ofs(dst_type, slot));
5930     }
5931     return;
5932 
5933  found_reg:
5934     /*
5935      * The remaining arguments are in registers.
5936      * Convert slot numbers to argument registers.
5937      */
5938     nmov = i + 1;
5939     for (i = 0; i < nmov; ++i) {
5940         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5941     }
5942 
5943     switch (nmov) {
5944     case 4:
5945         /* The backend must have provided enough temps for the worst case. */
5946         tcg_debug_assert(parm->ntmp >= 2);
5947 
5948         dst3 = mov[3].dst;
5949         for (unsigned j = 0; j < 3; ++j) {
5950             if (dst3 == mov[j].src) {
5951                 /*
5952                  * Conflict. Copy the source to a temporary, perform the
5953                  * remaining moves, then the extension from our scratch
5954                  * on the way out.
5955                  */
5956                 TCGReg scratch = parm->tmp[1];
5957 
5958                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5959                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5960                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5961                 return;
5962             }
5963         }
5964 
5965         /* No conflicts: perform this move and continue. */
5966         tcg_out_movext1(s, &mov[3]);
5967         /* fall through */
5968 
5969     case 3:
5970         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5971                         parm->ntmp ? parm->tmp[0] : -1);
5972         break;
5973     case 2:
5974         tcg_out_movext2(s, mov, mov + 1,
5975                         parm->ntmp ? parm->tmp[0] : -1);
5976         break;
5977     case 1:
5978         tcg_out_movext1(s, mov);
5979         break;
5980     default:
5981         g_assert_not_reached();
5982     }
5983 }
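/*
 * Conflict example for the four-move case above (hypothetical
 * registers): if mov[3].dst is R2 while R2 is still the source of
 * mov[1], writing mov[3] first would corrupt mov[1]'s input.  The code
 * therefore parks mov[3].src in a scratch register, lets
 * tcg_out_movext3 resolve the first three moves, and performs the
 * final extension from the scratch copy.
 */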
5984 
5985 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5986                                     TCGType type, tcg_target_long imm,
5987                                     const TCGLdstHelperParam *parm)
5988 {
5989     if (arg_slot_reg_p(slot)) {
5990         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5991     } else {
5992         int ofs = tcg_out_helper_stk_ofs(type, slot);
5993         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5994             tcg_debug_assert(parm->ntmp != 0);
5995             tcg_out_movi(s, type, parm->tmp[0], imm);
5996             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5997         }
5998     }
5999 }
6000 
6001 static void tcg_out_helper_load_common_args(TCGContext *s,
6002                                             const TCGLabelQemuLdst *ldst,
6003                                             const TCGLdstHelperParam *parm,
6004                                             const TCGHelperInfo *info,
6005                                             unsigned next_arg)
6006 {
6007     TCGMovExtend ptr_mov = {
6008         .dst_type = TCG_TYPE_PTR,
6009         .src_type = TCG_TYPE_PTR,
6010         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6011     };
6012     const TCGCallArgumentLoc *loc = &info->in[0];
6013     TCGType type;
6014     unsigned slot;
6015     tcg_target_ulong imm;
6016 
6017     /*
6018      * Handle env, which is always first.
6019      */
6020     ptr_mov.dst = loc->arg_slot;
6021     ptr_mov.src = TCG_AREG0;
6022     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6023 
6024     /*
6025      * Handle oi.
6026      */
6027     imm = ldst->oi;
6028     loc = &info->in[next_arg];
6029     type = TCG_TYPE_I32;
6030     switch (loc->kind) {
6031     case TCG_CALL_ARG_NORMAL:
6032         break;
6033     case TCG_CALL_ARG_EXTEND_U:
6034     case TCG_CALL_ARG_EXTEND_S:
6035         /* No extension required for MemOpIdx. */
6036         tcg_debug_assert(imm <= INT32_MAX);
6037         type = TCG_TYPE_REG;
6038         break;
6039     default:
6040         g_assert_not_reached();
6041     }
6042     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6043     next_arg++;
6044 
6045     /*
6046      * Handle ra.
6047      */
6048     loc = &info->in[next_arg];
6049     slot = loc->arg_slot;
6050     if (parm->ra_gen) {
6051         int arg_reg = -1;
6052         TCGReg ra_reg;
6053 
6054         if (arg_slot_reg_p(slot)) {
6055             arg_reg = tcg_target_call_iarg_regs[slot];
6056         }
6057         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6058 
6059         ptr_mov.dst = slot;
6060         ptr_mov.src = ra_reg;
6061         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6062     } else {
6063         imm = (uintptr_t)ldst->raddr;
6064         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6065     }
6066 }
6067 
6068 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6069                                        const TCGCallArgumentLoc *loc,
6070                                        TCGType dst_type, TCGType src_type,
6071                                        TCGReg lo, TCGReg hi)
6072 {
6073     MemOp reg_mo;
6074 
6075     if (dst_type <= TCG_TYPE_REG) {
6076         MemOp src_ext;
6077 
6078         switch (loc->kind) {
6079         case TCG_CALL_ARG_NORMAL:
6080             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6081             break;
6082         case TCG_CALL_ARG_EXTEND_U:
6083             dst_type = TCG_TYPE_REG;
6084             src_ext = MO_UL;
6085             break;
6086         case TCG_CALL_ARG_EXTEND_S:
6087             dst_type = TCG_TYPE_REG;
6088             src_ext = MO_SL;
6089             break;
6090         default:
6091             g_assert_not_reached();
6092         }
6093 
6094         mov[0].dst = loc->arg_slot;
6095         mov[0].dst_type = dst_type;
6096         mov[0].src = lo;
6097         mov[0].src_type = src_type;
6098         mov[0].src_ext = src_ext;
6099         return 1;
6100     }
6101 
6102     if (TCG_TARGET_REG_BITS == 32) {
6103         assert(dst_type == TCG_TYPE_I64);
6104         reg_mo = MO_32;
6105     } else {
6106         assert(dst_type == TCG_TYPE_I128);
6107         reg_mo = MO_64;
6108     }
6109 
6110     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6111     mov[0].src = lo;
6112     mov[0].dst_type = TCG_TYPE_REG;
6113     mov[0].src_type = TCG_TYPE_REG;
6114     mov[0].src_ext = reg_mo;
6115 
6116     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6117     mov[1].src = hi;
6118     mov[1].dst_type = TCG_TYPE_REG;
6119     mov[1].src_type = TCG_TYPE_REG;
6120     mov[1].src_ext = reg_mo;
6121 
6122     return 2;
6123 }
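/*
 * Illustration (assuming a 32-bit big-endian host): an I64 argument in
 * the register pair lo/hi expands to two moves, mov[0] sending lo to
 * loc[1].arg_slot and mov[1] sending hi to loc[0].arg_slot, so the
 * word order in the argument slots matches the host ABI's layout of a
 * 64-bit value.
 */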
6124 
6125 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6126                                    const TCGLdstHelperParam *parm)
6127 {
6128     const TCGHelperInfo *info;
6129     const TCGCallArgumentLoc *loc;
6130     TCGMovExtend mov[2];
6131     unsigned next_arg, nmov;
6132     MemOp mop = get_memop(ldst->oi);
6133 
6134     switch (mop & MO_SIZE) {
6135     case MO_8:
6136     case MO_16:
6137     case MO_32:
6138         info = &info_helper_ld32_mmu;
6139         break;
6140     case MO_64:
6141         info = &info_helper_ld64_mmu;
6142         break;
6143     case MO_128:
6144         info = &info_helper_ld128_mmu;
6145         break;
6146     default:
6147         g_assert_not_reached();
6148     }
6149 
6150     /* Defer env argument. */
6151     next_arg = 1;
6152 
6153     loc = &info->in[next_arg];
6154     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6155         /*
6156          * 32-bit host with 32-bit guest: zero-extend the guest address
6157          * to 64 bits for the helper by storing the low part, then
6158          * load a zero for the high part.
6159          */
6160         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6161                                TCG_TYPE_I32, TCG_TYPE_I32,
6162                                ldst->addr_reg, -1);
6163         tcg_out_helper_load_slots(s, 1, mov, parm);
6164 
6165         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6166                                 TCG_TYPE_I32, 0, parm);
6167         next_arg += 2;
6168     } else {
6169         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6170                                       ldst->addr_reg, -1);
6171         tcg_out_helper_load_slots(s, nmov, mov, parm);
6172         next_arg += nmov;
6173     }
6174 
6175     switch (info->out_kind) {
6176     case TCG_CALL_RET_NORMAL:
6177     case TCG_CALL_RET_BY_VEC:
6178         break;
6179     case TCG_CALL_RET_BY_REF:
6180         /*
6181          * The return reference is in the first argument slot.
6182          * We need memory in which to return: re-use the top of stack.
6183          */
6184         {
6185             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6186 
6187             if (arg_slot_reg_p(0)) {
6188                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6189                                  TCG_REG_CALL_STACK, ofs_slot0);
6190             } else {
6191                 tcg_debug_assert(parm->ntmp != 0);
6192                 tcg_out_addi_ptr(s, parm->tmp[0],
6193                                  TCG_REG_CALL_STACK, ofs_slot0);
6194                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6195                            TCG_REG_CALL_STACK, ofs_slot0);
6196             }
6197         }
6198         break;
6199     default:
6200         g_assert_not_reached();
6201     }
6202 
6203     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6204 }
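
/*
 * Sketch of the net effect, assuming the standard tcg-ldst helper
 * signature: the marshalling above amounts to a call of the form
 *
 *     ret = ld_helper(env, addr, oi, retaddr);
 *
 * where only addr is loaded here; env, oi and retaddr are filled
 * in by tcg_out_helper_load_common_args() at the end.
 */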
6205 
6206 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6207                                   bool load_sign,
6208                                   const TCGLdstHelperParam *parm)
6209 {
6210     MemOp mop = get_memop(ldst->oi);
6211     TCGMovExtend mov[2];
6212     int ofs_slot0;
6213 
6214     switch (ldst->type) {
6215     case TCG_TYPE_I64:
6216         if (TCG_TARGET_REG_BITS == 32) {
6217             break;
6218         }
6219         /* fall through */
6220 
6221     case TCG_TYPE_I32:
6222         mov[0].dst = ldst->datalo_reg;
6223         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6224         mov[0].dst_type = ldst->type;
6225         mov[0].src_type = TCG_TYPE_REG;
6226 
6227         /*
6228          * If load_sign, then we allowed the helper to perform the
6229          * appropriate sign extension to tcg_target_ulong, and all
6230          * we need now is a plain move.
6231          *
6232          * If not, then we expect the relevant extension
6233          * instruction to be no more expensive than a move, and
6234          * we thus save icache space, etc., by using only one of
6235          * the two helper functions.
6236          */
6237         if (load_sign || !(mop & MO_SIGN)) {
6238             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6239                 mov[0].src_ext = MO_32;
6240             } else {
6241                 mov[0].src_ext = MO_64;
6242             }
6243         } else {
6244             mov[0].src_ext = mop & MO_SSIZE;
6245         }
6246         tcg_out_movext1(s, mov);
6247         return;
6248 
6249     case TCG_TYPE_I128:
6250         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6251         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6252         switch (TCG_TARGET_CALL_RET_I128) {
6253         case TCG_CALL_RET_NORMAL:
6254             break;
6255         case TCG_CALL_RET_BY_VEC:
6256             tcg_out_st(s, TCG_TYPE_V128,
6257                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6258                        TCG_REG_CALL_STACK, ofs_slot0);
6259             /* fall through */
6260         case TCG_CALL_RET_BY_REF:
6261             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6262                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6263             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6264                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6265             return;
6266         default:
6267             g_assert_not_reached();
6268         }
6269         break;
6270 
6271     default:
6272         g_assert_not_reached();
6273     }
6274 
6275     mov[0].dst = ldst->datalo_reg;
6276     mov[0].src =
6277         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6278     mov[0].dst_type = TCG_TYPE_REG;
6279     mov[0].src_type = TCG_TYPE_REG;
6280     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6281 
6282     mov[1].dst = ldst->datahi_reg;
6283     mov[1].src =
6284         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6285     mov[1].dst_type = TCG_TYPE_REG;
6286     mov[1].src_type = TCG_TYPE_REG;
6287     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6288 
6289     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6290 }
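
/*
 * Layout note for the I128 cases above (64-bit host): slot 0 of the
 * outgoing stack area doubles as the return buffer, with the value
 * stored in host-endian order:
 *
 *     ofs_slot0 + 8 * HOST_BIG_ENDIAN   -> low half  (datalo_reg)
 *     ofs_slot0 + 8 * !HOST_BIG_ENDIAN  -> high half (datahi_reg)
 */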
6291 
6292 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6293                                    const TCGLdstHelperParam *parm)
6294 {
6295     const TCGHelperInfo *info;
6296     const TCGCallArgumentLoc *loc;
6297     TCGMovExtend mov[4];
6298     TCGType data_type;
6299     unsigned next_arg, nmov, n;
6300     MemOp mop = get_memop(ldst->oi);
6301 
6302     switch (mop & MO_SIZE) {
6303     case MO_8:
6304     case MO_16:
6305     case MO_32:
6306         info = &info_helper_st32_mmu;
6307         data_type = TCG_TYPE_I32;
6308         break;
6309     case MO_64:
6310         info = &info_helper_st64_mmu;
6311         data_type = TCG_TYPE_I64;
6312         break;
6313     case MO_128:
6314         info = &info_helper_st128_mmu;
6315         data_type = TCG_TYPE_I128;
6316         break;
6317     default:
6318         g_assert_not_reached();
6319     }
6320 
6321     /* Defer env argument. */
6322     next_arg = 1;
6323     nmov = 0;
6324 
6325     /* Handle addr argument. */
6326     loc = &info->in[next_arg];
6327     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6328     if (TCG_TARGET_REG_BITS == 32) {
6329         /*
6330          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6331          * to 64 bits for the helper by storing the low part.  Later,
6332          * after we have processed the register inputs, we will load a
6333          * zero for the high part.
6334          */
6335         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6336                                TCG_TYPE_I32, TCG_TYPE_I32,
6337                                ldst->addr_reg, -1);
6338         next_arg += 2;
6339         nmov += 1;
6340     } else {
6341         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6342                                    ldst->addr_reg, -1);
6343         next_arg += n;
6344         nmov += n;
6345     }
6346 
6347     /* Handle data argument. */
6348     loc = &info->in[next_arg];
6349     switch (loc->kind) {
6350     case TCG_CALL_ARG_NORMAL:
6351     case TCG_CALL_ARG_EXTEND_U:
6352     case TCG_CALL_ARG_EXTEND_S:
6353         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6354                                    ldst->datalo_reg, ldst->datahi_reg);
6355         next_arg += n;
6356         nmov += n;
6357         tcg_out_helper_load_slots(s, nmov, mov, parm);
6358         break;
6359 
6360     case TCG_CALL_ARG_BY_REF:
6361         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6362         tcg_debug_assert(data_type == TCG_TYPE_I128);
6363         tcg_out_st(s, TCG_TYPE_I64,
6364                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6365                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6366         tcg_out_st(s, TCG_TYPE_I64,
6367                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6368                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6369 
6370         tcg_out_helper_load_slots(s, nmov, mov, parm);
6371 
6372         if (arg_slot_reg_p(loc->arg_slot)) {
6373             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6374                              TCG_REG_CALL_STACK,
6375                              arg_slot_stk_ofs(loc->ref_slot));
6376         } else {
6377             tcg_debug_assert(parm->ntmp != 0);
6378             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6379                              arg_slot_stk_ofs(loc->ref_slot));
6380             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6381                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6382         }
6383         next_arg += 2;
6384         break;
6385 
6386     default:
6387         g_assert_not_reached();
6388     }
6389 
6390     if (TCG_TARGET_REG_BITS == 32) {
6391         /* Zero extend the address by loading a zero for the high part. */
6392         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6393         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6394     }
6395 
6396     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6397 }
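
/*
 * Note the ordering in the TCG_CALL_ARG_BY_REF case above: the
 * 128-bit data is stored out to its by-reference stack slot before
 * tcg_out_helper_load_slots() assigns the register arguments, and
 * only then is a pointer to that slot placed in the argument slot
 * proper, so the data registers stay live until they are spilled.
 */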
6398 
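/*
 * Generate host code for @tb from the ops accumulated in @s->ops.
 * Returns the number of code bytes emitted, or a negative value
 * asking the caller to restart generation: -1 if the code buffer
 * high-water mark was crossed, -2 if the TB grew past what
 * gen_insn_end_off can record or relocations failed to resolve.
 */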
6399 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6400 {
6401     int i, start_words, num_insns;
6402     TCGOp *op;
6403 
6404     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6405                  && qemu_log_in_addr_range(pc_start))) {
6406         FILE *logfile = qemu_log_trylock();
6407         if (logfile) {
6408             fprintf(logfile, "OP:\n");
6409             tcg_dump_ops(s, logfile, false);
6410             fprintf(logfile, "\n");
6411             qemu_log_unlock(logfile);
6412         }
6413     }
6414 
6415 #ifdef CONFIG_DEBUG_TCG
6416     /* Ensure all labels referenced have been emitted.  */
6417     {
6418         TCGLabel *l;
6419         bool error = false;
6420 
6421         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6422             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6423                 qemu_log_mask(CPU_LOG_TB_OP,
6424                               "$L%d referenced but not present.\n", l->id);
6425                 error = true;
6426             }
6427         }
6428         assert(!error);
6429     }
6430 #endif
6431 
6432     /* Do not reuse any EBB that may be allocated within the TB. */
6433     tcg_temp_ebb_reset_freed(s);
6434 
6435     tcg_optimize(s);
6436 
6437     reachable_code_pass(s);
6438     liveness_pass_0(s);
6439     liveness_pass_1(s);
6440 
6441     if (s->nb_indirects > 0) {
6442         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6443                      && qemu_log_in_addr_range(pc_start))) {
6444             FILE *logfile = qemu_log_trylock();
6445             if (logfile) {
6446                 fprintf(logfile, "OP before indirect lowering:\n");
6447                 tcg_dump_ops(s, logfile, false);
6448                 fprintf(logfile, "\n");
6449                 qemu_log_unlock(logfile);
6450             }
6451         }
6452 
6453         /* Replace indirect temps with direct temps.  */
6454         if (liveness_pass_2(s)) {
6455             /* If changes were made, re-run liveness.  */
6456             liveness_pass_1(s);
6457         }
6458     }
6459 
6460     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6461                  && qemu_log_in_addr_range(pc_start))) {
6462         FILE *logfile = qemu_log_trylock();
6463         if (logfile) {
6464             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6465             tcg_dump_ops(s, logfile, true);
6466             fprintf(logfile, "\n");
6467             qemu_log_unlock(logfile);
6468         }
6469     }
6470 
6471     /* Initialize goto_tb jump offsets. */
6472     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6473     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6474     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6475     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6476 
6477     tcg_reg_alloc_start(s);
6478 
6479     /*
6480      * Reset the buffer pointers when restarting after overflow.
6481      * TODO: Move this into translate-all.c with the rest of the
6482      * buffer management.  Having only this done here is confusing.
6483      */
6484     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6485     s->code_ptr = s->code_buf;
6486     s->data_gen_ptr = NULL;
6487 
6488     QSIMPLEQ_INIT(&s->ldst_labels);
6489     s->pool_labels = NULL;
6490 
6491     start_words = s->insn_start_words;
6492     s->gen_insn_data =
6493         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
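    /*
     * gen_insn_data is a flat [icount][start_words] array of per-insn
     * start parameters (typically the guest PC), filled in at each
     * INDEX_op_insn_start below and used afterwards to restore guest
     * state from a host code pointer.
     */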
6494 
6495     tcg_out_tb_start(s);
6496 
6497     num_insns = -1;
6498     QTAILQ_FOREACH(op, &s->ops, link) {
6499         TCGOpcode opc = op->opc;
6500 
6501         switch (opc) {
6502         case INDEX_op_mov:
6503         case INDEX_op_mov_vec:
6504             tcg_reg_alloc_mov(s, op);
6505             break;
6506         case INDEX_op_dup_vec:
6507             tcg_reg_alloc_dup(s, op);
6508             break;
6509         case INDEX_op_insn_start:
6510             if (num_insns >= 0) {
6511                 size_t off = tcg_current_code_size(s);
6512                 s->gen_insn_end_off[num_insns] = off;
6513                 /* Assert that we do not overflow our stored offset.  */
6514                 assert(s->gen_insn_end_off[num_insns] == off);
6515             }
6516             num_insns++;
6517             for (i = 0; i < start_words; ++i) {
6518                 s->gen_insn_data[num_insns * start_words + i] =
6519                     tcg_get_insn_start_param(op, i);
6520             }
6521             break;
6522         case INDEX_op_discard:
6523             temp_dead(s, arg_temp(op->args[0]));
6524             break;
6525         case INDEX_op_set_label:
6526             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6527             tcg_out_label(s, arg_label(op->args[0]));
6528             break;
6529         case INDEX_op_call:
6530             tcg_reg_alloc_call(s, op);
6531             break;
6532         case INDEX_op_exit_tb:
6533             tcg_out_exit_tb(s, op->args[0]);
6534             break;
6535         case INDEX_op_goto_tb:
6536             tcg_out_goto_tb(s, op->args[0]);
6537             break;
6538         case INDEX_op_dup2_vec:
6539             if (tcg_reg_alloc_dup2(s, op)) {
6540                 break;
6541             }
6542             /* fall through */
6543         default:
6544             /* Sanity check that we've not introduced any unhandled opcodes. */
6545             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6546                                               TCGOP_FLAGS(op)));
6547             /* Note: it would be much faster to have specialized
6548                register allocator functions for some common argument
6549                patterns.  */
6550             tcg_reg_alloc_op(s, op);
6551             break;
6552         }
6553         /* Test for (pending) buffer overflow.  The assumption is that any
6554            one operation beginning below the high water mark cannot overrun
6555            the buffer completely.  Thus we can test for overflow after
6556            generating code without having to check during generation.  */
6557         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6558             return -1;
6559         }
6560         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6561         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6562             return -2;
6563         }
6564     }
6565     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6566     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6567 
6568     /* Generate TB finalization at the end of block */
6569     i = tcg_out_ldst_finalize(s);
6570     if (i < 0) {
6571         return i;
6572     }
6573     i = tcg_out_pool_finalize(s);
6574     if (i < 0) {
6575         return i;
6576     }
6577     if (!tcg_resolve_relocs(s)) {
6578         return -2;
6579     }
6580 
6581 #ifndef CONFIG_TCG_INTERPRETER
6582     /* Flush the instruction cache; code executes from the RX mapping. */
6583     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6584                         (uintptr_t)s->code_buf,
6585                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6586 #endif
6587 
6588     return tcg_current_code_size(s);
6589 }
6590 
6591 #ifdef ELF_HOST_MACHINE
6592 /* In order to use this feature, the backend needs to do three things:
6593 
6594    (1) Define ELF_HOST_MACHINE to indicate both what value to
6595        put into the ELF image and to indicate support for the feature.
6596 
6597    (2) Define tcg_register_jit.  This should create a buffer containing
6598        the contents of a .debug_frame section that describes the post-
6599        prologue unwind info for the tcg machine.
6600 
6601    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6602 */
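
/*
 * A minimal backend sketch (illustrative; the DebugFrame contents are
 * backend-specific and this 'frame' object is hypothetical):
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         static const DebugFrame frame = { ... CIE/FDE data ... };
 *         tcg_register_jit_int(buf, buf_size, &frame, sizeof(frame));
 *     }
 */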
6603 
6604 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6605 typedef enum {
6606     JIT_NOACTION = 0,
6607     JIT_REGISTER_FN,
6608     JIT_UNREGISTER_FN
6609 } jit_actions_t;
6610 
6611 struct jit_code_entry {
6612     struct jit_code_entry *next_entry;
6613     struct jit_code_entry *prev_entry;
6614     const void *symfile_addr;
6615     uint64_t symfile_size;
6616 };
6617 
6618 struct jit_descriptor {
6619     uint32_t version;
6620     uint32_t action_flag;
6621     struct jit_code_entry *relevant_entry;
6622     struct jit_code_entry *first_entry;
6623 };
6624 
6625 void __jit_debug_register_code(void) __attribute__((noinline));
6626 void __jit_debug_register_code(void)
6627 {
6628     asm("");
6629 }
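
/*
 * Per the GDB JIT interface documentation, the debugger places a
 * breakpoint in __jit_debug_register_code; each time it fires, GDB
 * re-reads __jit_debug_descriptor and loads the in-memory ELF image
 * referenced by the relevant entry.
 */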
6630 
6631 /* Must statically initialize the version, because GDB may check
6632    the version before we can set it.  */
6633 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6634 
6635 /* End GDB interface.  */
6636 
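/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab.  Only strings known to be present may be sought: there is
 * no end-of-table check, so a missing string would walk off the end.
 */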
6637 static int find_string(const char *strtab, const char *str)
6638 {
6639     const char *p = strtab + 1;
6640 
6641     while (1) {
6642         if (strcmp(p, str) == 0) {
6643             return p - strtab;
6644         }
6645         p += strlen(p) + 1;
6646     }
6647 }
6648 
6649 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6650                                  const void *debug_frame,
6651                                  size_t debug_frame_size)
6652 {
6653     struct __attribute__((packed)) DebugInfo {
6654         uint32_t  len;
6655         uint16_t  version;
6656         uint32_t  abbrev;
6657         uint8_t   ptr_size;
6658         uint8_t   cu_die;
6659         uint16_t  cu_lang;
6660         uintptr_t cu_low_pc;
6661         uintptr_t cu_high_pc;
6662         uint8_t   fn_die;
6663         char      fn_name[16];
6664         uintptr_t fn_low_pc;
6665         uintptr_t fn_high_pc;
6666         uint8_t   cu_eoc;
6667     };
6668 
6669     struct ElfImage {
6670         ElfW(Ehdr) ehdr;
6671         ElfW(Phdr) phdr;
6672         ElfW(Shdr) shdr[7];
6673         ElfW(Sym)  sym[2];
6674         struct DebugInfo di;
6675         uint8_t    da[24];
6676         char       str[80];
6677     };
6678 
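
    /*
     * The registered image is one contiguous allocation: this ElfImage
     * block followed immediately by the backend's .debug_frame data;
     * hence shdr[4].sh_offset == sizeof(struct ElfImage) below and the
     * (img + 1) arithmetic at the end.
     */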
6679     struct ElfImage *img;
6680 
6681     static const struct ElfImage img_template = {
6682         .ehdr = {
6683             .e_ident[EI_MAG0] = ELFMAG0,
6684             .e_ident[EI_MAG1] = ELFMAG1,
6685             .e_ident[EI_MAG2] = ELFMAG2,
6686             .e_ident[EI_MAG3] = ELFMAG3,
6687             .e_ident[EI_CLASS] = ELF_CLASS,
6688             .e_ident[EI_DATA] = ELF_DATA,
6689             .e_ident[EI_VERSION] = EV_CURRENT,
6690             .e_type = ET_EXEC,
6691             .e_machine = ELF_HOST_MACHINE,
6692             .e_version = EV_CURRENT,
6693             .e_phoff = offsetof(struct ElfImage, phdr),
6694             .e_shoff = offsetof(struct ElfImage, shdr),
6695             .e_ehsize = sizeof(ElfW(Ehdr)),
6696             .e_phentsize = sizeof(ElfW(Phdr)),
6697             .e_phnum = 1,
6698             .e_shentsize = sizeof(ElfW(Shdr)),
6699             .e_shnum = ARRAY_SIZE(img->shdr),
6700             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6701 #ifdef ELF_HOST_FLAGS
6702             .e_flags = ELF_HOST_FLAGS,
6703 #endif
6704 #ifdef ELF_OSABI
6705             .e_ident[EI_OSABI] = ELF_OSABI,
6706 #endif
6707         },
6708         .phdr = {
6709             .p_type = PT_LOAD,
6710             .p_flags = PF_X,
6711         },
6712         .shdr = {
6713             [0] = { .sh_type = SHT_NULL },
6714             /* Trick: The contents of code_gen_buffer are not present in
6715                this fake ELF file; that got allocated elsewhere.  Therefore
6716                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6717                will not look for contents.  We can record any address.  */
6718             [1] = { /* .text */
6719                 .sh_type = SHT_NOBITS,
6720                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6721             },
6722             [2] = { /* .debug_info */
6723                 .sh_type = SHT_PROGBITS,
6724                 .sh_offset = offsetof(struct ElfImage, di),
6725                 .sh_size = sizeof(struct DebugInfo),
6726             },
6727             [3] = { /* .debug_abbrev */
6728                 .sh_type = SHT_PROGBITS,
6729                 .sh_offset = offsetof(struct ElfImage, da),
6730                 .sh_size = sizeof(img->da),
6731             },
6732             [4] = { /* .debug_frame */
6733                 .sh_type = SHT_PROGBITS,
6734                 .sh_offset = sizeof(struct ElfImage),
6735             },
6736             [5] = { /* .symtab */
6737                 .sh_type = SHT_SYMTAB,
6738                 .sh_offset = offsetof(struct ElfImage, sym),
6739                 .sh_size = sizeof(img->sym),
6740                 .sh_info = 1,
6741                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6742                 .sh_entsize = sizeof(ElfW(Sym)),
6743             },
6744             [6] = { /* .strtab */
6745                 .sh_type = SHT_STRTAB,
6746                 .sh_offset = offsetof(struct ElfImage, str),
6747                 .sh_size = sizeof(img->str),
6748             }
6749         },
6750         .sym = {
6751             [1] = { /* code_gen_buffer */
6752                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6753                 .st_shndx = 1,
6754             }
6755         },
6756         .di = {
6757             .len = sizeof(struct DebugInfo) - 4,
6758             .version = 2,
6759             .ptr_size = sizeof(void *),
6760             .cu_die = 1,
6761             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6762             .fn_die = 2,
6763             .fn_name = "code_gen_buffer"
6764         },
6765         .da = {
6766             1,          /* abbrev number (the cu) */
6767             0x11, 1,    /* DW_TAG_compile_unit, has children */
6768             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6769             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6770             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6771             0, 0,       /* end of abbrev */
6772             2,          /* abbrev number (the fn) */
6773             0x2e, 0,    /* DW_TAG_subprogram, no children */
6774             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6775             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6776             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6777             0, 0,       /* end of abbrev */
6778             0           /* no more abbrev */
6779         },
6780         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6781                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6782     };
6783 
6784     /* We only need a single jit entry; statically allocate it.  */
6785     static struct jit_code_entry one_entry;
6786 
6787     uintptr_t buf = (uintptr_t)buf_ptr;
6788     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6789     DebugFrameHeader *dfh;
6790 
6791     img = g_malloc(img_size);
6792     *img = img_template;
6793 
6794     img->phdr.p_vaddr = buf;
6795     img->phdr.p_paddr = buf;
6796     img->phdr.p_memsz = buf_size;
6797 
6798     img->shdr[1].sh_name = find_string(img->str, ".text");
6799     img->shdr[1].sh_addr = buf;
6800     img->shdr[1].sh_size = buf_size;
6801 
6802     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6803     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6804 
6805     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6806     img->shdr[4].sh_size = debug_frame_size;
6807 
6808     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6809     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6810 
6811     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6812     img->sym[1].st_value = buf;
6813     img->sym[1].st_size = buf_size;
6814 
6815     img->di.cu_low_pc = buf;
6816     img->di.cu_high_pc = buf + buf_size;
6817     img->di.fn_low_pc = buf;
6818     img->di.fn_high_pc = buf + buf_size;
6819 
6820     dfh = (DebugFrameHeader *)(img + 1);
6821     memcpy(dfh, debug_frame, debug_frame_size);
6822     dfh->fde.func_start = buf;
6823     dfh->fde.func_len = buf_size;
6824 
6825 #ifdef DEBUG_JIT
6826     /* Enable this block to be able to debug the ELF image file creation.
6827        One can use readelf, objdump, or other inspection utilities.  */
6828     {
6829         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6830         FILE *f = fopen(jit, "w+b");
6831         if (f) {
6832             if (fwrite(img, img_size, 1, f) != 1) {
6833                 /* Avoid stupid unused return value warning for fwrite.  */
6834             }
6835             fclose(f);
6836         }
6837     }
6838 #endif
6839 
6840     one_entry.symfile_addr = img;
6841     one_entry.symfile_size = img_size;
6842 
6843     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6844     __jit_debug_descriptor.relevant_entry = &one_entry;
6845     __jit_debug_descriptor.first_entry = &one_entry;
6846     __jit_debug_register_code();
6847 }
6848 #else
6849 /* No support for the feature.  Provide the entry point expected by exec.c,
6850    and implement the internal function we declared earlier.  */
6851 
6852 static void tcg_register_jit_int(const void *buf, size_t size,
6853                                  const void *debug_frame,
6854                                  size_t debug_frame_size)
6855 {
6856 }
6857 
6858 void tcg_register_jit(const void *buf, size_t buf_size)
6859 {
6860 }
6861 #endif /* ELF_HOST_MACHINE */
6862 
6863 #if !TCG_TARGET_MAYBE_vec
6864 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6865 {
6866     g_assert_not_reached();
6867 }
6868 #endif
6869