xref: /openbmc/qemu/tcg/tcg.c (revision 005a87e148dc20f59835b328336240759703d63d)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* code addr following the qemu_ld/st insn */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
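
/*
 * Editor's note: the pattern above specializes at compile time.  On a
 * byte-stream host such as x86 (TCG_TARGET_INSN_UNIT_SIZE == 1, assumed
 * here for illustration), tcg_out32() takes the memcpy branch and
 * advances code_ptr by four units; on a fixed-width host such as
 * aarch64 (unit size 4), it reduces to a single "*s->code_ptr++ = v".
 */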

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
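
/*
 * Example (editor's sketch): to sign-extend an 8-bit value held in
 * register R into a 64-bit destination D, a caller would issue
 *     tcg_out_movext(s, TCG_TYPE_I64, D, TCG_TYPE_I32, MO_SB, R);
 * which dispatches to tcg_out_ext8s(s, TCG_TYPE_I64, D, R) above.
 */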

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
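
/*
 * Editor's note: the overlap cases above matter when i1->dst aliases
 * i2->src.  E.g. for the two moves (A <- B, B <- A), emitting i1 first
 * would clobber i2's source; tcg_out_xchg() swaps the registers when
 * the backend supports it, otherwise i1's source is parked in @scratch.
 */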

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
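
/*
 * Editor's note: the sort order (descending nlong, then descending
 * data) makes identical constants adjacent, which lets
 * tcg_out_pool_finalize() below emit each distinct value once by
 * comparing against the previously emitted entry.
 */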

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
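
/*
 * For illustration (editor's note): an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri,
 * and in the second expansion below to the constraint set
 * { 1, 2, { "r", "r", "ri" } } at the matching array index.
 */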

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
967  *
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
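
/*
 * Editor's sketch of backend usage (hypothetical constraint choice):
 * a tcg-target.c.inc would provide e.g.
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * where tgen_add/tgen_addi stand in for that backend's emitters; the
 * all_outop[] table below then routes INDEX_op_add to this structure.
 */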

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
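
/*
 * Editor's note on the .typemask encoding used above: each slot is a
 * 3-bit typecode, with the return type in bits [2:0] and argument N
 * (1-based) in bits [3N .. 3N+2]; init_call_layout() and
 * init_ffi_layout() below walk the mask three bits at a time.
 */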

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
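    /*
     * Editor's note: 32 - clz32(x) is the bit length of x, so the two
     * lines above compute nargs = ceil(bit_length / 3) -- e.g. a fifth
     * argument occupies bits [12..14] of typemask >> 3, giving a bit
     * length of 13..15 and hence nargs == 5.
     */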
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
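
/*
 * Editor's example: with ARRAY_SIZE(tcg_target_call_iarg_regs) == N,
 * argument slots 0..N-1 are passed in registers, and slot N+j lives at
 * stack offset TCG_TARGET_CALL_STACK_OFFSET + j * sizeof(tcg_target_long).
 */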

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
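
/*
 * Editor's note: layout_arg_even() implements TCG_CALL_ARG_EVEN, for
 * ABIs that require 64-bit (or 128-bit) values to start on an even
 * argument slot -- e.g. an aligned register pair on 32-bit hosts.
 */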

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * a structure passed by reference.  Therefore we must make copies.
1385      * Allocate space from "ref_slot", which will be adjusted to
1386      * follow the parameters on the stack.
1387      */
1388     loc[0].ref_slot = cum->ref_slot;
1389 
1390     /*
1391      * Subsequent words also go into the reference slot, but
1392      * do not accumulate into the regular arguments.
1393      */
1394     for (int i = 1; i < n; ++i) {
1395         loc[i] = (TCGCallArgumentLoc){
1396             .kind = TCG_CALL_ARG_BY_REF_N,
1397             .arg_idx = cum->arg_idx,
1398             .tmp_subindex = i,
1399             .ref_slot = cum->ref_slot + i,
1400         };
1401     }
1402     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1403     cum->ref_slot += n;
1404 }
1405 
1406 static void init_call_layout(TCGHelperInfo *info)
1407 {
1408     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1409     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1410     unsigned typemask = info->typemask;
1411     unsigned typecode;
1412     TCGCumulativeArgs cum = { };
1413 
1414     /*
1415      * Parse and place any function return value.
1416      */
1417     typecode = typemask & 7;
1418     switch (typecode) {
1419     case dh_typecode_void:
1420         info->nr_out = 0;
1421         break;
1422     case dh_typecode_i32:
1423     case dh_typecode_s32:
1424     case dh_typecode_ptr:
1425         info->nr_out = 1;
1426         info->out_kind = TCG_CALL_RET_NORMAL;
1427         break;
1428     case dh_typecode_i64:
1429     case dh_typecode_s64:
1430         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1431         info->out_kind = TCG_CALL_RET_NORMAL;
1432         /* Query the last register now to trigger any assert early. */
1433         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1434         break;
1435     case dh_typecode_i128:
1436         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1437         info->out_kind = TCG_TARGET_CALL_RET_I128;
1438         switch (TCG_TARGET_CALL_RET_I128) {
1439         case TCG_CALL_RET_NORMAL:
1440             /* Query the last register now to trigger any assert early. */
1441             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1442             break;
1443         case TCG_CALL_RET_BY_VEC:
1444             /* Query the single register now to trigger any assert early. */
1445             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1446             break;
1447         case TCG_CALL_RET_BY_REF:
1448             /*
1449              * Allocate the first argument to the output.
1450              * We don't need to store this anywhere, just make it
1451              * unavailable for use in the input loop below.
1452              */
1453             cum.arg_slot = 1;
1454             break;
1455         default:
1456             qemu_build_not_reached();
1457         }
1458         break;
1459     default:
1460         g_assert_not_reached();
1461     }
1462 
1463     /*
1464      * Parse and place function arguments.
1465      */
1466     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1467         TCGCallArgumentKind kind;
1468         TCGType type;
1469 
1470         typecode = typemask & 7;
1471         switch (typecode) {
1472         case dh_typecode_i32:
1473         case dh_typecode_s32:
1474             type = TCG_TYPE_I32;
1475             break;
1476         case dh_typecode_i64:
1477         case dh_typecode_s64:
1478             type = TCG_TYPE_I64;
1479             break;
1480         case dh_typecode_ptr:
1481             type = TCG_TYPE_PTR;
1482             break;
1483         case dh_typecode_i128:
1484             type = TCG_TYPE_I128;
1485             break;
1486         default:
1487             g_assert_not_reached();
1488         }
1489 
1490         switch (type) {
1491         case TCG_TYPE_I32:
1492             switch (TCG_TARGET_CALL_ARG_I32) {
1493             case TCG_CALL_ARG_EVEN:
1494                 layout_arg_even(&cum);
1495                 /* fall through */
1496             case TCG_CALL_ARG_NORMAL:
1497                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1498                 break;
1499             case TCG_CALL_ARG_EXTEND:
1500                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1501                 layout_arg_1(&cum, info, kind);
1502                 break;
1503             default:
1504                 qemu_build_not_reached();
1505             }
1506             break;
1507 
1508         case TCG_TYPE_I64:
1509             switch (TCG_TARGET_CALL_ARG_I64) {
1510             case TCG_CALL_ARG_EVEN:
1511                 layout_arg_even(&cum);
1512                 /* fall through */
1513             case TCG_CALL_ARG_NORMAL:
1514                 if (TCG_TARGET_REG_BITS == 32) {
1515                     layout_arg_normal_n(&cum, info, 2);
1516                 } else {
1517                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1518                 }
1519                 break;
1520             default:
1521                 qemu_build_not_reached();
1522             }
1523             break;
1524 
1525         case TCG_TYPE_I128:
1526             switch (TCG_TARGET_CALL_ARG_I128) {
1527             case TCG_CALL_ARG_EVEN:
1528                 layout_arg_even(&cum);
1529                 /* fall through */
1530             case TCG_CALL_ARG_NORMAL:
1531                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1532                 break;
1533             case TCG_CALL_ARG_BY_REF:
1534                 layout_arg_by_ref(&cum, info);
1535                 break;
1536             default:
1537                 qemu_build_not_reached();
1538             }
1539             break;
1540 
1541         default:
1542             g_assert_not_reached();
1543         }
1544     }
1545     info->nr_in = cum.info_in_idx;
1546 
1547     /* Validate that we didn't overrun the input array. */
1548     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1549     /* Validate the backend has enough argument space. */
1550     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1551 
1552     /*
1553      * Relocate the "ref_slot" area to the end of the parameters.
1554      * Minimizing this stack offset helps code size for x86,
1555      * which has a signed 8-bit offset encoding.
1556      */
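    /*
     * Hypothetical example: with 6 register slots, 2 stack slots of
     * ordinary arguments, and Int128 aligned to 2 slots, ref_base becomes
     * ROUND_UP(2, 2) + 6 = 8, so the first by-ref slot lands at overall
     * slot 8, immediately after the argument area.
     */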
1557     if (cum.ref_slot != 0) {
1558         int ref_base = 0;
1559 
1560         if (cum.arg_slot > max_reg_slots) {
1561             int align = __alignof(Int128) / sizeof(tcg_target_long);
1562 
1563             ref_base = cum.arg_slot - max_reg_slots;
1564             if (align > 1) {
1565                 ref_base = ROUND_UP(ref_base, align);
1566             }
1567         }
1568         assert(ref_base + cum.ref_slot <= max_stk_slots);
1569         ref_base += max_reg_slots;
1570 
1571         if (ref_base != 0) {
1572             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1573                 TCGCallArgumentLoc *loc = &info->in[i];
1574                 switch (loc->kind) {
1575                 case TCG_CALL_ARG_BY_REF:
1576                 case TCG_CALL_ARG_BY_REF_N:
1577                     loc->ref_slot += ref_base;
1578                     break;
1579                 default:
1580                     break;
1581                 }
1582             }
1583         }
1584     }
1585 }
1586 
1587 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1588 static void process_constraint_sets(void);
1589 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1590                                             TCGReg reg, const char *name);
1591 
1592 static void tcg_context_init(unsigned max_threads)
1593 {
1594     TCGContext *s = &tcg_init_ctx;
1595     int n, i;
1596     TCGTemp *ts;
1597 
1598     memset(s, 0, sizeof(*s));
1599     s->nb_globals = 0;
1600 
1601     init_call_layout(&info_helper_ld32_mmu);
1602     init_call_layout(&info_helper_ld64_mmu);
1603     init_call_layout(&info_helper_ld128_mmu);
1604     init_call_layout(&info_helper_st32_mmu);
1605     init_call_layout(&info_helper_st64_mmu);
1606     init_call_layout(&info_helper_st128_mmu);
1607 
1608     tcg_target_init(s);
1609     process_constraint_sets();
1610 
1611     /* Reverse the order of the saved registers, assuming they're all at
1612        the start of tcg_target_reg_alloc_order.  */
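    /*
     * Illustrative sketch (hypothetical target): if the order is
     * { R4, R5, R6, R9, R10 } and only R9/R10 are call-clobbered, then
     * n == 3 and the indirect order becomes { R6, R5, R4, R9, R10 },
     * i.e. the saved registers reversed, the clobbered ones unchanged.
     */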
1613     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1614         int r = tcg_target_reg_alloc_order[n];
1615         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1616             break;
1617         }
1618     }
1619     for (i = 0; i < n; ++i) {
1620         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1621     }
1622     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1623         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1624     }
1625 
1626     tcg_ctx = s;
1627     /*
1628      * In user-mode we simply share the init context among threads, since we
1629      * use a single region. See the documentation of tcg_region_init() for the
1630      * reasoning behind this.
1631      * In system-mode we will have at most max_threads TCG threads.
1632      */
1633 #ifdef CONFIG_USER_ONLY
1634     tcg_ctxs = &tcg_ctx;
1635     tcg_cur_ctxs = 1;
1636     tcg_max_ctxs = 1;
1637 #else
1638     tcg_max_ctxs = max_threads;
1639     tcg_ctxs = g_new0(TCGContext *, max_threads);
1640 #endif
1641 
1642     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1643     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1644     tcg_env = temp_tcgv_ptr(ts);
1645 }
1646 
1647 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1648 {
1649     tcg_context_init(max_threads);
1650     tcg_region_init(tb_size, splitwx, max_threads);
1651 }
1652 
1653 /*
1654  * Allocate TBs right before their corresponding translated code, making
1655  * sure that TBs and code are on different cache lines.
1656  */
1657 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1658 {
1659     uintptr_t align = qemu_icache_linesize;
1660     TranslationBlock *tb;
1661     void *next;
1662 
1663  retry:
1664     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1665     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
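    /*
     * Resulting layout: [TB struct][pad to icache line][code ...], so
     * that the TB metadata and its code never share a cache line.
     */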
1666 
1667     if (unlikely(next > s->code_gen_highwater)) {
1668         if (tcg_region_alloc(s)) {
1669             return NULL;
1670         }
1671         goto retry;
1672     }
1673     qatomic_set(&s->code_gen_ptr, next);
1674     return tb;
1675 }
1676 
1677 void tcg_prologue_init(void)
1678 {
1679     TCGContext *s = tcg_ctx;
1680     size_t prologue_size;
1681 
1682     s->code_ptr = s->code_gen_ptr;
1683     s->code_buf = s->code_gen_ptr;
1684     s->data_gen_ptr = NULL;
1685 
1686 #ifndef CONFIG_TCG_INTERPRETER
1687     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1688 #endif
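    /*
     * Under split-wx the prologue is written through the RW mapping
     * (s->code_ptr) but executed through the RX alias obtained from
     * tcg_splitwx_to_rx() above.
     */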
1689 
1690     s->pool_labels = NULL;
1691 
1692     qemu_thread_jit_write();
1693     /* Generate the prologue.  */
1694     tcg_target_qemu_prologue(s);
1695 
1696     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1697     {
1698         int result = tcg_out_pool_finalize(s);
1699         tcg_debug_assert(result == 0);
1700     }
1701 
1702     prologue_size = tcg_current_code_size(s);
1703     perf_report_prologue(s->code_gen_ptr, prologue_size);
1704 
1705 #ifndef CONFIG_TCG_INTERPRETER
1706     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1707                         (uintptr_t)s->code_buf, prologue_size);
1708 #endif
1709 
1710     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1711         FILE *logfile = qemu_log_trylock();
1712         if (logfile) {
1713             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1714             if (s->data_gen_ptr) {
1715                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1716                 size_t data_size = prologue_size - code_size;
1717                 size_t i;
1718 
1719                 disas(logfile, s->code_gen_ptr, code_size);
1720 
1721                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1722                     if (sizeof(tcg_target_ulong) == 8) {
1723                         fprintf(logfile,
1724                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1725                                 (uintptr_t)s->data_gen_ptr + i,
1726                                 *(uint64_t *)(s->data_gen_ptr + i));
1727                     } else {
1728                         fprintf(logfile,
1729                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1730                                 (uintptr_t)s->data_gen_ptr + i,
1731                                 *(uint32_t *)(s->data_gen_ptr + i));
1732                     }
1733                 }
1734             } else {
1735                 disas(logfile, s->code_gen_ptr, prologue_size);
1736             }
1737             fprintf(logfile, "\n");
1738             qemu_log_unlock(logfile);
1739         }
1740     }
1741 
1742 #ifndef CONFIG_TCG_INTERPRETER
1743     /*
1744      * Assert that goto_ptr is implemented completely, setting an epilogue.
1745      * For tci, we use NULL as the signal to return from the interpreter,
1746      * so skip this check.
1747      */
1748     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1749 #endif
1750 
1751     tcg_region_prologue_set(s);
1752 }
1753 
1754 void tcg_func_start(TCGContext *s)
1755 {
1756     tcg_pool_reset(s);
1757     s->nb_temps = s->nb_globals;
1758 
1759     /* No temps have been previously allocated for size or locality.  */
1760     tcg_temp_ebb_reset_freed(s);
1761 
1762     /* No constant temps have been previously allocated. */
1763     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1764         if (s->const_table[i]) {
1765             g_hash_table_remove_all(s->const_table[i]);
1766         }
1767     }
1768 
1769     s->nb_ops = 0;
1770     s->nb_labels = 0;
1771     s->current_frame_offset = s->frame_start;
1772 
1773 #ifdef CONFIG_DEBUG_TCG
1774     s->goto_tb_issue_mask = 0;
1775 #endif
1776 
1777     QTAILQ_INIT(&s->ops);
1778     QTAILQ_INIT(&s->free_ops);
1779     s->emit_before_op = NULL;
1780     QSIMPLEQ_INIT(&s->labels);
1781 
1782     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1783     tcg_debug_assert(s->insn_start_words > 0);
1784 }
1785 
1786 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1787 {
1788     int n = s->nb_temps++;
1789 
1790     if (n >= TCG_MAX_TEMPS) {
1791         tcg_raise_tb_overflow(s);
1792     }
1793     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1794 }
1795 
1796 static TCGTemp *tcg_global_alloc(TCGContext *s)
1797 {
1798     TCGTemp *ts;
1799 
1800     tcg_debug_assert(s->nb_globals == s->nb_temps);
1801     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1802     s->nb_globals++;
1803     ts = tcg_temp_alloc(s);
1804     ts->kind = TEMP_GLOBAL;
1805 
1806     return ts;
1807 }
1808 
1809 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1810                                             TCGReg reg, const char *name)
1811 {
1812     TCGTemp *ts;
1813 
1814     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1815 
1816     ts = tcg_global_alloc(s);
1817     ts->base_type = type;
1818     ts->type = type;
1819     ts->kind = TEMP_FIXED;
1820     ts->reg = reg;
1821     ts->name = name;
1822     tcg_regset_set_reg(s->reserved_regs, reg);
1823 
1824     return ts;
1825 }
1826 
1827 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1828 {
1829     s->frame_start = start;
1830     s->frame_end = start + size;
1831     s->frame_temp
1832         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1833 }
1834 
1835 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1836                                             const char *name, TCGType type)
1837 {
1838     TCGContext *s = tcg_ctx;
1839     TCGTemp *base_ts = tcgv_ptr_temp(base);
1840     TCGTemp *ts = tcg_global_alloc(s);
1841     int indirect_reg = 0;
1842 
1843     switch (base_ts->kind) {
1844     case TEMP_FIXED:
1845         break;
1846     case TEMP_GLOBAL:
1847         /* We do not support double-indirect registers.  */
1848         tcg_debug_assert(!base_ts->indirect_reg);
1849         base_ts->indirect_base = 1;
1850         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1851                             ? 2 : 1);
1852         indirect_reg = 1;
1853         break;
1854     default:
1855         g_assert_not_reached();
1856     }
1857 
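    /*
     * On a 32-bit host a 64-bit global becomes two adjacent 32-bit temps,
     * "<name>_0" at offset and "<name>_1" at offset + 4, with the halves
     * distinguished by temp_subindex.
     */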
1858     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1859         TCGTemp *ts2 = tcg_global_alloc(s);
1860         char buf[64];
1861 
1862         ts->base_type = TCG_TYPE_I64;
1863         ts->type = TCG_TYPE_I32;
1864         ts->indirect_reg = indirect_reg;
1865         ts->mem_allocated = 1;
1866         ts->mem_base = base_ts;
1867         ts->mem_offset = offset;
1868         pstrcpy(buf, sizeof(buf), name);
1869         pstrcat(buf, sizeof(buf), "_0");
1870         ts->name = strdup(buf);
1871 
1872         tcg_debug_assert(ts2 == ts + 1);
1873         ts2->base_type = TCG_TYPE_I64;
1874         ts2->type = TCG_TYPE_I32;
1875         ts2->indirect_reg = indirect_reg;
1876         ts2->mem_allocated = 1;
1877         ts2->mem_base = base_ts;
1878         ts2->mem_offset = offset + 4;
1879         ts2->temp_subindex = 1;
1880         pstrcpy(buf, sizeof(buf), name);
1881         pstrcat(buf, sizeof(buf), "_1");
1882         ts2->name = strdup(buf);
1883     } else {
1884         ts->base_type = type;
1885         ts->type = type;
1886         ts->indirect_reg = indirect_reg;
1887         ts->mem_allocated = 1;
1888         ts->mem_base = base_ts;
1889         ts->mem_offset = offset;
1890         ts->name = name;
1891     }
1892     return ts;
1893 }
1894 
1895 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1896 {
1897     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1898     return temp_tcgv_i32(ts);
1899 }
1900 
1901 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1902 {
1903     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1904     return temp_tcgv_i64(ts);
1905 }
1906 
1907 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1908 {
1909     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1910     return temp_tcgv_ptr(ts);
1911 }
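/*
 * Illustrative front-end usage of the wrappers above (the state field and
 * variable are hypothetical): globals are registered once at startup, e.g.
 *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                     offsetof(CPUArchState, pc), "pc");
 */
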
1912 
1913 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1914 {
1915     TCGContext *s = tcg_ctx;
1916     TCGTemp *ts;
1917     int n;
1918 
1919     if (kind == TEMP_EBB) {
1920         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1921 
1922         if (idx < TCG_MAX_TEMPS) {
1923             /* There is already an available temp with the right type.  */
1924             clear_bit(idx, s->free_temps[type].l);
1925 
1926             ts = &s->temps[idx];
1927             ts->temp_allocated = 1;
1928             tcg_debug_assert(ts->base_type == type);
1929             tcg_debug_assert(ts->kind == kind);
1930             return ts;
1931         }
1932     } else {
1933         tcg_debug_assert(kind == TEMP_TB);
1934     }
1935 
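    /* How many host-register-sized pieces are needed to back TYPE. */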
1936     switch (type) {
1937     case TCG_TYPE_I32:
1938     case TCG_TYPE_V64:
1939     case TCG_TYPE_V128:
1940     case TCG_TYPE_V256:
1941         n = 1;
1942         break;
1943     case TCG_TYPE_I64:
1944         n = 64 / TCG_TARGET_REG_BITS;
1945         break;
1946     case TCG_TYPE_I128:
1947         n = 128 / TCG_TARGET_REG_BITS;
1948         break;
1949     default:
1950         g_assert_not_reached();
1951     }
1952 
1953     ts = tcg_temp_alloc(s);
1954     ts->base_type = type;
1955     ts->temp_allocated = 1;
1956     ts->kind = kind;
1957 
1958     if (n == 1) {
1959         ts->type = type;
1960     } else {
1961         ts->type = TCG_TYPE_REG;
1962 
1963         for (int i = 1; i < n; ++i) {
1964             TCGTemp *ts2 = tcg_temp_alloc(s);
1965 
1966             tcg_debug_assert(ts2 == ts + i);
1967             ts2->base_type = type;
1968             ts2->type = TCG_TYPE_REG;
1969             ts2->temp_allocated = 1;
1970             ts2->temp_subindex = i;
1971             ts2->kind = kind;
1972         }
1973     }
1974     return ts;
1975 }
1976 
1977 TCGv_i32 tcg_temp_new_i32(void)
1978 {
1979     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1980 }
1981 
1982 TCGv_i32 tcg_temp_ebb_new_i32(void)
1983 {
1984     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1985 }
1986 
1987 TCGv_i64 tcg_temp_new_i64(void)
1988 {
1989     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1990 }
1991 
1992 TCGv_i64 tcg_temp_ebb_new_i64(void)
1993 {
1994     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1995 }
1996 
1997 TCGv_ptr tcg_temp_new_ptr(void)
1998 {
1999     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2000 }
2001 
2002 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2003 {
2004     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2005 }
2006 
2007 TCGv_i128 tcg_temp_new_i128(void)
2008 {
2009     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2010 }
2011 
2012 TCGv_i128 tcg_temp_ebb_new_i128(void)
2013 {
2014     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2015 }
2016 
2017 TCGv_vec tcg_temp_new_vec(TCGType type)
2018 {
2019     TCGTemp *t;
2020 
2021 #ifdef CONFIG_DEBUG_TCG
2022     switch (type) {
2023     case TCG_TYPE_V64:
2024         assert(TCG_TARGET_HAS_v64);
2025         break;
2026     case TCG_TYPE_V128:
2027         assert(TCG_TARGET_HAS_v128);
2028         break;
2029     case TCG_TYPE_V256:
2030         assert(TCG_TARGET_HAS_v256);
2031         break;
2032     default:
2033         g_assert_not_reached();
2034     }
2035 #endif
2036 
2037     t = tcg_temp_new_internal(type, TEMP_EBB);
2038     return temp_tcgv_vec(t);
2039 }
2040 
2041 /* Create a new temp of the same type as an existing temp.  */
2042 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2043 {
2044     TCGTemp *t = tcgv_vec_temp(match);
2045 
2046     tcg_debug_assert(t->temp_allocated != 0);
2047 
2048     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2049     return temp_tcgv_vec(t);
2050 }
2051 
2052 void tcg_temp_free_internal(TCGTemp *ts)
2053 {
2054     TCGContext *s = tcg_ctx;
2055 
2056     switch (ts->kind) {
2057     case TEMP_CONST:
2058     case TEMP_TB:
2059         /* Silently ignore free. */
2060         break;
2061     case TEMP_EBB:
2062         tcg_debug_assert(ts->temp_allocated != 0);
2063         ts->temp_allocated = 0;
2064         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2065         break;
2066     default:
2067         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2068         g_assert_not_reached();
2069     }
2070 }
2071 
2072 void tcg_temp_free_i32(TCGv_i32 arg)
2073 {
2074     tcg_temp_free_internal(tcgv_i32_temp(arg));
2075 }
2076 
2077 void tcg_temp_free_i64(TCGv_i64 arg)
2078 {
2079     tcg_temp_free_internal(tcgv_i64_temp(arg));
2080 }
2081 
2082 void tcg_temp_free_i128(TCGv_i128 arg)
2083 {
2084     tcg_temp_free_internal(tcgv_i128_temp(arg));
2085 }
2086 
2087 void tcg_temp_free_ptr(TCGv_ptr arg)
2088 {
2089     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2090 }
2091 
2092 void tcg_temp_free_vec(TCGv_vec arg)
2093 {
2094     tcg_temp_free_internal(tcgv_vec_temp(arg));
2095 }
2096 
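/*
 * Constants are interned: one canonical TCGTemp per (type, value) pair,
 * kept in a per-type hash table for the life of the context.  A minimal
 * usage sketch, via the typed wrappers below:
 *     TCGv_i32 one = tcg_constant_i32(1);
 * Freeing such a temp is silently ignored (see tcg_temp_free_internal).
 */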
2097 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2098 {
2099     TCGContext *s = tcg_ctx;
2100     GHashTable *h = s->const_table[type];
2101     TCGTemp *ts;
2102 
2103     if (h == NULL) {
2104         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2105         s->const_table[type] = h;
2106     }
2107 
2108     ts = g_hash_table_lookup(h, &val);
2109     if (ts == NULL) {
2110         int64_t *val_ptr;
2111 
2112         ts = tcg_temp_alloc(s);
2113 
2114         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2115             TCGTemp *ts2 = tcg_temp_alloc(s);
2116 
2117             tcg_debug_assert(ts2 == ts + 1);
2118 
2119             ts->base_type = TCG_TYPE_I64;
2120             ts->type = TCG_TYPE_I32;
2121             ts->kind = TEMP_CONST;
2122             ts->temp_allocated = 1;
2123 
2124             ts2->base_type = TCG_TYPE_I64;
2125             ts2->type = TCG_TYPE_I32;
2126             ts2->kind = TEMP_CONST;
2127             ts2->temp_allocated = 1;
2128             ts2->temp_subindex = 1;
2129 
2130             /*
2131              * Retain the full value of the 64-bit constant in the low
2132              * part, so that the hash table works.  Actual uses will
2133              * truncate the value to the low part.
2134              */
2135             ts[HOST_BIG_ENDIAN].val = val;
2136             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2137             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2138         } else {
2139             ts->base_type = type;
2140             ts->type = type;
2141             ts->kind = TEMP_CONST;
2142             ts->temp_allocated = 1;
2143             ts->val = val;
2144             val_ptr = &ts->val;
2145         }
2146         g_hash_table_insert(h, val_ptr, ts);
2147     }
2148 
2149     return ts;
2150 }
2151 
2152 TCGv_i32 tcg_constant_i32(int32_t val)
2153 {
2154     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2155 }
2156 
2157 TCGv_i64 tcg_constant_i64(int64_t val)
2158 {
2159     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2160 }
2161 
2162 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2163 {
2164     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2165 }
2166 
2167 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2168 {
2169     val = dup_const(vece, val);
2170     return temp_tcgv_vec(tcg_constant_internal(type, val));
2171 }
2172 
2173 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2174 {
2175     TCGTemp *t = tcgv_vec_temp(match);
2176 
2177     tcg_debug_assert(t->temp_allocated != 0);
2178     return tcg_constant_vec(t->base_type, vece, val);
2179 }
2180 
2181 #ifdef CONFIG_DEBUG_TCG
2182 size_t temp_idx(TCGTemp *ts)
2183 {
2184     ptrdiff_t n = ts - tcg_ctx->temps;
2185     assert(n >= 0 && n < tcg_ctx->nb_temps);
2186     return n;
2187 }
2188 
2189 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2190 {
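    /* A TCGv handle is the byte offset of its TCGTemp within TCGContext. */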
2191     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2192 
2193     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2194     assert(o % sizeof(TCGTemp) == 0);
2195 
2196     return (void *)tcg_ctx + (uintptr_t)v;
2197 }
2198 #endif /* CONFIG_DEBUG_TCG */
2199 
2200 /*
2201  * Return true if OP may appear in the opcode stream with TYPE.
2202  * Test the runtime variable that controls each opcode.
2203  */
2204 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2205 {
2206     bool has_type;
2207 
2208     switch (type) {
2209     case TCG_TYPE_I32:
2210         has_type = true;
2211         break;
2212     case TCG_TYPE_I64:
2213         has_type = TCG_TARGET_REG_BITS == 64;
2214         break;
2215     case TCG_TYPE_V64:
2216         has_type = TCG_TARGET_HAS_v64;
2217         break;
2218     case TCG_TYPE_V128:
2219         has_type = TCG_TARGET_HAS_v128;
2220         break;
2221     case TCG_TYPE_V256:
2222         has_type = TCG_TARGET_HAS_v256;
2223         break;
2224     default:
2225         has_type = false;
2226         break;
2227     }
2228 
2229     switch (op) {
2230     case INDEX_op_discard:
2231     case INDEX_op_set_label:
2232     case INDEX_op_call:
2233     case INDEX_op_br:
2234     case INDEX_op_mb:
2235     case INDEX_op_insn_start:
2236     case INDEX_op_exit_tb:
2237     case INDEX_op_goto_tb:
2238     case INDEX_op_goto_ptr:
2239     case INDEX_op_qemu_ld_i32:
2240     case INDEX_op_qemu_st_i32:
2241     case INDEX_op_qemu_ld_i64:
2242     case INDEX_op_qemu_st_i64:
2243         return true;
2244 
2245     case INDEX_op_qemu_st8_i32:
2246         return TCG_TARGET_HAS_qemu_st8_i32;
2247 
2248     case INDEX_op_qemu_ld_i128:
2249     case INDEX_op_qemu_st_i128:
2250         return TCG_TARGET_HAS_qemu_ldst_i128;
2251 
2252     case INDEX_op_add:
2253     case INDEX_op_and:
2254     case INDEX_op_mov:
2255     case INDEX_op_or:
2256     case INDEX_op_xor:
2257         return has_type;
2258 
2259     case INDEX_op_setcond_i32:
2260     case INDEX_op_brcond_i32:
2261     case INDEX_op_movcond_i32:
2262     case INDEX_op_ld8u_i32:
2263     case INDEX_op_ld8s_i32:
2264     case INDEX_op_ld16u_i32:
2265     case INDEX_op_ld16s_i32:
2266     case INDEX_op_ld_i32:
2267     case INDEX_op_st8_i32:
2268     case INDEX_op_st16_i32:
2269     case INDEX_op_st_i32:
2270     case INDEX_op_extract_i32:
2271     case INDEX_op_sextract_i32:
2272     case INDEX_op_deposit_i32:
2273         return true;
2274 
2275     case INDEX_op_negsetcond_i32:
2276         return TCG_TARGET_HAS_negsetcond_i32;
2277     case INDEX_op_extract2_i32:
2278         return TCG_TARGET_HAS_extract2_i32;
2279     case INDEX_op_add2_i32:
2280         return TCG_TARGET_HAS_add2_i32;
2281     case INDEX_op_sub2_i32:
2282         return TCG_TARGET_HAS_sub2_i32;
2283     case INDEX_op_mulu2_i32:
2284         return TCG_TARGET_HAS_mulu2_i32;
2285     case INDEX_op_muls2_i32:
2286         return TCG_TARGET_HAS_muls2_i32;
2287     case INDEX_op_bswap16_i32:
2288         return TCG_TARGET_HAS_bswap16_i32;
2289     case INDEX_op_bswap32_i32:
2290         return TCG_TARGET_HAS_bswap32_i32;
2291     case INDEX_op_clz_i32:
2292         return TCG_TARGET_HAS_clz_i32;
2293     case INDEX_op_ctz_i32:
2294         return TCG_TARGET_HAS_ctz_i32;
2295     case INDEX_op_ctpop_i32:
2296         return TCG_TARGET_HAS_ctpop_i32;
2297 
2298     case INDEX_op_brcond2_i32:
2299     case INDEX_op_setcond2_i32:
2300         return TCG_TARGET_REG_BITS == 32;
2301 
2302     case INDEX_op_setcond_i64:
2303     case INDEX_op_brcond_i64:
2304     case INDEX_op_movcond_i64:
2305     case INDEX_op_ld8u_i64:
2306     case INDEX_op_ld8s_i64:
2307     case INDEX_op_ld16u_i64:
2308     case INDEX_op_ld16s_i64:
2309     case INDEX_op_ld32u_i64:
2310     case INDEX_op_ld32s_i64:
2311     case INDEX_op_ld_i64:
2312     case INDEX_op_st8_i64:
2313     case INDEX_op_st16_i64:
2314     case INDEX_op_st32_i64:
2315     case INDEX_op_st_i64:
2316     case INDEX_op_ext_i32_i64:
2317     case INDEX_op_extu_i32_i64:
2318     case INDEX_op_extract_i64:
2319     case INDEX_op_sextract_i64:
2320     case INDEX_op_deposit_i64:
2321         return TCG_TARGET_REG_BITS == 64;
2322 
2323     case INDEX_op_negsetcond_i64:
2324         return TCG_TARGET_HAS_negsetcond_i64;
2325     case INDEX_op_extract2_i64:
2326         return TCG_TARGET_HAS_extract2_i64;
2327     case INDEX_op_extrl_i64_i32:
2328     case INDEX_op_extrh_i64_i32:
2329         return TCG_TARGET_HAS_extr_i64_i32;
2330     case INDEX_op_bswap16_i64:
2331         return TCG_TARGET_HAS_bswap16_i64;
2332     case INDEX_op_bswap32_i64:
2333         return TCG_TARGET_HAS_bswap32_i64;
2334     case INDEX_op_bswap64_i64:
2335         return TCG_TARGET_HAS_bswap64_i64;
2336     case INDEX_op_clz_i64:
2337         return TCG_TARGET_HAS_clz_i64;
2338     case INDEX_op_ctz_i64:
2339         return TCG_TARGET_HAS_ctz_i64;
2340     case INDEX_op_ctpop_i64:
2341         return TCG_TARGET_HAS_ctpop_i64;
2342     case INDEX_op_add2_i64:
2343         return TCG_TARGET_HAS_add2_i64;
2344     case INDEX_op_sub2_i64:
2345         return TCG_TARGET_HAS_sub2_i64;
2346     case INDEX_op_mulu2_i64:
2347         return TCG_TARGET_HAS_mulu2_i64;
2348     case INDEX_op_muls2_i64:
2349         return TCG_TARGET_HAS_muls2_i64;
2350 
2351     case INDEX_op_mov_vec:
2352     case INDEX_op_dup_vec:
2353     case INDEX_op_dupm_vec:
2354     case INDEX_op_ld_vec:
2355     case INDEX_op_st_vec:
2356     case INDEX_op_add_vec:
2357     case INDEX_op_sub_vec:
2358     case INDEX_op_and_vec:
2359     case INDEX_op_or_vec:
2360     case INDEX_op_xor_vec:
2361     case INDEX_op_cmp_vec:
2362         return has_type;
2363     case INDEX_op_dup2_vec:
2364         return has_type && TCG_TARGET_REG_BITS == 32;
2365     case INDEX_op_not_vec:
2366         return has_type && TCG_TARGET_HAS_not_vec;
2367     case INDEX_op_neg_vec:
2368         return has_type && TCG_TARGET_HAS_neg_vec;
2369     case INDEX_op_abs_vec:
2370         return has_type && TCG_TARGET_HAS_abs_vec;
2371     case INDEX_op_andc_vec:
2372         return has_type && TCG_TARGET_HAS_andc_vec;
2373     case INDEX_op_orc_vec:
2374         return has_type && TCG_TARGET_HAS_orc_vec;
2375     case INDEX_op_nand_vec:
2376         return has_type && TCG_TARGET_HAS_nand_vec;
2377     case INDEX_op_nor_vec:
2378         return has_type && TCG_TARGET_HAS_nor_vec;
2379     case INDEX_op_eqv_vec:
2380         return has_type && TCG_TARGET_HAS_eqv_vec;
2381     case INDEX_op_mul_vec:
2382         return has_type && TCG_TARGET_HAS_mul_vec;
2383     case INDEX_op_shli_vec:
2384     case INDEX_op_shri_vec:
2385     case INDEX_op_sari_vec:
2386         return has_type && TCG_TARGET_HAS_shi_vec;
2387     case INDEX_op_shls_vec:
2388     case INDEX_op_shrs_vec:
2389     case INDEX_op_sars_vec:
2390         return has_type && TCG_TARGET_HAS_shs_vec;
2391     case INDEX_op_shlv_vec:
2392     case INDEX_op_shrv_vec:
2393     case INDEX_op_sarv_vec:
2394         return has_type && TCG_TARGET_HAS_shv_vec;
2395     case INDEX_op_rotli_vec:
2396         return has_type && TCG_TARGET_HAS_roti_vec;
2397     case INDEX_op_rotls_vec:
2398         return has_type && TCG_TARGET_HAS_rots_vec;
2399     case INDEX_op_rotlv_vec:
2400     case INDEX_op_rotrv_vec:
2401         return has_type && TCG_TARGET_HAS_rotv_vec;
2402     case INDEX_op_ssadd_vec:
2403     case INDEX_op_usadd_vec:
2404     case INDEX_op_sssub_vec:
2405     case INDEX_op_ussub_vec:
2406         return has_type && TCG_TARGET_HAS_sat_vec;
2407     case INDEX_op_smin_vec:
2408     case INDEX_op_umin_vec:
2409     case INDEX_op_smax_vec:
2410     case INDEX_op_umax_vec:
2411         return has_type && TCG_TARGET_HAS_minmax_vec;
2412     case INDEX_op_bitsel_vec:
2413         return has_type && TCG_TARGET_HAS_bitsel_vec;
2414     case INDEX_op_cmpsel_vec:
2415         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2416 
2417     default:
2418         if (op < INDEX_op_last_generic) {
2419             const TCGOutOp *outop;
2420             TCGConstraintSetIndex con_set;
2421 
2422             if (!has_type) {
2423                 return false;
2424             }
2425 
2426             outop = all_outop[op];
2427             tcg_debug_assert(outop != NULL);
2428 
2429             con_set = outop->static_constraint;
2430             if (con_set == C_Dynamic) {
2431                 con_set = outop->dynamic_constraint(type, flags);
2432             }
2433             if (con_set >= 0) {
2434                 return true;
2435             }
2436             tcg_debug_assert(con_set == C_NotImplemented);
2437             return false;
2438         }
2439         tcg_debug_assert(op < NB_OPS);
2440         return true;
2441 
2442     case INDEX_op_last_generic:
2443         g_assert_not_reached();
2444     }
2445 }
2446 
2447 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2448 {
2449     unsigned width;
2450 
2451     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2452     width = (type == TCG_TYPE_I32 ? 32 : 64);
2453 
2454     tcg_debug_assert(ofs < width);
2455     tcg_debug_assert(len > 0);
2456     tcg_debug_assert(len <= width - ofs);
2457 
2458     return TCG_TARGET_deposit_valid(type, ofs, len);
2459 }
2460 
2461 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2462 
2463 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2464                           TCGTemp *ret, TCGTemp **args)
2465 {
2466     TCGv_i64 extend_free[MAX_CALL_IARGS];
2467     int n_extend = 0;
2468     TCGOp *op;
2469     int i, n, pi = 0, total_args;
2470 
2471     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2472         init_call_layout(info);
2473         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2474     }
2475 
2476     total_args = info->nr_out + info->nr_in + 2;
2477     op = tcg_op_alloc(INDEX_op_call, total_args);
2478 
2479 #ifdef CONFIG_PLUGIN
2480     /* Flag helpers that may affect guest state */
2481     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2482         tcg_ctx->plugin_insn->calls_helpers = true;
2483     }
2484 #endif
2485 
2486     TCGOP_CALLO(op) = n = info->nr_out;
2487     switch (n) {
2488     case 0:
2489         tcg_debug_assert(ret == NULL);
2490         break;
2491     case 1:
2492         tcg_debug_assert(ret != NULL);
2493         op->args[pi++] = temp_arg(ret);
2494         break;
2495     case 2:
2496     case 4:
2497         tcg_debug_assert(ret != NULL);
2498         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2499         tcg_debug_assert(ret->temp_subindex == 0);
2500         for (i = 0; i < n; ++i) {
2501             op->args[pi++] = temp_arg(ret + i);
2502         }
2503         break;
2504     default:
2505         g_assert_not_reached();
2506     }
2507 
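    /*
     * Marshal the inputs per the precomputed layout; a 32-bit value may
     * first be widened into a scratch i64 (TCG_CALL_ARG_EXTEND_*) when
     * the ABI requires sign- or zero-extended registers.
     */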
2508     TCGOP_CALLI(op) = n = info->nr_in;
2509     for (i = 0; i < n; i++) {
2510         const TCGCallArgumentLoc *loc = &info->in[i];
2511         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2512 
2513         switch (loc->kind) {
2514         case TCG_CALL_ARG_NORMAL:
2515         case TCG_CALL_ARG_BY_REF:
2516         case TCG_CALL_ARG_BY_REF_N:
2517             op->args[pi++] = temp_arg(ts);
2518             break;
2519 
2520         case TCG_CALL_ARG_EXTEND_U:
2521         case TCG_CALL_ARG_EXTEND_S:
2522             {
2523                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2524                 TCGv_i32 orig = temp_tcgv_i32(ts);
2525 
2526                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2527                     tcg_gen_ext_i32_i64(temp, orig);
2528                 } else {
2529                     tcg_gen_extu_i32_i64(temp, orig);
2530                 }
2531                 op->args[pi++] = tcgv_i64_arg(temp);
2532                 extend_free[n_extend++] = temp;
2533             }
2534             break;
2535 
2536         default:
2537             g_assert_not_reached();
2538         }
2539     }
2540     op->args[pi++] = (uintptr_t)func;
2541     op->args[pi++] = (uintptr_t)info;
2542     tcg_debug_assert(pi == total_args);
2543 
2544     if (tcg_ctx->emit_before_op) {
2545         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2546     } else {
2547         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2548     }
2549 
2550     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2551     for (i = 0; i < n_extend; ++i) {
2552         tcg_temp_free_i64(extend_free[i]);
2553     }
2554 }
2555 
2556 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2557 {
2558     tcg_gen_callN(func, info, ret, NULL);
2559 }
2560 
2561 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2562 {
2563     tcg_gen_callN(func, info, ret, &t1);
2564 }
2565 
2566 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2567                    TCGTemp *t1, TCGTemp *t2)
2568 {
2569     TCGTemp *args[2] = { t1, t2 };
2570     tcg_gen_callN(func, info, ret, args);
2571 }
2572 
2573 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2574                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2575 {
2576     TCGTemp *args[3] = { t1, t2, t3 };
2577     tcg_gen_callN(func, info, ret, args);
2578 }
2579 
2580 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2581                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2582 {
2583     TCGTemp *args[4] = { t1, t2, t3, t4 };
2584     tcg_gen_callN(func, info, ret, args);
2585 }
2586 
2587 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2588                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2589 {
2590     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2591     tcg_gen_callN(func, info, ret, args);
2592 }
2593 
2594 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2595                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2596                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2597 {
2598     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2599     tcg_gen_callN(func, info, ret, args);
2600 }
2601 
2602 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2603                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2604                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2605 {
2606     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2607     tcg_gen_callN(func, info, ret, args);
2608 }
2609 
2610 static void tcg_reg_alloc_start(TCGContext *s)
2611 {
2612     int i, n;
2613 
2614     for (i = 0, n = s->nb_temps; i < n; i++) {
2615         TCGTemp *ts = &s->temps[i];
2616         TCGTempVal val = TEMP_VAL_MEM;
2617 
2618         switch (ts->kind) {
2619         case TEMP_CONST:
2620             val = TEMP_VAL_CONST;
2621             break;
2622         case TEMP_FIXED:
2623             val = TEMP_VAL_REG;
2624             break;
2625         case TEMP_GLOBAL:
2626             break;
2627         case TEMP_EBB:
2628             val = TEMP_VAL_DEAD;
2629             /* fall through */
2630         case TEMP_TB:
2631             ts->mem_allocated = 0;
2632             break;
2633         default:
2634             g_assert_not_reached();
2635         }
2636         ts->val_type = val;
2637     }
2638 
2639     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2640 }
2641 
2642 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2643                                  TCGTemp *ts)
2644 {
2645     int idx = temp_idx(ts);
2646 
2647     switch (ts->kind) {
2648     case TEMP_FIXED:
2649     case TEMP_GLOBAL:
2650         pstrcpy(buf, buf_size, ts->name);
2651         break;
2652     case TEMP_TB:
2653         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2654         break;
2655     case TEMP_EBB:
2656         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2657         break;
2658     case TEMP_CONST:
2659         switch (ts->type) {
2660         case TCG_TYPE_I32:
2661             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2662             break;
2663 #if TCG_TARGET_REG_BITS > 32
2664         case TCG_TYPE_I64:
2665             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2666             break;
2667 #endif
2668         case TCG_TYPE_V64:
2669         case TCG_TYPE_V128:
2670         case TCG_TYPE_V256:
2671             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2672                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2673             break;
2674         default:
2675             g_assert_not_reached();
2676         }
2677         break;
2678     }
2679     return buf;
2680 }
2681 
2682 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2683                              int buf_size, TCGArg arg)
2684 {
2685     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2686 }
2687 
2688 static const char * const cond_name[] =
2689 {
2690     [TCG_COND_NEVER] = "never",
2691     [TCG_COND_ALWAYS] = "always",
2692     [TCG_COND_EQ] = "eq",
2693     [TCG_COND_NE] = "ne",
2694     [TCG_COND_LT] = "lt",
2695     [TCG_COND_GE] = "ge",
2696     [TCG_COND_LE] = "le",
2697     [TCG_COND_GT] = "gt",
2698     [TCG_COND_LTU] = "ltu",
2699     [TCG_COND_GEU] = "geu",
2700     [TCG_COND_LEU] = "leu",
2701     [TCG_COND_GTU] = "gtu",
2702     [TCG_COND_TSTEQ] = "tsteq",
2703     [TCG_COND_TSTNE] = "tstne",
2704 };
2705 
2706 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2707 {
2708     [MO_UB]   = "ub",
2709     [MO_SB]   = "sb",
2710     [MO_LEUW] = "leuw",
2711     [MO_LESW] = "lesw",
2712     [MO_LEUL] = "leul",
2713     [MO_LESL] = "lesl",
2714     [MO_LEUQ] = "leq",
2715     [MO_BEUW] = "beuw",
2716     [MO_BESW] = "besw",
2717     [MO_BEUL] = "beul",
2718     [MO_BESL] = "besl",
2719     [MO_BEUQ] = "beq",
2720     [MO_128 + MO_BE] = "beo",
2721     [MO_128 + MO_LE] = "leo",
2722 };
2723 
2724 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2725     [MO_UNALN >> MO_ASHIFT]    = "un+",
2726     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2727     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2728     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2729     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2730     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2731     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2732     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2733 };
2734 
2735 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2736     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2737     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2738     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2739     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2740     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2741     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2742 };
2743 
2744 static const char bswap_flag_name[][6] = {
2745     [TCG_BSWAP_IZ] = "iz",
2746     [TCG_BSWAP_OZ] = "oz",
2747     [TCG_BSWAP_OS] = "os",
2748     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2749     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2750 };
2751 
2752 #ifdef CONFIG_PLUGIN
2753 static const char * const plugin_from_name[] = {
2754     "from-tb",
2755     "from-insn",
2756     "after-insn",
2757     "after-tb",
2758 };
2759 #endif
2760 
2761 static inline bool tcg_regset_single(TCGRegSet d)
2762 {
2763     return (d & (d - 1)) == 0;
2764 }
2765 
2766 static inline TCGReg tcg_regset_first(TCGRegSet d)
2767 {
2768     if (TCG_TARGET_NB_REGS <= 32) {
2769         return ctz32(d);
2770     } else {
2771         return ctz64(d);
2772     }
2773 }
2774 
2775 /* Return only the number of characters output -- no error return. */
2776 #define ne_fprintf(...) \
2777     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2778 
2779 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2780 {
2781     char buf[128];
2782     TCGOp *op;
2783 
2784     QTAILQ_FOREACH(op, &s->ops, link) {
2785         int i, k, nb_oargs, nb_iargs, nb_cargs;
2786         const TCGOpDef *def;
2787         TCGOpcode c;
2788         int col = 0;
2789 
2790         c = op->opc;
2791         def = &tcg_op_defs[c];
2792 
2793         if (c == INDEX_op_insn_start) {
2794             nb_oargs = 0;
2795             col += ne_fprintf(f, "\n ----");
2796 
2797             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2798                 col += ne_fprintf(f, " %016" PRIx64,
2799                                   tcg_get_insn_start_param(op, i));
2800             }
2801         } else if (c == INDEX_op_call) {
2802             const TCGHelperInfo *info = tcg_call_info(op);
2803             void *func = tcg_call_func(op);
2804 
2805             /* variable number of arguments */
2806             nb_oargs = TCGOP_CALLO(op);
2807             nb_iargs = TCGOP_CALLI(op);
2808             nb_cargs = def->nb_cargs;
2809 
2810             col += ne_fprintf(f, " %s ", def->name);
2811 
2812             /*
2813              * Print the function name from TCGHelperInfo, if available.
2814              * Note that plugins have a template function for the info,
2815              * but the actual function pointer comes from the plugin.
2816              */
2817             if (func == info->func) {
2818                 col += ne_fprintf(f, "%s", info->name);
2819             } else {
2820                 col += ne_fprintf(f, "plugin(%p)", func);
2821             }
2822 
2823             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2824             for (i = 0; i < nb_oargs; i++) {
2825                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2826                                                             op->args[i]));
2827             }
2828             for (i = 0; i < nb_iargs; i++) {
2829                 TCGArg arg = op->args[nb_oargs + i];
2830                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2831                 col += ne_fprintf(f, ",%s", t);
2832             }
2833         } else {
2834             if (def->flags & TCG_OPF_INT) {
2835                 col += ne_fprintf(f, " %s_i%d ",
2836                                   def->name,
2837                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2838             } else if (def->flags & TCG_OPF_VECTOR) {
2839                 col += ne_fprintf(f, "%s v%d,e%d,",
2840                                   def->name,
2841                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2842                                   8 << TCGOP_VECE(op));
2843             } else {
2844                 col += ne_fprintf(f, " %s ", def->name);
2845             }
2846 
2847             nb_oargs = def->nb_oargs;
2848             nb_iargs = def->nb_iargs;
2849             nb_cargs = def->nb_cargs;
2850 
2851             k = 0;
2852             for (i = 0; i < nb_oargs; i++) {
2853                 const char *sep = k ? "," : "";
2854                 col += ne_fprintf(f, "%s%s", sep,
2855                                   tcg_get_arg_str(s, buf, sizeof(buf),
2856                                                   op->args[k++]));
2857             }
2858             for (i = 0; i < nb_iargs; i++) {
2859                 const char *sep = k ? "," : "";
2860                 col += ne_fprintf(f, "%s%s", sep,
2861                                   tcg_get_arg_str(s, buf, sizeof(buf),
2862                                                   op->args[k++]));
2863             }
2864             switch (c) {
2865             case INDEX_op_brcond_i32:
2866             case INDEX_op_setcond_i32:
2867             case INDEX_op_negsetcond_i32:
2868             case INDEX_op_movcond_i32:
2869             case INDEX_op_brcond2_i32:
2870             case INDEX_op_setcond2_i32:
2871             case INDEX_op_brcond_i64:
2872             case INDEX_op_setcond_i64:
2873             case INDEX_op_negsetcond_i64:
2874             case INDEX_op_movcond_i64:
2875             case INDEX_op_cmp_vec:
2876             case INDEX_op_cmpsel_vec:
2877                 if (op->args[k] < ARRAY_SIZE(cond_name)
2878                     && cond_name[op->args[k]]) {
2879                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2880                 } else {
2881                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2882                 }
2883                 i = 1;
2884                 break;
2885             case INDEX_op_qemu_ld_i32:
2886             case INDEX_op_qemu_st_i32:
2887             case INDEX_op_qemu_st8_i32:
2888             case INDEX_op_qemu_ld_i64:
2889             case INDEX_op_qemu_st_i64:
2890             case INDEX_op_qemu_ld_i128:
2891             case INDEX_op_qemu_st_i128:
2892                 {
2893                     const char *s_al, *s_op, *s_at;
2894                     MemOpIdx oi = op->args[k++];
2895                     MemOp mop = get_memop(oi);
2896                     unsigned ix = get_mmuidx(oi);
2897 
2898                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2899                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2900                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2901                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2902 
2903                     /* If all fields are accounted for, print symbolically. */
2904                     if (!mop && s_al && s_op && s_at) {
2905                         col += ne_fprintf(f, ",%s%s%s,%u",
2906                                           s_at, s_al, s_op, ix);
2907                     } else {
2908                         mop = get_memop(oi);
2909                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2910                     }
2911                     i = 1;
2912                 }
2913                 break;
2914             case INDEX_op_bswap16_i32:
2915             case INDEX_op_bswap16_i64:
2916             case INDEX_op_bswap32_i32:
2917             case INDEX_op_bswap32_i64:
2918             case INDEX_op_bswap64_i64:
2919                 {
2920                     TCGArg flags = op->args[k];
2921                     const char *name = NULL;
2922 
2923                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2924                         name = bswap_flag_name[flags];
2925                     }
2926                     if (name) {
2927                         col += ne_fprintf(f, ",%s", name);
2928                     } else {
2929                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2930                     }
2931                     i = k = 1;
2932                 }
2933                 break;
2934 #ifdef CONFIG_PLUGIN
2935             case INDEX_op_plugin_cb:
2936                 {
2937                     TCGArg from = op->args[k++];
2938                     const char *name = NULL;
2939 
2940                     if (from < ARRAY_SIZE(plugin_from_name)) {
2941                         name = plugin_from_name[from];
2942                     }
2943                     if (name) {
2944                         col += ne_fprintf(f, "%s", name);
2945                     } else {
2946                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2947                     }
2948                     i = 1;
2949                 }
2950                 break;
2951 #endif
2952             default:
2953                 i = 0;
2954                 break;
2955             }
2956             switch (c) {
2957             case INDEX_op_set_label:
2958             case INDEX_op_br:
2959             case INDEX_op_brcond_i32:
2960             case INDEX_op_brcond_i64:
2961             case INDEX_op_brcond2_i32:
2962                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2963                                   arg_label(op->args[k])->id);
2964                 i++, k++;
2965                 break;
2966             case INDEX_op_mb:
2967                 {
2968                     TCGBar membar = op->args[k];
2969                     const char *b_op, *m_op;
2970 
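                    /*
                     * Decode the barrier: TCG_BAR_* selects acq/rel/seq,
                     * TCG_MO_* lists the ordered access pairs; e.g. a
                     * TCG_BAR_SC barrier over TCG_MO_LD_LD | TCG_MO_LD_ST
                     * prints as "seq:rr+rw".
                     */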
2971                     switch (membar & TCG_BAR_SC) {
2972                     case 0:
2973                         b_op = "none";
2974                         break;
2975                     case TCG_BAR_LDAQ:
2976                         b_op = "acq";
2977                         break;
2978                     case TCG_BAR_STRL:
2979                         b_op = "rel";
2980                         break;
2981                     case TCG_BAR_SC:
2982                         b_op = "seq";
2983                         break;
2984                     default:
2985                         g_assert_not_reached();
2986                     }
2987 
2988                     switch (membar & TCG_MO_ALL) {
2989                     case 0:
2990                         m_op = "none";
2991                         break;
2992                     case TCG_MO_LD_LD:
2993                         m_op = "rr";
2994                         break;
2995                     case TCG_MO_LD_ST:
2996                         m_op = "rw";
2997                         break;
2998                     case TCG_MO_ST_LD:
2999                         m_op = "wr";
3000                         break;
3001                     case TCG_MO_ST_ST:
3002                         m_op = "ww";
3003                         break;
3004                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3005                         m_op = "rr+rw";
3006                         break;
3007                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3008                         m_op = "rr+wr";
3009                         break;
3010                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3011                         m_op = "rr+ww";
3012                         break;
3013                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3014                         m_op = "rw+wr";
3015                         break;
3016                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3017                         m_op = "rw+ww";
3018                         break;
3019                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3020                         m_op = "wr+ww";
3021                         break;
3022                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3023                         m_op = "rr+rw+wr";
3024                         break;
3025                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3026                         m_op = "rr+rw+ww";
3027                         break;
3028                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3029                         m_op = "rr+wr+ww";
3030                         break;
3031                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3032                         m_op = "rw+wr+ww";
3033                         break;
3034                     case TCG_MO_ALL:
3035                         m_op = "all";
3036                         break;
3037                     default:
3038                         g_assert_not_reached();
3039                     }
3040 
3041                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3042                     i++, k++;
3043                 }
3044                 break;
3045             default:
3046                 break;
3047             }
3048             for (; i < nb_cargs; i++, k++) {
3049                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3050                                   op->args[k]);
3051             }
3052         }
3053 
3054         if (have_prefs || op->life) {
3055             for (; col < 40; ++col) {
3056                 putc(' ', f);
3057             }
3058         }
3059 
3060         if (op->life) {
3061             unsigned life = op->life;
3062 
3063             if (life & (SYNC_ARG * 3)) {
3064                 ne_fprintf(f, "  sync:");
3065                 for (i = 0; i < 2; ++i) {
3066                     if (life & (SYNC_ARG << i)) {
3067                         ne_fprintf(f, " %d", i);
3068                     }
3069                 }
3070             }
3071             life /= DEAD_ARG;
3072             if (life) {
3073                 ne_fprintf(f, "  dead:");
3074                 for (i = 0; life; ++i, life >>= 1) {
3075                     if (life & 1) {
3076                         ne_fprintf(f, " %d", i);
3077                     }
3078                 }
3079             }
3080         }
3081 
3082         if (have_prefs) {
3083             for (i = 0; i < nb_oargs; ++i) {
3084                 TCGRegSet set = output_pref(op, i);
3085 
3086                 if (i == 0) {
3087                     ne_fprintf(f, "  pref=");
3088                 } else {
3089                     ne_fprintf(f, ",");
3090                 }
3091                 if (set == 0) {
3092                     ne_fprintf(f, "none");
3093                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3094                     ne_fprintf(f, "all");
3095 #ifdef CONFIG_DEBUG_TCG
3096                 } else if (tcg_regset_single(set)) {
3097                     TCGReg reg = tcg_regset_first(set);
3098                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3099 #endif
3100                 } else if (TCG_TARGET_NB_REGS <= 32) {
3101                     ne_fprintf(f, "0x%x", (uint32_t)set);
3102                 } else {
3103                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3104                 }
3105             }
3106         }
3107 
3108         putc('\n', f);
3109     }
3110 }
3111 
3112 /* Give higher priority to constraints with fewer registers. */
3113 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3114 {
3115     int n;
3116 
3117     arg_ct += k;
3118     n = ctpop64(arg_ct->regs);
3119 
3120     /*
3121      * Sort constraints of a single register first, which includes output
3122      * aliases (which must exactly match the input already allocated).
3123      */
3124     if (n == 1 || arg_ct->oalias) {
3125         return INT_MAX;
3126     }
3127 
3128     /*
3129      * Sort register pairs next, with the second member immediately after the first.
3130      * Arbitrarily sort multiple pairs by the index of the first reg;
3131      * there shouldn't be many pairs.
3132      */
3133     switch (arg_ct->pair) {
3134     case 1:
3135     case 3:
3136         return (k + 1) * 2;
3137     case 2:
3138         return (arg_ct->pair_index + 1) * 2 - 1;
3139     }
3140 
3141     /* Finally, sort by decreasing register count. */
3142     assert(n > 1);
3143     return -n;
3144 }
3145 
3146 /* Sort from highest priority to lowest. */
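/*
 * Illustrative keys for a hypothetical constraint set: an output alias or
 * a single-register class sorts first (INT_MAX), a register pair next
 * (a small positive key), and a plain 16-register class last (-16).
 */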
3147 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3148 {
3149     int i, j;
3150 
3151     for (i = 0; i < n; i++) {
3152         a[start + i].sort_index = start + i;
3153     }
3154     if (n <= 1) {
3155         return;
3156     }
3157     for (i = 0; i < n - 1; i++) {
3158         for (j = i + 1; j < n; j++) {
3159             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3160             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3161             if (p1 < p2) {
3162                 int tmp = a[start + i].sort_index;
3163                 a[start + i].sort_index = a[start + j].sort_index;
3164                 a[start + j].sort_index = tmp;
3165             }
3166         }
3167     }
3168 }
3169 
3170 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3171 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3172 
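/*
 * Hypothetical examples of the constraint strings parsed below:
 *   "r"  - any register of the target's "r" class
 *   "ri" - a register, or an immediate (TCG_CT_CONST)
 *   "0"  - an input aliased to output operand 0
 *   "&r" - an output that must not overlap the inputs (newreg)
 * 'p' and 'm' tie an operand to the register after/before the previous one.
 */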
3173 static void process_constraint_sets(void)
3174 {
3175     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3176         const TCGConstraintSet *tdefs = &constraint_sets[c];
3177         TCGArgConstraint *args_ct = all_cts[c];
3178         int nb_oargs = tdefs->nb_oargs;
3179         int nb_iargs = tdefs->nb_iargs;
3180         int nb_args = nb_oargs + nb_iargs;
3181         bool saw_alias_pair = false;
3182 
3183         for (int i = 0; i < nb_args; i++) {
3184             const char *ct_str = tdefs->args_ct_str[i];
3185             bool input_p = i >= nb_oargs;
3186             int o;
3187 
3188             switch (*ct_str) {
3189             case '0' ... '9':
3190                 o = *ct_str - '0';
3191                 tcg_debug_assert(input_p);
3192                 tcg_debug_assert(o < nb_oargs);
3193                 tcg_debug_assert(args_ct[o].regs != 0);
3194                 tcg_debug_assert(!args_ct[o].oalias);
3195                 args_ct[i] = args_ct[o];
3196                 /* The output sets oalias.  */
3197                 args_ct[o].oalias = 1;
3198                 args_ct[o].alias_index = i;
3199                 /* The input sets ialias. */
3200                 args_ct[i].ialias = 1;
3201                 args_ct[i].alias_index = o;
3202                 if (args_ct[i].pair) {
3203                     saw_alias_pair = true;
3204                 }
3205                 tcg_debug_assert(ct_str[1] == '\0');
3206                 continue;
3207 
3208             case '&':
3209                 tcg_debug_assert(!input_p);
3210                 args_ct[i].newreg = true;
3211                 ct_str++;
3212                 break;
3213 
3214             case 'p': /* plus */
3215                 /* Allocate to the register after the previous. */
3216                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3217                 o = i - 1;
3218                 tcg_debug_assert(!args_ct[o].pair);
3219                 tcg_debug_assert(!args_ct[o].ct);
3220                 args_ct[i] = (TCGArgConstraint){
3221                     .pair = 2,
3222                     .pair_index = o,
3223                     .regs = args_ct[o].regs << 1,
3224                     .newreg = args_ct[o].newreg,
3225                 };
3226                 args_ct[o].pair = 1;
3227                 args_ct[o].pair_index = i;
3228                 tcg_debug_assert(ct_str[1] == '\0');
3229                 continue;
3230 
3231             case 'm': /* minus */
3232                 /* Allocate to the register before the previous. */
3233                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3234                 o = i - 1;
3235                 tcg_debug_assert(!args_ct[o].pair);
3236                 tcg_debug_assert(!args_ct[o].ct);
3237                 args_ct[i] = (TCGArgConstraint){
3238                     .pair = 1,
3239                     .pair_index = o,
3240                     .regs = args_ct[o].regs >> 1,
3241                     .newreg = args_ct[o].newreg,
3242                 };
3243                 args_ct[o].pair = 2;
3244                 args_ct[o].pair_index = i;
3245                 tcg_debug_assert(ct_str[1] == '\0');
3246                 continue;
3247             }
3248 
3249             do {
3250                 switch (*ct_str) {
3251                 case 'i':
3252                     args_ct[i].ct |= TCG_CT_CONST;
3253                     break;
3254 #ifdef TCG_REG_ZERO
3255                 case 'z':
3256                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3257                     break;
3258 #endif
3259 
3260                 /* Include all of the target-specific constraints. */
3261 
3262 #undef CONST
3263 #define CONST(CASE, MASK) \
3264     case CASE: args_ct[i].ct |= MASK; break;
3265 #define REGS(CASE, MASK) \
3266     case CASE: args_ct[i].regs |= MASK; break;
3267 
3268 #include "tcg-target-con-str.h"
3269 
3270 #undef REGS
3271 #undef CONST
3272                 default:
3273                 case '0' ... '9':
3274                 case '&':
3275                 case 'p':
3276                 case 'm':
3277                     /* Typo in TCGConstraintSet constraint. */
3278                     g_assert_not_reached();
3279                 }
3280             } while (*++ct_str != '\0');
3281         }
3282 
3283         /*
3284          * Fix up output pairs that are aliased with inputs.
3285          * When we created the alias, we copied pair from the output.
3286          * There are three cases:
3287          *    (1a) Pairs of inputs alias pairs of outputs.
3288          *    (1b) One input aliases the first of a pair of outputs.
3289          *    (2)  One input aliases the second of a pair of outputs.
3290          *
3291          * Case 1a is handled by making sure that the pair_index'es are
3292          * properly updated so that they appear the same as a pair of inputs.
3293          *
3294          * Case 1b is handled by setting the pair_index of the input to
3295          * itself, simply so it doesn't point to an unrelated argument.
3296          * Since we don't encounter the "second" during the input allocation
3297          * phase, nothing happens with the second half of the input pair.
3298          *
3299          * Case 2 is handled by setting the second input to pair=3, the
3300          * first output to pair=3, and the pair_index'es to match.
3301          */
3302         if (saw_alias_pair) {
3303             for (int i = nb_oargs; i < nb_args; i++) {
3304                 int o, o2, i2;
3305 
3306                 /*
3307                  * Since [0-9pm] must be alone in the constraint string,
3308                  * the only way they can both be set is if the pair comes
3309                  * from the output alias.
3310                  */
3311                 if (!args_ct[i].ialias) {
3312                     continue;
3313                 }
3314                 switch (args_ct[i].pair) {
3315                 case 0:
3316                     break;
3317                 case 1:
3318                     o = args_ct[i].alias_index;
3319                     o2 = args_ct[o].pair_index;
3320                     tcg_debug_assert(args_ct[o].pair == 1);
3321                     tcg_debug_assert(args_ct[o2].pair == 2);
3322                     if (args_ct[o2].oalias) {
3323                         /* Case 1a */
3324                         i2 = args_ct[o2].alias_index;
3325                         tcg_debug_assert(args_ct[i2].pair == 2);
3326                         args_ct[i2].pair_index = i;
3327                         args_ct[i].pair_index = i2;
3328                     } else {
3329                         /* Case 1b */
3330                         args_ct[i].pair_index = i;
3331                     }
3332                     break;
3333                 case 2:
3334                     o = args_ct[i].alias_index;
3335                     o2 = args_ct[o].pair_index;
3336                     tcg_debug_assert(args_ct[o].pair == 2);
3337                     tcg_debug_assert(args_ct[o2].pair == 1);
3338                     if (args_ct[o2].oalias) {
3339                         /* Case 1a */
3340                         i2 = args_ct[o2].alias_index;
3341                         tcg_debug_assert(args_ct[i2].pair == 1);
3342                         args_ct[i2].pair_index = i;
3343                         args_ct[i].pair_index = i2;
3344                     } else {
3345                         /* Case 2 */
3346                         args_ct[i].pair = 3;
3347                         args_ct[o2].pair = 3;
3348                         args_ct[i].pair_index = o2;
3349                         args_ct[o2].pair_index = i;
3350                     }
3351                     break;
3352                 default:
3353                     g_assert_not_reached();
3354                 }
3355             }
3356         }
3357 
3358         /* sort the constraints (XXX: this is just a heuristic) */
3359         sort_constraints(args_ct, 0, nb_oargs);
3360         sort_constraints(args_ct, nb_oargs, nb_iargs);
3361     }
3362 }
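/*
 * An illustration of the parsing above, using hypothetical strings
 * rather than any particular target's definitions: a set with
 * nb_oargs = 1, nb_iargs = 2 and args_ct_str = { "&r", "r", "ri" }
 * produces
 *
 *     args_ct[0]: regs = R, newreg = true       (earlyclobber output)
 *     args_ct[1]: regs = R                      (register input)
 *     args_ct[2]: regs = R, ct |= TCG_CT_CONST  (register or immediate)
 *
 * where R is whatever mask the target's REGS('r', ...) entry in
 * tcg-target-con-str.h supplies.  An input string "0" instead copies
 * the constraint of output 0 and links the two via oalias/ialias.
 */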
3363 
3364 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3365 {
3366     TCGOpcode opc = op->opc;
3367     TCGType type = TCGOP_TYPE(op);
3368     unsigned flags = TCGOP_FLAGS(op);
3369     const TCGOpDef *def = &tcg_op_defs[opc];
3370     const TCGOutOp *outop = all_outop[opc];
3371     TCGConstraintSetIndex con_set;
3372 
3373     if (def->flags & TCG_OPF_NOT_PRESENT) {
3374         return empty_cts;
3375     }
3376 
3377     if (outop) {
3378         con_set = outop->static_constraint;
3379         if (con_set == C_Dynamic) {
3380             con_set = outop->dynamic_constraint(type, flags);
3381         }
3382     } else {
3383         con_set = tcg_target_op_def(opc, type, flags);
3384     }
3385     tcg_debug_assert(con_set >= 0);
3386     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3387 
3388     /* The constraint arguments must match TCGOpcode arguments. */
3389     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3390     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3391 
3392     return all_cts[con_set];
3393 }
3394 
3395 static void remove_label_use(TCGOp *op, int idx)
3396 {
3397     TCGLabel *label = arg_label(op->args[idx]);
3398     TCGLabelUse *use;
3399 
3400     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3401         if (use->op == op) {
3402             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3403             return;
3404         }
3405     }
3406     g_assert_not_reached();
3407 }
3408 
3409 void tcg_op_remove(TCGContext *s, TCGOp *op)
3410 {
3411     switch (op->opc) {
3412     case INDEX_op_br:
3413         remove_label_use(op, 0);
3414         break;
3415     case INDEX_op_brcond_i32:
3416     case INDEX_op_brcond_i64:
3417         remove_label_use(op, 3);
3418         break;
3419     case INDEX_op_brcond2_i32:
3420         remove_label_use(op, 5);
3421         break;
3422     default:
3423         break;
3424     }
3425 
3426     QTAILQ_REMOVE(&s->ops, op, link);
3427     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3428     s->nb_ops--;
3429 }
3430 
3431 void tcg_remove_ops_after(TCGOp *op)
3432 {
3433     TCGContext *s = tcg_ctx;
3434 
3435     while (true) {
3436         TCGOp *last = tcg_last_op();
3437         if (last == op) {
3438             return;
3439         }
3440         tcg_op_remove(s, last);
3441     }
3442 }
3443 
3444 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3445 {
3446     TCGContext *s = tcg_ctx;
3447     TCGOp *op = NULL;
3448 
3449     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3450         QTAILQ_FOREACH(op, &s->free_ops, link) {
3451             if (nargs <= op->nargs) {
3452                 QTAILQ_REMOVE(&s->free_ops, op, link);
3453                 nargs = op->nargs;
3454                 goto found;
3455             }
3456         }
3457     }
3458 
3459     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3460     nargs = MAX(4, nargs);
3461     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3462 
3463  found:
3464     memset(op, 0, offsetof(TCGOp, link));
3465     op->opc = opc;
3466     op->nargs = nargs;
3467 
3468     /* Check for bitfield overflow. */
3469     tcg_debug_assert(op->nargs == nargs);
3470 
3471     s->nb_ops++;
3472     return op;
3473 }
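/*
 * A sketch of the policy above, with illustrative values: a request
 * for 3 arguments first searches free_ops for any recycled op with
 * nargs >= 3; failing that, it allocates a fresh op rounded up to 4
 * argument slots, so that a later 4-argument request can reuse it.
 *
 *     TCGOp *op = tcg_op_alloc(INDEX_op_add, 3);  // op->nargs may be 4
 */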
3474 
3475 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3476 {
3477     TCGOp *op = tcg_op_alloc(opc, nargs);
3478 
3479     if (tcg_ctx->emit_before_op) {
3480         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3481     } else {
3482         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3483     }
3484     return op;
3485 }
3486 
3487 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3488                             TCGOpcode opc, TCGType type, unsigned nargs)
3489 {
3490     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3491 
3492     TCGOP_TYPE(new_op) = type;
3493     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3494     return new_op;
3495 }
3496 
3497 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3498                            TCGOpcode opc, TCGType type, unsigned nargs)
3499 {
3500     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3501 
3502     TCGOP_TYPE(new_op) = type;
3503     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3504     return new_op;
3505 }
3506 
3507 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3508 {
3509     TCGLabelUse *u;
3510 
3511     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3512         TCGOp *op = u->op;
3513         switch (op->opc) {
3514         case INDEX_op_br:
3515             op->args[0] = label_arg(to);
3516             break;
3517         case INDEX_op_brcond_i32:
3518         case INDEX_op_brcond_i64:
3519             op->args[3] = label_arg(to);
3520             break;
3521         case INDEX_op_brcond2_i32:
3522             op->args[5] = label_arg(to);
3523             break;
3524         default:
3525             g_assert_not_reached();
3526         }
3527     }
3528 
3529     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3530 }
3531 
3532 /* Reachability analysis: remove unreachable code.  */
3533 static void __attribute__((noinline))
3534 reachable_code_pass(TCGContext *s)
3535 {
3536     TCGOp *op, *op_next, *op_prev;
3537     bool dead = false;
3538 
3539     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3540         bool remove = dead;
3541         TCGLabel *label;
3542 
3543         switch (op->opc) {
3544         case INDEX_op_set_label:
3545             label = arg_label(op->args[0]);
3546 
3547             /*
3548              * Note that the first op in the TB is always a load,
3549              * so there is always something before a label.
3550              */
3551             op_prev = QTAILQ_PREV(op, link);
3552 
3553             /*
3554              * If we find two sequential labels, move all branches to
3555              * reference the second label and remove the first label.
3556              * Do this before branch to next optimization, so that the
3557              * middle label is out of the way.
3558              */
3559             if (op_prev->opc == INDEX_op_set_label) {
3560                 move_label_uses(label, arg_label(op_prev->args[0]));
3561                 tcg_op_remove(s, op_prev);
3562                 op_prev = QTAILQ_PREV(op, link);
3563             }
3564 
3565             /*
3566              * Optimization can fold conditional branches to unconditional.
3567              * If we find a label which is preceded by an unconditional
3568              * branch to next, remove the branch.  We couldn't do this when
3569              * processing the branch because any dead code between the branch
3570              * and label had not yet been removed.
3571              */
3572             if (op_prev->opc == INDEX_op_br &&
3573                 label == arg_label(op_prev->args[0])) {
3574                 tcg_op_remove(s, op_prev);
3575                 /* Fall through means insns become live again.  */
3576                 dead = false;
3577             }
3578 
3579             if (QSIMPLEQ_EMPTY(&label->branches)) {
3580                 /*
3581                  * While there is an occasional backward branch, virtually
3582                  * all branches generated by the translators are forward.
3583                  * By the time we reach a label we will thus generally
3584                  * have already removed every reference to it that will
3585                  * ever exist, and there is little to be gained by iterating.
3586                  */
3587                 remove = true;
3588             } else {
3589                 /* Once we see a label, insns become live again.  */
3590                 dead = false;
3591                 remove = false;
3592             }
3593             break;
3594 
3595         case INDEX_op_br:
3596         case INDEX_op_exit_tb:
3597         case INDEX_op_goto_ptr:
3598             /* Unconditional branches; everything following is dead.  */
3599             dead = true;
3600             break;
3601 
3602         case INDEX_op_call:
3603             /* Notice noreturn helper calls, raising exceptions.  */
3604             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3605                 dead = true;
3606             }
3607             break;
3608 
3609         case INDEX_op_insn_start:
3610             /* Never remove -- we need to keep these for unwind.  */
3611             remove = false;
3612             break;
3613 
3614         default:
3615             break;
3616         }
3617 
3618         if (remove) {
3619             tcg_op_remove(s, op);
3620         }
3621     }
3622 }
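/*
 * A sketch of the pass on a hypothetical op stream (operands
 * abbreviated), where $L2 is also targeted by the earlier brcond:
 *
 *     brcond_i32 t0, t1, eq, $L2        brcond_i32 t0, t1, eq, $L2
 *     mov        t2, t3                 mov        t2, t3
 *     br         $L2               ->   (removed: branch to next)
 *     mov        t4, t5                 (removed: unreachable)
 *     set_label  $L2                    set_label  $L2
 *
 * The br is only recognized as a branch-to-next once the dead mov
 * between it and the label has been removed, which is why the check
 * happens at set_label rather than at the branch itself.
 */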
3623 
3624 #define TS_DEAD  1
3625 #define TS_MEM   2
3626 
3627 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3628 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
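/*
 * arg_life packs two flags per argument index: DEAD_ARG << n records
 * that argument n dies at this op, SYNC_ARG << n that it must be
 * synced back to memory (both defined alongside TCGLifeData
 * elsewhere).  For example, an op whose output 0 is to be both synced
 * and then dropped carries
 *
 *     arg_life = (DEAD_ARG << 0) | (SYNC_ARG << 0);
 *
 * making IS_DEAD_ARG(0) and NEED_SYNC_ARG(0) both true.
 */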
3629 
3630 /* For liveness_pass_1, the register preferences for a given temp.  */
3631 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3632 {
3633     return ts->state_ptr;
3634 }
3635 
3636 /* For liveness_pass_1, reset the preferences for a given temp to the
3637  * maximal regset for its type.
3638  */
3639 static inline void la_reset_pref(TCGTemp *ts)
3640 {
3641     *la_temp_pref(ts)
3642         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3643 }
3644 
3645 /* liveness analysis: end of function: all temps are dead, and globals
3646    should be in memory. */
3647 static void la_func_end(TCGContext *s, int ng, int nt)
3648 {
3649     int i;
3650 
3651     for (i = 0; i < ng; ++i) {
3652         s->temps[i].state = TS_DEAD | TS_MEM;
3653         la_reset_pref(&s->temps[i]);
3654     }
3655     for (i = ng; i < nt; ++i) {
3656         s->temps[i].state = TS_DEAD;
3657         la_reset_pref(&s->temps[i]);
3658     }
3659 }
3660 
3661 /* liveness analysis: end of basic block: all temps are dead, globals
3662    and local temps should be in memory. */
3663 static void la_bb_end(TCGContext *s, int ng, int nt)
3664 {
3665     int i;
3666 
3667     for (i = 0; i < nt; ++i) {
3668         TCGTemp *ts = &s->temps[i];
3669         int state;
3670 
3671         switch (ts->kind) {
3672         case TEMP_FIXED:
3673         case TEMP_GLOBAL:
3674         case TEMP_TB:
3675             state = TS_DEAD | TS_MEM;
3676             break;
3677         case TEMP_EBB:
3678         case TEMP_CONST:
3679             state = TS_DEAD;
3680             break;
3681         default:
3682             g_assert_not_reached();
3683         }
3684         ts->state = state;
3685         la_reset_pref(ts);
3686     }
3687 }
3688 
3689 /* liveness analysis: sync globals back to memory.  */
3690 static void la_global_sync(TCGContext *s, int ng)
3691 {
3692     int i;
3693 
3694     for (i = 0; i < ng; ++i) {
3695         int state = s->temps[i].state;
3696         s->temps[i].state = state | TS_MEM;
3697         if (state == TS_DEAD) {
3698             /* If the global was previously dead, reset prefs.  */
3699             la_reset_pref(&s->temps[i]);
3700         }
3701     }
3702 }
3703 
3704 /*
3705  * liveness analysis: conditional branch: all temps are dead unless
3706  * explicitly live-across-conditional-branch, globals and local temps
3707  * should be synced.
3708  */
3709 static void la_bb_sync(TCGContext *s, int ng, int nt)
3710 {
3711     la_global_sync(s, ng);
3712 
3713     for (int i = ng; i < nt; ++i) {
3714         TCGTemp *ts = &s->temps[i];
3715         int state;
3716 
3717         switch (ts->kind) {
3718         case TEMP_TB:
3719             state = ts->state;
3720             ts->state = state | TS_MEM;
3721             if (state != TS_DEAD) {
3722                 continue;
3723             }
3724             break;
3725         case TEMP_EBB:
3726         case TEMP_CONST:
3727             continue;
3728         default:
3729             g_assert_not_reached();
3730         }
3731         la_reset_pref(&s->temps[i]);
3732     }
3733 }
3734 
3735 /* liveness analysis: sync globals back to memory and kill.  */
3736 static void la_global_kill(TCGContext *s, int ng)
3737 {
3738     int i;
3739 
3740     for (i = 0; i < ng; i++) {
3741         s->temps[i].state = TS_DEAD | TS_MEM;
3742         la_reset_pref(&s->temps[i]);
3743     }
3744 }
3745 
3746 /* liveness analysis: note live globals crossing calls.  */
3747 static void la_cross_call(TCGContext *s, int nt)
3748 {
3749     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3750     int i;
3751 
3752     for (i = 0; i < nt; i++) {
3753         TCGTemp *ts = &s->temps[i];
3754         if (!(ts->state & TS_DEAD)) {
3755             TCGRegSet *pset = la_temp_pref(ts);
3756             TCGRegSet set = *pset;
3757 
3758             set &= mask;
3759             /* If the combination is not possible, restart.  */
3760             if (set == 0) {
3761                 set = tcg_target_available_regs[ts->type] & mask;
3762             }
3763             *pset = set;
3764         }
3765     }
3766 }
3767 
3768 /*
3769  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3770  * to TEMP_EBB, if possible.
3771  */
3772 static void __attribute__((noinline))
3773 liveness_pass_0(TCGContext *s)
3774 {
3775     void * const multiple_ebb = (void *)(uintptr_t)-1;
3776     int nb_temps = s->nb_temps;
3777     TCGOp *op, *ebb;
3778 
3779     for (int i = s->nb_globals; i < nb_temps; ++i) {
3780         s->temps[i].state_ptr = NULL;
3781     }
3782 
3783     /*
3784      * Represent each EBB by the op at which it begins.  In the case of
3785      * the first EBB, this is the first op, otherwise it is a label.
3786      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3787      * within a single EBB, else MULTIPLE_EBB.
3788      */
3789     ebb = QTAILQ_FIRST(&s->ops);
3790     QTAILQ_FOREACH(op, &s->ops, link) {
3791         const TCGOpDef *def;
3792         int nb_oargs, nb_iargs;
3793 
3794         switch (op->opc) {
3795         case INDEX_op_set_label:
3796             ebb = op;
3797             continue;
3798         case INDEX_op_discard:
3799             continue;
3800         case INDEX_op_call:
3801             nb_oargs = TCGOP_CALLO(op);
3802             nb_iargs = TCGOP_CALLI(op);
3803             break;
3804         default:
3805             def = &tcg_op_defs[op->opc];
3806             nb_oargs = def->nb_oargs;
3807             nb_iargs = def->nb_iargs;
3808             break;
3809         }
3810 
3811         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3812             TCGTemp *ts = arg_temp(op->args[i]);
3813 
3814             if (ts->kind != TEMP_TB) {
3815                 continue;
3816             }
3817             if (ts->state_ptr == NULL) {
3818                 ts->state_ptr = ebb;
3819             } else if (ts->state_ptr != ebb) {
3820                 ts->state_ptr = multiple_ebb;
3821             }
3822         }
3823     }
3824 
3825     /*
3826      * For TEMP_TB that turned out not to be used beyond one EBB,
3827      * reduce the liveness to TEMP_EBB.
3828      */
3829     for (int i = s->nb_globals; i < nb_temps; ++i) {
3830         TCGTemp *ts = &s->temps[i];
3831         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3832             ts->kind = TEMP_EBB;
3833         }
3834     }
3835 }
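/*
 * A sketch with a hypothetical temp t:
 *
 *     mov       t, t0         both uses fall in the EBB beginning at
 *     add       t1, t, t2     the same op, so t->state_ptr stays == ebb
 *     set_label $L0
 *     ...                     no further use of t: the fixup loop above
 *                             demotes t->kind to TEMP_EBB
 *
 * Had t been used again after $L0, state_ptr would have been set to
 * MULTIPLE_EBB and the temp would keep TEMP_TB semantics.
 */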
3836 
3837 /* Liveness analysis: update the opc_arg_life array to tell if a
3838    given input argument is dead. Instructions updating dead
3839    temporaries are removed. */
3840 static void __attribute__((noinline))
3841 liveness_pass_1(TCGContext *s)
3842 {
3843     int nb_globals = s->nb_globals;
3844     int nb_temps = s->nb_temps;
3845     TCGOp *op, *op_prev;
3846     TCGRegSet *prefs;
3847     int i;
3848 
3849     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3850     for (i = 0; i < nb_temps; ++i) {
3851         s->temps[i].state_ptr = prefs + i;
3852     }
3853 
3854     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3855     la_func_end(s, nb_globals, nb_temps);
3856 
3857     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3858         int nb_iargs, nb_oargs;
3859         TCGOpcode opc_new, opc_new2;
3860         TCGLifeData arg_life = 0;
3861         TCGTemp *ts;
3862         TCGOpcode opc = op->opc;
3863         const TCGOpDef *def = &tcg_op_defs[opc];
3864         const TCGArgConstraint *args_ct;
3865 
3866         switch (opc) {
3867         case INDEX_op_call:
3868             {
3869                 const TCGHelperInfo *info = tcg_call_info(op);
3870                 int call_flags = tcg_call_flags(op);
3871 
3872                 nb_oargs = TCGOP_CALLO(op);
3873                 nb_iargs = TCGOP_CALLI(op);
3874 
3875                 /* pure functions can be removed if their result is unused */
3876                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3877                     for (i = 0; i < nb_oargs; i++) {
3878                         ts = arg_temp(op->args[i]);
3879                         if (ts->state != TS_DEAD) {
3880                             goto do_not_remove_call;
3881                         }
3882                     }
3883                     goto do_remove;
3884                 }
3885             do_not_remove_call:
3886 
3887                 /* Output args are dead.  */
3888                 for (i = 0; i < nb_oargs; i++) {
3889                     ts = arg_temp(op->args[i]);
3890                     if (ts->state & TS_DEAD) {
3891                         arg_life |= DEAD_ARG << i;
3892                     }
3893                     if (ts->state & TS_MEM) {
3894                         arg_life |= SYNC_ARG << i;
3895                     }
3896                     ts->state = TS_DEAD;
3897                     la_reset_pref(ts);
3898                 }
3899 
3900                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3901                 memset(op->output_pref, 0, sizeof(op->output_pref));
3902 
3903                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3904                                     TCG_CALL_NO_READ_GLOBALS))) {
3905                     la_global_kill(s, nb_globals);
3906                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3907                     la_global_sync(s, nb_globals);
3908                 }
3909 
3910                 /* Record arguments that die in this helper.  */
3911                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3912                     ts = arg_temp(op->args[i]);
3913                     if (ts->state & TS_DEAD) {
3914                         arg_life |= DEAD_ARG << i;
3915                     }
3916                 }
3917 
3918                 /* For all live registers, remove call-clobbered prefs.  */
3919                 la_cross_call(s, nb_temps);
3920 
3921                 /*
3922                  * Input arguments are live for preceding opcodes.
3923                  *
3924                  * For those arguments that die, and will be allocated in
3925                  * registers, clear the register set for that arg, to be
3926                  * filled in below.  For args that will be on the stack,
3927                  * reset to any available reg.  Process arguments in reverse
3928                  * order so that if a temp is used more than once, the stack
3929                  * reset to max happens before the register reset to 0.
3930                  */
3931                 for (i = nb_iargs - 1; i >= 0; i--) {
3932                     const TCGCallArgumentLoc *loc = &info->in[i];
3933                     ts = arg_temp(op->args[nb_oargs + i]);
3934 
3935                     if (ts->state & TS_DEAD) {
3936                         switch (loc->kind) {
3937                         case TCG_CALL_ARG_NORMAL:
3938                         case TCG_CALL_ARG_EXTEND_U:
3939                         case TCG_CALL_ARG_EXTEND_S:
3940                             if (arg_slot_reg_p(loc->arg_slot)) {
3941                                 *la_temp_pref(ts) = 0;
3942                                 break;
3943                             }
3944                             /* fall through */
3945                         default:
3946                             *la_temp_pref(ts) =
3947                                 tcg_target_available_regs[ts->type];
3948                             break;
3949                         }
3950                         ts->state &= ~TS_DEAD;
3951                     }
3952                 }
3953 
3954                 /*
3955                  * For each input argument, add its input register to prefs.
3956                  * If a temp is used once, this produces a single set bit;
3957                  * if a temp is used multiple times, this produces a set.
3958                  */
3959                 for (i = 0; i < nb_iargs; i++) {
3960                     const TCGCallArgumentLoc *loc = &info->in[i];
3961                     ts = arg_temp(op->args[nb_oargs + i]);
3962 
3963                     switch (loc->kind) {
3964                     case TCG_CALL_ARG_NORMAL:
3965                     case TCG_CALL_ARG_EXTEND_U:
3966                     case TCG_CALL_ARG_EXTEND_S:
3967                         if (arg_slot_reg_p(loc->arg_slot)) {
3968                             tcg_regset_set_reg(*la_temp_pref(ts),
3969                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3970                         }
3971                         break;
3972                     default:
3973                         break;
3974                     }
3975                 }
3976             }
3977             break;
3978         case INDEX_op_insn_start:
3979             break;
3980         case INDEX_op_discard:
3981             /* mark the temporary as dead */
3982             ts = arg_temp(op->args[0]);
3983             ts->state = TS_DEAD;
3984             la_reset_pref(ts);
3985             break;
3986 
3987         case INDEX_op_add2_i32:
3988         case INDEX_op_add2_i64:
3989             opc_new = INDEX_op_add;
3990             goto do_addsub2;
3991         case INDEX_op_sub2_i32:
3992         case INDEX_op_sub2_i64:
3993             opc_new = INDEX_op_sub;
3994         do_addsub2:
3995             nb_iargs = 4;
3996             nb_oargs = 2;
3997             /* Test if the high part of the operation is dead, but not
3998                the low part.  The result can be optimized to a simple
3999                add or sub.  This happens often for an x86_64 guest when
4000                the cpu mode is set to 32 bit.  */
4001             if (arg_temp(op->args[1])->state == TS_DEAD) {
4002                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4003                     goto do_remove;
4004                 }
4005                 /* Replace the opcode and adjust the args in place,
4006                    leaving 3 unused args at the end.  */
4007                 op->opc = opc = opc_new;
4008                 op->args[1] = op->args[2];
4009                 op->args[2] = op->args[4];
4010                 /* Fall through and mark the single-word operation live.  */
4011                 nb_iargs = 2;
4012                 nb_oargs = 1;
4013             }
4014             goto do_not_remove;
4015 
4016         case INDEX_op_muls2_i32:
4017         case INDEX_op_muls2_i64:
4018             opc_new = INDEX_op_mul;
4019             opc_new2 = INDEX_op_mulsh;
4020             goto do_mul2;
4021         case INDEX_op_mulu2_i32:
4022         case INDEX_op_mulu2_i64:
4023             opc_new = INDEX_op_mul;
4024             opc_new2 = INDEX_op_muluh;
4025         do_mul2:
4026             nb_iargs = 2;
4027             nb_oargs = 2;
4028             if (arg_temp(op->args[1])->state == TS_DEAD) {
4029                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4030                     /* Both parts of the operation are dead.  */
4031                     goto do_remove;
4032                 }
4033                 /* The high part of the operation is dead; generate the low. */
4034                 op->opc = opc = opc_new;
4035                 op->args[1] = op->args[2];
4036                 op->args[2] = op->args[3];
4037             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4038                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4039                 /* The low part of the operation is dead; generate the high. */
4040                 op->opc = opc = opc_new2;
4041                 op->args[0] = op->args[1];
4042                 op->args[1] = op->args[2];
4043                 op->args[2] = op->args[3];
4044             } else {
4045                 goto do_not_remove;
4046             }
4047             /* Mark the single-word operation live.  */
4048             nb_oargs = 1;
4049             goto do_not_remove;
4050 
4051         default:
4052             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4053             nb_iargs = def->nb_iargs;
4054             nb_oargs = def->nb_oargs;
4055 
4056             /* Test if the operation can be removed because all
4057                its outputs are dead. We assume that nb_oargs == 0
4058                implies side effects.  */
4059             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4060                 for (i = 0; i < nb_oargs; i++) {
4061                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4062                         goto do_not_remove;
4063                     }
4064                 }
4065                 goto do_remove;
4066             }
4067             goto do_not_remove;
4068 
4069         do_remove:
4070             tcg_op_remove(s, op);
4071             break;
4072 
4073         do_not_remove:
4074             for (i = 0; i < nb_oargs; i++) {
4075                 ts = arg_temp(op->args[i]);
4076 
4077                 /* Remember the preference of the uses that followed.  */
4078                 if (i < ARRAY_SIZE(op->output_pref)) {
4079                     op->output_pref[i] = *la_temp_pref(ts);
4080                 }
4081 
4082                 /* Output args are dead.  */
4083                 if (ts->state & TS_DEAD) {
4084                     arg_life |= DEAD_ARG << i;
4085                 }
4086                 if (ts->state & TS_MEM) {
4087                     arg_life |= SYNC_ARG << i;
4088                 }
4089                 ts->state = TS_DEAD;
4090                 la_reset_pref(ts);
4091             }
4092 
4093             /* If end of basic block, update.  */
4094             if (def->flags & TCG_OPF_BB_EXIT) {
4095                 la_func_end(s, nb_globals, nb_temps);
4096             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4097                 la_bb_sync(s, nb_globals, nb_temps);
4098             } else if (def->flags & TCG_OPF_BB_END) {
4099                 la_bb_end(s, nb_globals, nb_temps);
4100             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4101                 la_global_sync(s, nb_globals);
4102                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4103                     la_cross_call(s, nb_temps);
4104                 }
4105             }
4106 
4107             /* Record arguments that die in this opcode.  */
4108             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4109                 ts = arg_temp(op->args[i]);
4110                 if (ts->state & TS_DEAD) {
4111                     arg_life |= DEAD_ARG << i;
4112                 }
4113             }
4114 
4115             /* Input arguments are live for preceding opcodes.  */
4116             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4117                 ts = arg_temp(op->args[i]);
4118                 if (ts->state & TS_DEAD) {
4119                     /* For operands that were dead, initially allow
4120                        all regs for the type.  */
4121                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4122                     ts->state &= ~TS_DEAD;
4123                 }
4124             }
4125 
4126             /* Incorporate constraints for this operand.  */
4127             switch (opc) {
4128             case INDEX_op_mov:
4129                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4130                    have proper constraints.  That said, special case
4131                    moves to propagate preferences backward.  */
4132                 if (IS_DEAD_ARG(1)) {
4133                     *la_temp_pref(arg_temp(op->args[0]))
4134                         = *la_temp_pref(arg_temp(op->args[1]));
4135                 }
4136                 break;
4137 
4138             default:
4139                 args_ct = opcode_args_ct(op);
4140                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4141                     const TCGArgConstraint *ct = &args_ct[i];
4142                     TCGRegSet set, *pset;
4143 
4144                     ts = arg_temp(op->args[i]);
4145                     pset = la_temp_pref(ts);
4146                     set = *pset;
4147 
4148                     set &= ct->regs;
4149                     if (ct->ialias) {
4150                         set &= output_pref(op, ct->alias_index);
4151                     }
4152                     /* If the combination is not possible, restart.  */
4153                     if (set == 0) {
4154                         set = ct->regs;
4155                     }
4156                     *pset = set;
4157                 }
4158                 break;
4159             }
4160             break;
4161         }
4162         op->life = arg_life;
4163     }
4164 }
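/*
 * One concrete effect of the pass, with hypothetical temps: when only
 * the high half of a double-word addition is dead, the op is rewritten
 * in place by the do_addsub2 case above,
 *
 *     add2_i32 lo, hi, al, ah, bl, bh   ->   add lo, al, bl
 *
 * shifting args[2] and args[4] down.  The low-dead case of mulu2/muls2
 * is likewise rewritten to muluh/mulsh when the target supports it.
 */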
4165 
4166 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4167 static bool __attribute__((noinline))
4168 liveness_pass_2(TCGContext *s)
4169 {
4170     int nb_globals = s->nb_globals;
4171     int nb_temps, i;
4172     bool changes = false;
4173     TCGOp *op, *op_next;
4174 
4175     /* Create a temporary for each indirect global.  */
4176     for (i = 0; i < nb_globals; ++i) {
4177         TCGTemp *its = &s->temps[i];
4178         if (its->indirect_reg) {
4179             TCGTemp *dts = tcg_temp_alloc(s);
4180             dts->type = its->type;
4181             dts->base_type = its->base_type;
4182             dts->temp_subindex = its->temp_subindex;
4183             dts->kind = TEMP_EBB;
4184             its->state_ptr = dts;
4185         } else {
4186             its->state_ptr = NULL;
4187         }
4188         /* All globals begin dead.  */
4189         its->state = TS_DEAD;
4190     }
4191     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4192         TCGTemp *its = &s->temps[i];
4193         its->state_ptr = NULL;
4194         its->state = TS_DEAD;
4195     }
4196 
4197     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4198         TCGOpcode opc = op->opc;
4199         const TCGOpDef *def = &tcg_op_defs[opc];
4200         TCGLifeData arg_life = op->life;
4201         int nb_iargs, nb_oargs, call_flags;
4202         TCGTemp *arg_ts, *dir_ts;
4203 
4204         if (opc == INDEX_op_call) {
4205             nb_oargs = TCGOP_CALLO(op);
4206             nb_iargs = TCGOP_CALLI(op);
4207             call_flags = tcg_call_flags(op);
4208         } else {
4209             nb_iargs = def->nb_iargs;
4210             nb_oargs = def->nb_oargs;
4211 
4212             /* Set flags analogous to those that calls require.  */
4213             if (def->flags & TCG_OPF_COND_BRANCH) {
4214                 /* Like reading globals: sync_globals */
4215                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4216             } else if (def->flags & TCG_OPF_BB_END) {
4217                 /* Like writing globals: save_globals */
4218                 call_flags = 0;
4219             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4220                 /* Like reading globals: sync_globals */
4221                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4222             } else {
4223                 /* No effect on globals.  */
4224                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4225                               TCG_CALL_NO_WRITE_GLOBALS);
4226             }
4227         }
4228 
4229         /* Make sure that input arguments are available.  */
4230         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4231             arg_ts = arg_temp(op->args[i]);
4232             dir_ts = arg_ts->state_ptr;
4233             if (dir_ts && arg_ts->state == TS_DEAD) {
4234                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4235                                   ? INDEX_op_ld_i32
4236                                   : INDEX_op_ld_i64);
4237                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4238                                                   arg_ts->type, 3);
4239 
4240                 lop->args[0] = temp_arg(dir_ts);
4241                 lop->args[1] = temp_arg(arg_ts->mem_base);
4242                 lop->args[2] = arg_ts->mem_offset;
4243 
4244                 /* Loaded, but synced with memory.  */
4245                 arg_ts->state = TS_MEM;
4246             }
4247         }
4248 
4249         /* Perform input replacement, and mark inputs that became dead.
4250            No action is required except keeping temp_state up to date
4251            so that we reload when needed.  */
4252         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4253             arg_ts = arg_temp(op->args[i]);
4254             dir_ts = arg_ts->state_ptr;
4255             if (dir_ts) {
4256                 op->args[i] = temp_arg(dir_ts);
4257                 changes = true;
4258                 if (IS_DEAD_ARG(i)) {
4259                     arg_ts->state = TS_DEAD;
4260                 }
4261             }
4262         }
4263 
4264         /* Liveness analysis should ensure that the following are
4265            all correct, for call sites and basic block end points.  */
4266         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4267             /* Nothing to do */
4268         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4269             for (i = 0; i < nb_globals; ++i) {
4270                 /* Liveness should see that globals are synced back,
4271                    that is, either TS_DEAD or TS_MEM.  */
4272                 arg_ts = &s->temps[i];
4273                 tcg_debug_assert(arg_ts->state_ptr == 0
4274                                  || arg_ts->state != 0);
4275             }
4276         } else {
4277             for (i = 0; i < nb_globals; ++i) {
4278                 /* Liveness should see that globals are saved back,
4279                    that is, TS_DEAD, waiting to be reloaded.  */
4280                 arg_ts = &s->temps[i];
4281                 tcg_debug_assert(arg_ts->state_ptr == 0
4282                                  || arg_ts->state == TS_DEAD);
4283             }
4284         }
4285 
4286         /* Outputs become available.  */
4287         if (opc == INDEX_op_mov) {
4288             arg_ts = arg_temp(op->args[0]);
4289             dir_ts = arg_ts->state_ptr;
4290             if (dir_ts) {
4291                 op->args[0] = temp_arg(dir_ts);
4292                 changes = true;
4293 
4294                 /* The output is now live and modified.  */
4295                 arg_ts->state = 0;
4296 
4297                 if (NEED_SYNC_ARG(0)) {
4298                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4299                                       ? INDEX_op_st_i32
4300                                       : INDEX_op_st_i64);
4301                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4302                                                      arg_ts->type, 3);
4303                     TCGTemp *out_ts = dir_ts;
4304 
4305                     if (IS_DEAD_ARG(0)) {
4306                         out_ts = arg_temp(op->args[1]);
4307                         arg_ts->state = TS_DEAD;
4308                         tcg_op_remove(s, op);
4309                     } else {
4310                         arg_ts->state = TS_MEM;
4311                     }
4312 
4313                     sop->args[0] = temp_arg(out_ts);
4314                     sop->args[1] = temp_arg(arg_ts->mem_base);
4315                     sop->args[2] = arg_ts->mem_offset;
4316                 } else {
4317                     tcg_debug_assert(!IS_DEAD_ARG(0));
4318                 }
4319             }
4320         } else {
4321             for (i = 0; i < nb_oargs; i++) {
4322                 arg_ts = arg_temp(op->args[i]);
4323                 dir_ts = arg_ts->state_ptr;
4324                 if (!dir_ts) {
4325                     continue;
4326                 }
4327                 op->args[i] = temp_arg(dir_ts);
4328                 changes = true;
4329 
4330                 /* The output is now live and modified.  */
4331                 arg_ts->state = 0;
4332 
4333                 /* Sync outputs upon their last write.  */
4334                 if (NEED_SYNC_ARG(i)) {
4335                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4336                                       ? INDEX_op_st_i32
4337                                       : INDEX_op_st_i64);
4338                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4339                                                      arg_ts->type, 3);
4340 
4341                     sop->args[0] = temp_arg(dir_ts);
4342                     sop->args[1] = temp_arg(arg_ts->mem_base);
4343                     sop->args[2] = arg_ts->mem_offset;
4344 
4345                     arg_ts->state = TS_MEM;
4346                 }
4347                 /* Drop outputs that are dead.  */
4348                 if (IS_DEAD_ARG(i)) {
4349                     arg_ts->state = TS_DEAD;
4350                 }
4351             }
4352         }
4353     }
4354 
4355     return changes;
4356 }
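/*
 * A sketch of the rewriting above, for a hypothetical indirect global
 * g with direct shadow temp d (mem_base/mem_offset abridged to $off):
 *
 *     add g, g, t0         ld_i32  d, env, $off     inserted reload
 *                     ->   add     d, d, t0         g replaced by d
 *                          st_i32  d, env, $off     inserted sync
 *
 * The load is inserted only when g is TS_DEAD at the use; the store
 * only when NEED_SYNC_ARG() is set for the output.
 */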
4357 
4358 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4359 {
4360     intptr_t off;
4361     int size, align;
4362 
4363     /* When allocating an object, look at the full type. */
4364     size = tcg_type_size(ts->base_type);
4365     switch (ts->base_type) {
4366     case TCG_TYPE_I32:
4367         align = 4;
4368         break;
4369     case TCG_TYPE_I64:
4370     case TCG_TYPE_V64:
4371         align = 8;
4372         break;
4373     case TCG_TYPE_I128:
4374     case TCG_TYPE_V128:
4375     case TCG_TYPE_V256:
4376         /*
4377          * Note that we do not require aligned storage for V256,
4378          * and that we provide alignment for I128 to match V128,
4379          * even if that's above what the host ABI requires.
4380          */
4381         align = 16;
4382         break;
4383     default:
4384         g_assert_not_reached();
4385     }
4386 
4387     /*
4388      * Assume the stack is sufficiently aligned.
4389      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4390      * and do not require 16 byte vector alignment.  This seems slightly
4391      * easier than fully parameterizing the above switch statement.
4392      */
4393     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4394     off = ROUND_UP(s->current_frame_offset, align);
4395 
4396     /* If we've exhausted the stack frame, restart with a smaller TB. */
4397     if (off + size > s->frame_end) {
4398         tcg_raise_tb_overflow(s);
4399     }
4400     s->current_frame_offset = off + size;
4401 #if defined(__sparc__)
4402     off += TCG_TARGET_STACK_BIAS;
4403 #endif
4404 
4405     /* If the object was subdivided, assign memory to all the parts. */
4406     if (ts->base_type != ts->type) {
4407         int part_size = tcg_type_size(ts->type);
4408         int part_count = size / part_size;
4409 
4410         /*
4411          * Each part is allocated sequentially in tcg_temp_new_internal.
4412          * Jump back to the first part by subtracting the current index.
4413          */
4414         ts -= ts->temp_subindex;
4415         for (int i = 0; i < part_count; ++i) {
4416             ts[i].mem_offset = off + i * part_size;
4417             ts[i].mem_base = s->frame_temp;
4418             ts[i].mem_allocated = 1;
4419         }
4420     } else {
4421         ts->mem_offset = off;
4422         ts->mem_base = s->frame_temp;
4423         ts->mem_allocated = 1;
4424     }
4425 }
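/*
 * A worked example with hypothetical offsets, assuming
 * TCG_TARGET_STACK_ALIGN >= 16: an I128 temp on a 64-bit host has
 * base_type TCG_TYPE_I128 but type TCG_TYPE_I64, so size = 16,
 * part_size = 8 and part_count = 2.  With current_frame_offset = 0x28,
 * off = ROUND_UP(0x28, 16) = 0x30 and the parts receive
 *
 *     ts[0].mem_offset = 0x30;    ts[1].mem_offset = 0x38;
 *
 * while current_frame_offset advances to 0x40 (any sparc stack bias is
 * applied to 'off' only afterwards).
 */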
4426 
4427 /* Assign @reg to @ts, and update reg_to_temp[]. */
4428 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4429 {
4430     if (ts->val_type == TEMP_VAL_REG) {
4431         TCGReg old = ts->reg;
4432         tcg_debug_assert(s->reg_to_temp[old] == ts);
4433         if (old == reg) {
4434             return;
4435         }
4436         s->reg_to_temp[old] = NULL;
4437     }
4438     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4439     s->reg_to_temp[reg] = ts;
4440     ts->val_type = TEMP_VAL_REG;
4441     ts->reg = reg;
4442 }
4443 
4444 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4445 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4446 {
4447     tcg_debug_assert(type != TEMP_VAL_REG);
4448     if (ts->val_type == TEMP_VAL_REG) {
4449         TCGReg reg = ts->reg;
4450         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4451         s->reg_to_temp[reg] = NULL;
4452     }
4453     ts->val_type = type;
4454 }
4455 
4456 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4457 
4458 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4459    mark it free; otherwise mark it dead.  */
4460 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4461 {
4462     TCGTempVal new_type;
4463 
4464     switch (ts->kind) {
4465     case TEMP_FIXED:
4466         return;
4467     case TEMP_GLOBAL:
4468     case TEMP_TB:
4469         new_type = TEMP_VAL_MEM;
4470         break;
4471     case TEMP_EBB:
4472         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4473         break;
4474     case TEMP_CONST:
4475         new_type = TEMP_VAL_CONST;
4476         break;
4477     default:
4478         g_assert_not_reached();
4479     }
4480     set_temp_val_nonreg(s, ts, new_type);
4481 }
4482 
4483 /* Mark a temporary as dead.  */
4484 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4485 {
4486     temp_free_or_dead(s, ts, 1);
4487 }
4488 
4489 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4490    register needs to be allocated to store a constant.  If 'free_or_dead'
4491    is non-zero, subsequently release the temporary; if it is positive, the
4492    temp is dead; if it is negative, the temp is free.  */
4493 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4494                       TCGRegSet preferred_regs, int free_or_dead)
4495 {
4496     if (!temp_readonly(ts) && !ts->mem_coherent) {
4497         if (!ts->mem_allocated) {
4498             temp_allocate_frame(s, ts);
4499         }
4500         switch (ts->val_type) {
4501         case TEMP_VAL_CONST:
4502             /* If we're going to free the temp immediately, then we won't
4503                require it later in a register, so attempt to store the
4504                constant to memory directly.  */
4505             if (free_or_dead
4506                 && tcg_out_sti(s, ts->type, ts->val,
4507                                ts->mem_base->reg, ts->mem_offset)) {
4508                 break;
4509             }
4510             temp_load(s, ts, tcg_target_available_regs[ts->type],
4511                       allocated_regs, preferred_regs);
4512             /* fallthrough */
4513 
4514         case TEMP_VAL_REG:
4515             tcg_out_st(s, ts->type, ts->reg,
4516                        ts->mem_base->reg, ts->mem_offset);
4517             break;
4518 
4519         case TEMP_VAL_MEM:
4520             break;
4521 
4522         case TEMP_VAL_DEAD:
4523         default:
4524             g_assert_not_reached();
4525         }
4526         ts->mem_coherent = 1;
4527     }
4528     if (free_or_dead) {
4529         temp_free_or_dead(s, ts, free_or_dead);
4530     }
4531 }
4532 
4533 /* free register 'reg' by spilling the corresponding temporary if necessary */
4534 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4535 {
4536     TCGTemp *ts = s->reg_to_temp[reg];
4537     if (ts != NULL) {
4538         temp_sync(s, ts, allocated_regs, 0, -1);
4539     }
4540 }
4541 
4542 /**
4543  * tcg_reg_alloc:
4544  * @required_regs: Set of registers in which we must allocate.
4545  * @allocated_regs: Set of registers which must be avoided.
4546  * @preferred_regs: Set of registers we should prefer.
4547  * @rev: True if we search the registers in "indirect" order.
4548  *
4549  * The allocated register must be in @required_regs & ~@allocated_regs,
4550  * but if we can put it in @preferred_regs we may save a move later.
4551  */
4552 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4553                             TCGRegSet allocated_regs,
4554                             TCGRegSet preferred_regs, bool rev)
4555 {
4556     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4557     TCGRegSet reg_ct[2];
4558     const int *order;
4559 
4560     reg_ct[1] = required_regs & ~allocated_regs;
4561     tcg_debug_assert(reg_ct[1] != 0);
4562     reg_ct[0] = reg_ct[1] & preferred_regs;
4563 
4564     /* Skip the preferred_regs option if it cannot be satisfied,
4565        or if the preference made no difference.  */
4566     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4567 
4568     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4569 
4570     /* Try free registers, preferences first.  */
4571     for (j = f; j < 2; j++) {
4572         TCGRegSet set = reg_ct[j];
4573 
4574         if (tcg_regset_single(set)) {
4575             /* One register in the set.  */
4576             TCGReg reg = tcg_regset_first(set);
4577             if (s->reg_to_temp[reg] == NULL) {
4578                 return reg;
4579             }
4580         } else {
4581             for (i = 0; i < n; i++) {
4582                 TCGReg reg = order[i];
4583                 if (s->reg_to_temp[reg] == NULL &&
4584                     tcg_regset_test_reg(set, reg)) {
4585                     return reg;
4586                 }
4587             }
4588         }
4589     }
4590 
4591     /* We must spill something.  */
4592     for (j = f; j < 2; j++) {
4593         TCGRegSet set = reg_ct[j];
4594 
4595         if (tcg_regset_single(set)) {
4596             /* One register in the set.  */
4597             TCGReg reg = tcg_regset_first(set);
4598             tcg_reg_free(s, reg, allocated_regs);
4599             return reg;
4600         } else {
4601             for (i = 0; i < n; i++) {
4602                 TCGReg reg = order[i];
4603                 if (tcg_regset_test_reg(set, reg)) {
4604                     tcg_reg_free(s, reg, allocated_regs);
4605                     return reg;
4606                 }
4607             }
4608         }
4609     }
4610 
4611     g_assert_not_reached();
4612 }
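/*
 * An illustration with hypothetical sets: required = {r1, r2},
 * allocated = {}, preferred = {r1} gives reg_ct[0] = {r1},
 * reg_ct[1] = {r1, r2} and f = 0.  A free r1 is returned by the
 * preference pass; with r1 busy but r2 free, the second pass returns
 * r2; with both busy we fall through to the spill loop, which spills
 * the preferred r1 first.
 */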
4613 
4614 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4615                                  TCGRegSet allocated_regs,
4616                                  TCGRegSet preferred_regs, bool rev)
4617 {
4618     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4619     TCGRegSet reg_ct[2];
4620     const int *order;
4621 
4622     /* Keep reg I in the set only if neither I nor I+1 is in allocated_regs. */
4623     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4624     tcg_debug_assert(reg_ct[1] != 0);
4625     reg_ct[0] = reg_ct[1] & preferred_regs;
4626 
4627     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4628 
4629     /*
4630      * Skip the preferred_regs option if it cannot be satisfied,
4631      * or if the preference made no difference.
4632      */
4633     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4634 
4635     /*
4636      * Minimize the number of flushes by looking for 2 free registers first,
4637      * then a single flush, then two flushes.
4638      */
4639     for (fmin = 2; fmin >= 0; fmin--) {
4640         for (j = k; j < 2; j++) {
4641             TCGRegSet set = reg_ct[j];
4642 
4643             for (i = 0; i < n; i++) {
4644                 TCGReg reg = order[i];
4645 
4646                 if (tcg_regset_test_reg(set, reg)) {
4647                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4648                     if (f >= fmin) {
4649                         tcg_reg_free(s, reg, allocated_regs);
4650                         tcg_reg_free(s, reg + 1, allocated_regs);
4651                         return reg;
4652                     }
4653                 }
4654             }
4655         }
4656     }
4657     g_assert_not_reached();
4658 }
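/*
 * The double mask above deserves a worked example (hypothetical 8-bit
 * regset): with allocated_regs = 0b00010100 (r2 and r4),
 *
 *     allocated_regs | (allocated_regs >> 1)  ==  0b00011110
 *
 * so candidates r1..r4 are all excluded: a pair is returned as
 * (reg, reg + 1) and neither half may be in allocated_regs.  The fmin
 * loop then prefers a candidate with both halves already free (f == 2)
 * before resorting to one, and finally two, spills.
 */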
4659 
4660 /* Make sure the temporary is in a register.  If needed, allocate the register
4661    from DESIRED while avoiding ALLOCATED.  */
4662 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4663                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4664 {
4665     TCGReg reg;
4666 
4667     switch (ts->val_type) {
4668     case TEMP_VAL_REG:
4669         return;
4670     case TEMP_VAL_CONST:
4671         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4672                             preferred_regs, ts->indirect_base);
4673         if (ts->type <= TCG_TYPE_I64) {
4674             tcg_out_movi(s, ts->type, reg, ts->val);
4675         } else {
4676             uint64_t val = ts->val;
4677             MemOp vece = MO_64;
4678 
4679             /*
4680              * Find the minimal vector element that matches the constant.
4681              * The targets will, in general, have to do this search anyway,
4682              * so do it generically here.
4683              */
4684             if (val == dup_const(MO_8, val)) {
4685                 vece = MO_8;
4686             } else if (val == dup_const(MO_16, val)) {
4687                 vece = MO_16;
4688             } else if (val == dup_const(MO_32, val)) {
4689                 vece = MO_32;
4690             }
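            /*
             * For example (values invented): 0x2727272727272727 equals
             * dup_const(MO_8, 0x27) and selects MO_8, while
             * 0x0001000100010001 fails the MO_8 test but matches MO_16,
             * and 0x1234567812345678 matches only MO_32.  A value such
             * as 0x0000000012345678 matches none and remains MO_64.
             */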
4691 
4692             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4693         }
4694         ts->mem_coherent = 0;
4695         break;
4696     case TEMP_VAL_MEM:
4697         if (!ts->mem_allocated) {
4698             temp_allocate_frame(s, ts);
4699         }
4700         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4701                             preferred_regs, ts->indirect_base);
4702         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4703         ts->mem_coherent = 1;
4704         break;
4705     case TEMP_VAL_DEAD:
4706     default:
4707         g_assert_not_reached();
4708     }
4709     set_temp_val_reg(s, ts, reg);
4710 }
4711 
4712 /* Save a temporary to memory. 'allocated_regs' is used in case a
4713    temporary register needs to be allocated to store a constant.  */
4714 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4715 {
4716     /* The liveness analysis already ensures that globals are back
4717        in memory. Keep a tcg_debug_assert for safety. */
4718     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4719 }
4720 
4721 /* save globals to their canonical location and assume they can be
4722    modified by the following code. 'allocated_regs' is used in case a
4723    temporary register needs to be allocated to store a constant. */
4724 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4725 {
4726     int i, n;
4727 
4728     for (i = 0, n = s->nb_globals; i < n; i++) {
4729         temp_save(s, &s->temps[i], allocated_regs);
4730     }
4731 }
4732 
4733 /* sync globals to their canonical location and assume they can be
4734    read by the following code. 'allocated_regs' is used in case a
4735    temporary register needs to be allocated to store a constant. */
4736 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4737 {
4738     int i, n;
4739 
4740     for (i = 0, n = s->nb_globals; i < n; i++) {
4741         TCGTemp *ts = &s->temps[i];
4742         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4743                          || ts->kind == TEMP_FIXED
4744                          || ts->mem_coherent);
4745     }
4746 }
4747 
4748 /* at the end of a basic block, we assume all temporaries are dead and
4749    all globals are stored at their canonical location. */
4750 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4751 {
4752     int i;
4753 
4754     for (i = s->nb_globals; i < s->nb_temps; i++) {
4755         TCGTemp *ts = &s->temps[i];
4756 
4757         switch (ts->kind) {
4758         case TEMP_TB:
4759             temp_save(s, ts, allocated_regs);
4760             break;
4761         case TEMP_EBB:
4762             /* The liveness analysis already ensures that temps are dead.
4763                Keep a tcg_debug_assert for safety. */
4764             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4765             break;
4766         case TEMP_CONST:
4767             /* Similarly, we should have freed any allocated register. */
4768             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4769             break;
4770         default:
4771             g_assert_not_reached();
4772         }
4773     }
4774 
4775     save_globals(s, allocated_regs);
4776 }
4777 
4778 /*
4779  * At a conditional branch, we assume all temporaries are dead unless
4780  * explicitly live-across-conditional-branch; all globals and local
4781  * temps are synced to their location.
4782  */
4783 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4784 {
4785     sync_globals(s, allocated_regs);
4786 
4787     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4788         TCGTemp *ts = &s->temps[i];
4789         /*
4790          * The liveness analysis already ensures that temps are dead.
4791          * Keep tcg_debug_asserts for safety.
4792          */
4793         switch (ts->kind) {
4794         case TEMP_TB:
4795             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4796             break;
4797         case TEMP_EBB:
4798         case TEMP_CONST:
4799             break;
4800         default:
4801             g_assert_not_reached();
4802         }
4803     }
4804 }
4805 
4806 /*
4807  * Specialized code generation for INDEX_op_mov_* with a constant.
4808  */
4809 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4810                                   tcg_target_ulong val, TCGLifeData arg_life,
4811                                   TCGRegSet preferred_regs)
4812 {
4813     /* ENV should not be modified.  */
4814     tcg_debug_assert(!temp_readonly(ots));
4815 
4816     /* The movi is not explicitly generated here.  */
4817     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4818     ots->val = val;
4819     ots->mem_coherent = 0;
4820     if (NEED_SYNC_ARG(0)) {
4821         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4822     } else if (IS_DEAD_ARG(0)) {
4823         temp_dead(s, ots);
4824     }
4825 }
4826 
4827 /*
4828  * Specialized code generation for INDEX_op_mov_*.
4829  */
4830 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4831 {
4832     const TCGLifeData arg_life = op->life;
4833     TCGRegSet allocated_regs, preferred_regs;
4834     TCGTemp *ts, *ots;
4835     TCGType otype, itype;
4836     TCGReg oreg, ireg;
4837 
4838     allocated_regs = s->reserved_regs;
4839     preferred_regs = output_pref(op, 0);
4840     ots = arg_temp(op->args[0]);
4841     ts = arg_temp(op->args[1]);
4842 
4843     /* ENV should not be modified.  */
4844     tcg_debug_assert(!temp_readonly(ots));
4845 
4846     /* Note that otype != itype for no-op truncation.  */
4847     otype = ots->type;
4848     itype = ts->type;
4849 
4850     if (ts->val_type == TEMP_VAL_CONST) {
4851         /* propagate constant or generate sti */
4852         tcg_target_ulong val = ts->val;
4853         if (IS_DEAD_ARG(1)) {
4854             temp_dead(s, ts);
4855         }
4856         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4857         return;
4858     }
4859 
4860     /* If the source value is in memory, we're going to be forced
4861        to have it in a register in order to perform the copy.  Copy
4862        the SOURCE value into its own register first; that way we
4863        don't have to reload SOURCE the next time it is used. */
4864     if (ts->val_type == TEMP_VAL_MEM) {
4865         temp_load(s, ts, tcg_target_available_regs[itype],
4866                   allocated_regs, preferred_regs);
4867     }
4868     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4869     ireg = ts->reg;
4870 
4871     if (IS_DEAD_ARG(0)) {
4872         /* mov to a non-saved dead register makes no sense (even with
4873            liveness analysis disabled). */
4874         tcg_debug_assert(NEED_SYNC_ARG(0));
4875         if (!ots->mem_allocated) {
4876             temp_allocate_frame(s, ots);
4877         }
4878         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4879         if (IS_DEAD_ARG(1)) {
4880             temp_dead(s, ts);
4881         }
4882         temp_dead(s, ots);
4883         return;
4884     }
4885 
4886     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4887         /*
4888          * The mov can be suppressed.  Kill input first, so that it
4889          * is unlinked from reg_to_temp, then set the output to the
4890          * reg that we saved from the input.
4891          */
4892         temp_dead(s, ts);
4893         oreg = ireg;
4894     } else {
4895         if (ots->val_type == TEMP_VAL_REG) {
4896             oreg = ots->reg;
4897         } else {
4898             /* Make sure to not spill the input register during allocation. */
4899             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4900                                  allocated_regs | ((TCGRegSet)1 << ireg),
4901                                  preferred_regs, ots->indirect_base);
4902         }
4903         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4904             /*
4905              * Cross register class move not supported.
4906              * Store the source register into the destination slot
4907              * and leave the destination temp as TEMP_VAL_MEM.
4908              */
4909             assert(!temp_readonly(ots));
4910             if (!ots->mem_allocated) {
4911                 temp_allocate_frame(s, ots);
4912             }
4913             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4914             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4915             ots->mem_coherent = 1;
4916             return;
4917         }
4918     }
4919     set_temp_val_reg(s, ots, oreg);
4920     ots->mem_coherent = 0;
4921 
4922     if (NEED_SYNC_ARG(0)) {
4923         temp_sync(s, ots, allocated_regs, 0, 0);
4924     }
4925 }
4926 
4927 /*
4928  * Specialized code generation for INDEX_op_dup_vec.
4929  */
4930 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4931 {
4932     const TCGLifeData arg_life = op->life;
4933     TCGRegSet dup_out_regs, dup_in_regs;
4934     const TCGArgConstraint *dup_args_ct;
4935     TCGTemp *its, *ots;
4936     TCGType itype, vtype;
4937     unsigned vece;
4938     int lowpart_ofs;
4939     bool ok;
4940 
4941     ots = arg_temp(op->args[0]);
4942     its = arg_temp(op->args[1]);
4943 
4944     /* ENV should not be modified.  */
4945     tcg_debug_assert(!temp_readonly(ots));
4946 
4947     itype = its->type;
4948     vece = TCGOP_VECE(op);
4949     vtype = TCGOP_TYPE(op);
4950 
4951     if (its->val_type == TEMP_VAL_CONST) {
4952         /* Propagate constant via movi -> dupi.  */
4953         tcg_target_ulong val = its->val;
4954         if (IS_DEAD_ARG(1)) {
4955             temp_dead(s, its);
4956         }
4957         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4958         return;
4959     }
4960 
4961     dup_args_ct = opcode_args_ct(op);
4962     dup_out_regs = dup_args_ct[0].regs;
4963     dup_in_regs = dup_args_ct[1].regs;
4964 
4965     /* Allocate the output register now.  */
4966     if (ots->val_type != TEMP_VAL_REG) {
4967         TCGRegSet allocated_regs = s->reserved_regs;
4968         TCGReg oreg;
4969 
4970         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4971             /* Make sure to not spill the input register. */
4972             tcg_regset_set_reg(allocated_regs, its->reg);
4973         }
4974         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4975                              output_pref(op, 0), ots->indirect_base);
4976         set_temp_val_reg(s, ots, oreg);
4977     }
4978 
4979     switch (its->val_type) {
4980     case TEMP_VAL_REG:
4981         /*
4982          * The dup constraints must be broad, covering all possible VECE.
4983          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4984          * to fail, indicating that extra moves are required for that case.
4985          */
4986         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4987             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4988                 goto done;
4989             }
4990             /* Try again from memory or a vector input register.  */
4991         }
4992         if (!its->mem_coherent) {
4993             /*
4994              * The input register is not synced, and so an extra store
4995              * would be required to use memory.  Attempt an integer-vector
4996              * register move first.  We do not have a TCGRegSet for this.
4997              */
4998             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4999                 break;
5000             }
5001             /* Sync the temp back to its slot and load from there.  */
5002             temp_sync(s, its, s->reserved_regs, 0, 0);
5003         }
5004         /* fall through */
5005 
5006     case TEMP_VAL_MEM:
5007         lowpart_ofs = 0;
5008         if (HOST_BIG_ENDIAN) {
5009             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5010         }
5011         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5012                              its->mem_offset + lowpart_ofs)) {
5013             goto done;
5014         }
5015         /* Load the input into the destination vector register. */
5016         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5017         break;
5018 
5019     default:
5020         g_assert_not_reached();
5021     }
5022 
5023     /* We now have a vector input register, so dup must succeed. */
5024     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5025     tcg_debug_assert(ok);
5026 
5027  done:
5028     ots->mem_coherent = 0;
5029     if (IS_DEAD_ARG(1)) {
5030         temp_dead(s, its);
5031     }
5032     if (NEED_SYNC_ARG(0)) {
5033         temp_sync(s, ots, s->reserved_regs, 0, 0);
5034     }
5035     if (IS_DEAD_ARG(0)) {
5036         temp_dead(s, ots);
5037     }
5038 }
5039 
5040 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5041 {
5042     const TCGLifeData arg_life = op->life;
5043     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5044     TCGRegSet i_allocated_regs;
5045     TCGRegSet o_allocated_regs;
5046     int i, k, nb_iargs, nb_oargs;
5047     TCGReg reg;
5048     TCGArg arg;
5049     const TCGArgConstraint *args_ct;
5050     const TCGArgConstraint *arg_ct;
5051     TCGTemp *ts;
5052     TCGArg new_args[TCG_MAX_OP_ARGS];
5053     int const_args[TCG_MAX_OP_ARGS];
5054     TCGCond op_cond;
5055 
5056     nb_oargs = def->nb_oargs;
5057     nb_iargs = def->nb_iargs;
5058 
5059     /* copy constants */
5060     memcpy(new_args + nb_oargs + nb_iargs,
5061            op->args + nb_oargs + nb_iargs,
5062            sizeof(TCGArg) * def->nb_cargs);
5063 
5064     i_allocated_regs = s->reserved_regs;
5065     o_allocated_regs = s->reserved_regs;
5066 
5067     switch (op->opc) {
5068     case INDEX_op_brcond_i32:
5069     case INDEX_op_brcond_i64:
5070         op_cond = op->args[2];
5071         break;
5072     case INDEX_op_setcond_i32:
5073     case INDEX_op_setcond_i64:
5074     case INDEX_op_negsetcond_i32:
5075     case INDEX_op_negsetcond_i64:
5076     case INDEX_op_cmp_vec:
5077         op_cond = op->args[3];
5078         break;
5079     case INDEX_op_brcond2_i32:
5080         op_cond = op->args[4];
5081         break;
5082     case INDEX_op_movcond_i32:
5083     case INDEX_op_movcond_i64:
5084     case INDEX_op_setcond2_i32:
5085     case INDEX_op_cmpsel_vec:
5086         op_cond = op->args[5];
5087         break;
5088     default:
5089         /* No condition within opcode. */
5090         op_cond = TCG_COND_ALWAYS;
5091         break;
5092     }
5093 
5094     args_ct = opcode_args_ct(op);
5095 
5096     /* satisfy input constraints */
5097     for (k = 0; k < nb_iargs; k++) {
5098         TCGRegSet i_preferred_regs, i_required_regs;
5099         bool allocate_new_reg, copyto_new_reg;
5100         TCGTemp *ts2;
5101         int i1, i2;
5102 
5103         i = args_ct[nb_oargs + k].sort_index;
5104         arg = op->args[i];
5105         arg_ct = &args_ct[i];
5106         ts = arg_temp(arg);
5107 
5108         if (ts->val_type == TEMP_VAL_CONST) {
5109 #ifdef TCG_REG_ZERO
5110             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5111                 /* Hardware zero register: indicate register via non-const. */
5112                 const_args[i] = 0;
5113                 new_args[i] = TCG_REG_ZERO;
5114                 continue;
5115             }
5116 #endif
5117 
5118             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5119                                        op_cond, TCGOP_VECE(op))) {
5120                 /* constant is OK for instruction */
5121                 const_args[i] = 1;
5122                 new_args[i] = ts->val;
5123                 continue;
5124             }
5125         }
5126 
5127         reg = ts->reg;
5128         i_preferred_regs = 0;
5129         i_required_regs = arg_ct->regs;
5130         allocate_new_reg = false;
5131         copyto_new_reg = false;
5132 
5133         switch (arg_ct->pair) {
5134         case 0: /* not paired */
5135             if (arg_ct->ialias) {
5136                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5137 
5138                 /*
5139                  * If the input is readonly, then it cannot also be an
5140                  * output and aliased to itself.  If the input is not
5141                  * dead after the instruction, we must allocate a new
5142                  * register and move it.
5143                  */
5144                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5145                     || args_ct[arg_ct->alias_index].newreg) {
5146                     allocate_new_reg = true;
5147                 } else if (ts->val_type == TEMP_VAL_REG) {
5148                     /*
5149                      * Check if the current register has already been
5150                      * allocated for another input.
5151                      */
5152                     allocate_new_reg =
5153                         tcg_regset_test_reg(i_allocated_regs, reg);
5154                 }
5155             }
5156             if (!allocate_new_reg) {
5157                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5158                           i_preferred_regs);
5159                 reg = ts->reg;
5160                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5161             }
5162             if (allocate_new_reg) {
5163                 /*
5164                  * Allocate a new register matching the constraint
5165                  * and move the temporary register into it.
5166                  */
5167                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5168                           i_allocated_regs, 0);
5169                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5170                                     i_preferred_regs, ts->indirect_base);
5171                 copyto_new_reg = true;
5172             }
5173             break;
5174 
5175         case 1:
5176             /* First of an input pair; if i1 == i2, the second is an output. */
5177             i1 = i;
5178             i2 = arg_ct->pair_index;
5179             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5180 
5181             /*
5182              * It is easier to default to allocating a new pair
5183              * and to identify a few cases where it's not required.
5184              */
5185             if (arg_ct->ialias) {
5186                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5187                 if (IS_DEAD_ARG(i1) &&
5188                     IS_DEAD_ARG(i2) &&
5189                     !temp_readonly(ts) &&
5190                     ts->val_type == TEMP_VAL_REG &&
5191                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5192                     tcg_regset_test_reg(i_required_regs, reg) &&
5193                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5194                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5195                     (ts2
5196                      ? ts2->val_type == TEMP_VAL_REG &&
5197                        ts2->reg == reg + 1 &&
5198                        !temp_readonly(ts2)
5199                      : s->reg_to_temp[reg + 1] == NULL)) {
5200                     break;
5201                 }
5202             } else {
5203                 /* Without aliasing, the pair must also be an input. */
5204                 tcg_debug_assert(ts2);
5205                 if (ts->val_type == TEMP_VAL_REG &&
5206                     ts2->val_type == TEMP_VAL_REG &&
5207                     ts2->reg == reg + 1 &&
5208                     tcg_regset_test_reg(i_required_regs, reg)) {
5209                     break;
5210                 }
5211             }
5212             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5213                                      0, ts->indirect_base);
5214             goto do_pair;
5215 
5216         case 2: /* pair second */
5217             reg = new_args[arg_ct->pair_index] + 1;
5218             goto do_pair;
5219 
5220         case 3: /* ialias with second output, no first input */
5221             tcg_debug_assert(arg_ct->ialias);
5222             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5223 
5224             if (IS_DEAD_ARG(i) &&
5225                 !temp_readonly(ts) &&
5226                 ts->val_type == TEMP_VAL_REG &&
5227                 reg > 0 &&
5228                 s->reg_to_temp[reg - 1] == NULL &&
5229                 tcg_regset_test_reg(i_required_regs, reg) &&
5230                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5231                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5232                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5233                 break;
5234             }
5235             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5236                                      i_allocated_regs, 0,
5237                                      ts->indirect_base);
5238             tcg_regset_set_reg(i_allocated_regs, reg);
5239             reg += 1;
5240             goto do_pair;
5241 
5242         do_pair:
5243             /*
5244              * If an aliased input is not dead after the instruction,
5245              * we must allocate a new register and move it.
5246              */
5247             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5248                 TCGRegSet t_allocated_regs = i_allocated_regs;
5249 
5250                 /*
5251                  * Because of the alias, and the continued life, make sure
5252                  * that the temp is somewhere *other* than the reg pair,
5253                  * and we get a copy in reg.
5254                  */
5255                 tcg_regset_set_reg(t_allocated_regs, reg);
5256                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5257                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5258                     /* If ts was already in reg, copy it somewhere else. */
5259                     TCGReg nr;
5260                     bool ok;
5261 
5262                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5263                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5264                                        t_allocated_regs, 0, ts->indirect_base);
5265                     ok = tcg_out_mov(s, ts->type, nr, reg);
5266                     tcg_debug_assert(ok);
5267 
5268                     set_temp_val_reg(s, ts, nr);
5269                 } else {
5270                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5271                               t_allocated_regs, 0);
5272                     copyto_new_reg = true;
5273                 }
5274             } else {
5275                 /* Preferably allocate to reg, otherwise copy. */
5276                 i_required_regs = (TCGRegSet)1 << reg;
5277                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5278                           i_preferred_regs);
5279                 copyto_new_reg = ts->reg != reg;
5280             }
5281             break;
5282 
5283         default:
5284             g_assert_not_reached();
5285         }
5286 
5287         if (copyto_new_reg) {
5288             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5289                 /*
5290                  * Cross register class move not supported.  Sync the
5291                  * temp back to its slot and load from there.
5292                  */
5293                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5294                 tcg_out_ld(s, ts->type, reg,
5295                            ts->mem_base->reg, ts->mem_offset);
5296             }
5297         }
5298         new_args[i] = reg;
5299         const_args[i] = 0;
5300         tcg_regset_set_reg(i_allocated_regs, reg);
5301     }
5302 
5303     /* mark dead temporaries and free the associated registers */
5304     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5305         if (IS_DEAD_ARG(i)) {
5306             temp_dead(s, arg_temp(op->args[i]));
5307         }
5308     }
5309 
5310     if (def->flags & TCG_OPF_COND_BRANCH) {
5311         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5312     } else if (def->flags & TCG_OPF_BB_END) {
5313         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5314     } else {
5315         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5316             /* XXX: permit generic clobber register list? */
5317             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5318                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5319                     tcg_reg_free(s, i, i_allocated_regs);
5320                 }
5321             }
5322         }
5323         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5324             /* sync globals if the op has side effects and might trigger
5325                an exception. */
5326             sync_globals(s, i_allocated_regs);
5327         }
5328 
5329         /* satisfy the output constraints */
5330         for (k = 0; k < nb_oargs; k++) {
5331             i = args_ct[k].sort_index;
5332             arg = op->args[i];
5333             arg_ct = &args_ct[i];
5334             ts = arg_temp(arg);
5335 
5336             /* ENV should not be modified.  */
5337             tcg_debug_assert(!temp_readonly(ts));
5338 
5339             switch (arg_ct->pair) {
5340             case 0: /* not paired */
5341                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5342                     reg = new_args[arg_ct->alias_index];
5343                 } else if (arg_ct->newreg) {
5344                     reg = tcg_reg_alloc(s, arg_ct->regs,
5345                                         i_allocated_regs | o_allocated_regs,
5346                                         output_pref(op, k), ts->indirect_base);
5347                 } else {
5348                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5349                                         output_pref(op, k), ts->indirect_base);
5350                 }
5351                 break;
5352 
5353             case 1: /* first of pair */
5354                 if (arg_ct->oalias) {
5355                     reg = new_args[arg_ct->alias_index];
5356                 } else if (arg_ct->newreg) {
5357                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5358                                              i_allocated_regs | o_allocated_regs,
5359                                              output_pref(op, k),
5360                                              ts->indirect_base);
5361                 } else {
5362                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5363                                              output_pref(op, k),
5364                                              ts->indirect_base);
5365                 }
5366                 break;
5367 
5368             case 2: /* second of pair */
5369                 if (arg_ct->oalias) {
5370                     reg = new_args[arg_ct->alias_index];
5371                 } else {
5372                     reg = new_args[arg_ct->pair_index] + 1;
5373                 }
5374                 break;
5375 
5376             case 3: /* first of pair, aliasing with a second input */
5377                 tcg_debug_assert(!arg_ct->newreg);
5378                 reg = new_args[arg_ct->pair_index] - 1;
5379                 break;
5380 
5381             default:
5382                 g_assert_not_reached();
5383             }
5384             tcg_regset_set_reg(o_allocated_regs, reg);
5385             set_temp_val_reg(s, ts, reg);
5386             ts->mem_coherent = 0;
5387             new_args[i] = reg;
5388         }
5389     }
5390 
5391     /* emit instruction */
5392     TCGType type = TCGOP_TYPE(op);
5393     switch (op->opc) {
5394     case INDEX_op_ext_i32_i64:
5395         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5396         break;
5397     case INDEX_op_extu_i32_i64:
5398         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5399         break;
5400     case INDEX_op_extrl_i64_i32:
5401         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5402         break;
5403 
5404     case INDEX_op_add:
5405     case INDEX_op_and:
5406     case INDEX_op_andc:
5407     case INDEX_op_divs:
5408     case INDEX_op_divu:
5409     case INDEX_op_eqv:
5410     case INDEX_op_mul:
5411     case INDEX_op_mulsh:
5412     case INDEX_op_muluh:
5413     case INDEX_op_nand:
5414     case INDEX_op_nor:
5415     case INDEX_op_or:
5416     case INDEX_op_orc:
5417     case INDEX_op_rems:
5418     case INDEX_op_remu:
5419     case INDEX_op_rotl:
5420     case INDEX_op_rotr:
5421     case INDEX_op_sar:
5422     case INDEX_op_shl:
5423     case INDEX_op_shr:
5424     case INDEX_op_xor:
5425         {
5426             const TCGOutOpBinary *out =
5427                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5428 
5429             /* Constants should never appear in the first source operand. */
5430             tcg_debug_assert(!const_args[1]);
5431             if (const_args[2]) {
5432                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5433             } else {
5434                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5435             }
5436         }
5437         break;
5438 
5439     case INDEX_op_sub:
5440         {
5441             const TCGOutOpSubtract *out = &outop_sub;
5442 
5443             /*
5444              * Constants should never appear in the second source operand.
5445              * These are folded to add with a negative constant.
5446              */
5447             tcg_debug_assert(!const_args[2]);
5448             if (const_args[1]) {
5449                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5450             } else {
5451                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5452             }
5453         }
5454         break;
5455 
5456     case INDEX_op_neg:
5457     case INDEX_op_not:
5458         {
5459             const TCGOutOpUnary *out =
5460                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5461 
5462             /* Constants should have been folded. */
5463             tcg_debug_assert(!const_args[1]);
5464             out->out_rr(s, type, new_args[0], new_args[1]);
5465         }
5466         break;
5467 
5468     case INDEX_op_divs2:
5469     case INDEX_op_divu2:
5470         {
5471             const TCGOutOpDivRem *out =
5472                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5473 
5474             /* Only used by x86 and s390x, which use matching constraints. */
5475             tcg_debug_assert(new_args[0] == new_args[2]);
5476             tcg_debug_assert(new_args[1] == new_args[3]);
5477             tcg_debug_assert(!const_args[4]);
5478             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5479         }
5480         break;
5481 
5482     default:
5483         if (def->flags & TCG_OPF_VECTOR) {
5484             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5485                            TCGOP_VECE(op), new_args, const_args);
5486         } else {
5487             tcg_out_op(s, op->opc, type, new_args, const_args);
5488         }
5489         break;
5490     }
5491 
5492     /* move the outputs into the correct registers if needed */
5493     for (i = 0; i < nb_oargs; i++) {
5494         ts = arg_temp(op->args[i]);
5495 
5496         /* ENV should not be modified.  */
5497         tcg_debug_assert(!temp_readonly(ts));
5498 
5499         if (NEED_SYNC_ARG(i)) {
5500             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5501         } else if (IS_DEAD_ARG(i)) {
5502             temp_dead(s, ts);
5503         }
5504     }
5505 }
5506 
5507 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5508 {
5509     const TCGLifeData arg_life = op->life;
5510     TCGTemp *ots, *itsl, *itsh;
5511     TCGType vtype = TCGOP_TYPE(op);
5512 
5513     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5514     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5515     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5516 
5517     ots = arg_temp(op->args[0]);
5518     itsl = arg_temp(op->args[1]);
5519     itsh = arg_temp(op->args[2]);
5520 
5521     /* ENV should not be modified.  */
5522     tcg_debug_assert(!temp_readonly(ots));
5523 
5524     /* Allocate the output register now.  */
5525     if (ots->val_type != TEMP_VAL_REG) {
5526         TCGRegSet allocated_regs = s->reserved_regs;
5527         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5528         TCGReg oreg;
5529 
5530         /* Make sure to not spill the input registers. */
5531         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5532             tcg_regset_set_reg(allocated_regs, itsl->reg);
5533         }
5534         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5535             tcg_regset_set_reg(allocated_regs, itsh->reg);
5536         }
5537 
5538         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5539                              output_pref(op, 0), ots->indirect_base);
5540         set_temp_val_reg(s, ots, oreg);
5541     }
5542 
5543     /* Promote dup2 of immediates to dupi_vec. */
5544     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5545         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5546         MemOp vece = MO_64;
5547 
5548         if (val == dup_const(MO_8, val)) {
5549             vece = MO_8;
5550         } else if (val == dup_const(MO_16, val)) {
5551             vece = MO_16;
5552         } else if (val == dup_const(MO_32, val)) {
5553             vece = MO_32;
5554         }
5555 
5556         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5557         goto done;
5558     }
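    /*
     * For example (values invented): itsl->val == 0x00000001 and
     * itsh->val == 0x00000001 deposit to 0x0000000100000001, which
     * equals dup_const(MO_32, 1), so a single dupi at MO_32 fills
     * both halves of the element at once.
     */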
5559 
5560     /* If the two inputs form one 64-bit value, try dupm_vec. */
5561     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5562         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5563         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5564         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5565 
5566         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5567         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5568 
5569         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5570                              its->mem_base->reg, its->mem_offset)) {
5571             goto done;
5572         }
5573     }
5574 
5575     /* Fall back to generic expansion. */
5576     return false;
5577 
5578  done:
5579     ots->mem_coherent = 0;
5580     if (IS_DEAD_ARG(1)) {
5581         temp_dead(s, itsl);
5582     }
5583     if (IS_DEAD_ARG(2)) {
5584         temp_dead(s, itsh);
5585     }
5586     if (NEED_SYNC_ARG(0)) {
5587         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5588     } else if (IS_DEAD_ARG(0)) {
5589         temp_dead(s, ots);
5590     }
5591     return true;
5592 }
5593 
5594 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5595                          TCGRegSet allocated_regs)
5596 {
5597     if (ts->val_type == TEMP_VAL_REG) {
5598         if (ts->reg != reg) {
5599             tcg_reg_free(s, reg, allocated_regs);
5600             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5601                 /*
5602                  * Cross register class move not supported.  Sync the
5603                  * temp back to its slot and load from there.
5604                  */
5605                 temp_sync(s, ts, allocated_regs, 0, 0);
5606                 tcg_out_ld(s, ts->type, reg,
5607                            ts->mem_base->reg, ts->mem_offset);
5608             }
5609         }
5610     } else {
5611         TCGRegSet arg_set = 0;
5612 
5613         tcg_reg_free(s, reg, allocated_regs);
5614         tcg_regset_set_reg(arg_set, reg);
5615         temp_load(s, ts, arg_set, allocated_regs, 0);
5616     }
5617 }
5618 
5619 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5620                          TCGRegSet allocated_regs)
5621 {
5622     /*
5623      * When the destination is on the stack, load up the temp and store.
5624      * If there are many call-saved registers, the temp might live to
5625      * see another use; otherwise it'll be discarded.
5626      */
5627     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5628     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5629                arg_slot_stk_ofs(arg_slot));
5630 }
5631 
5632 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5633                             TCGTemp *ts, TCGRegSet *allocated_regs)
5634 {
5635     if (arg_slot_reg_p(l->arg_slot)) {
5636         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5637         load_arg_reg(s, reg, ts, *allocated_regs);
5638         tcg_regset_set_reg(*allocated_regs, reg);
5639     } else {
5640         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5641     }
5642 }
5643 
5644 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5645                          intptr_t ref_off, TCGRegSet *allocated_regs)
5646 {
5647     TCGReg reg;
5648 
5649     if (arg_slot_reg_p(arg_slot)) {
5650         reg = tcg_target_call_iarg_regs[arg_slot];
5651         tcg_reg_free(s, reg, *allocated_regs);
5652         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5653         tcg_regset_set_reg(*allocated_regs, reg);
5654     } else {
5655         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5656                             *allocated_regs, 0, false);
5657         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5658         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5659                    arg_slot_stk_ofs(arg_slot));
5660     }
5661 }
5662 
5663 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5664 {
5665     const int nb_oargs = TCGOP_CALLO(op);
5666     const int nb_iargs = TCGOP_CALLI(op);
5667     const TCGLifeData arg_life = op->life;
5668     const TCGHelperInfo *info = tcg_call_info(op);
5669     TCGRegSet allocated_regs = s->reserved_regs;
5670     int i;
5671 
5672     /*
5673      * Move inputs into place in reverse order,
5674      * so that we place stacked arguments first.
5675      */
5676     for (i = nb_iargs - 1; i >= 0; --i) {
5677         const TCGCallArgumentLoc *loc = &info->in[i];
5678         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5679 
5680         switch (loc->kind) {
5681         case TCG_CALL_ARG_NORMAL:
5682         case TCG_CALL_ARG_EXTEND_U:
5683         case TCG_CALL_ARG_EXTEND_S:
5684             load_arg_normal(s, loc, ts, &allocated_regs);
5685             break;
5686         case TCG_CALL_ARG_BY_REF:
5687             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5688             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5689                          arg_slot_stk_ofs(loc->ref_slot),
5690                          &allocated_regs);
5691             break;
5692         case TCG_CALL_ARG_BY_REF_N:
5693             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5694             break;
5695         default:
5696             g_assert_not_reached();
5697         }
5698     }
5699 
5700     /* Mark dead temporaries and free the associated registers.  */
5701     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5702         if (IS_DEAD_ARG(i)) {
5703             temp_dead(s, arg_temp(op->args[i]));
5704         }
5705     }
5706 
5707     /* Clobber call registers.  */
5708     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5709         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5710             tcg_reg_free(s, i, allocated_regs);
5711         }
5712     }
5713 
5714     /*
5715      * Save globals if they might be written by the helper,
5716      * sync them if they might be read.
5717      */
5718     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5719         /* Nothing to do */
5720     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5721         sync_globals(s, allocated_regs);
5722     } else {
5723         save_globals(s, allocated_regs);
5724     }
5725 
5726     /*
5727      * If the ABI passes a pointer to the returned struct as the first
5728      * argument, load that now.  Pass a pointer to the output home slot.
5729      */
5730     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5731         TCGTemp *ts = arg_temp(op->args[0]);
5732 
5733         if (!ts->mem_allocated) {
5734             temp_allocate_frame(s, ts);
5735         }
5736         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5737     }
5738 
5739     tcg_out_call(s, tcg_call_func(op), info);
5740 
5741     /* Assign output registers and emit moves if needed.  */
5742     switch (info->out_kind) {
5743     case TCG_CALL_RET_NORMAL:
5744         for (i = 0; i < nb_oargs; i++) {
5745             TCGTemp *ts = arg_temp(op->args[i]);
5746             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5747 
5748             /* ENV should not be modified.  */
5749             tcg_debug_assert(!temp_readonly(ts));
5750 
5751             set_temp_val_reg(s, ts, reg);
5752             ts->mem_coherent = 0;
5753         }
5754         break;
5755 
5756     case TCG_CALL_RET_BY_VEC:
5757         {
5758             TCGTemp *ts = arg_temp(op->args[0]);
5759 
5760             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5761             tcg_debug_assert(ts->temp_subindex == 0);
5762             if (!ts->mem_allocated) {
5763                 temp_allocate_frame(s, ts);
5764             }
5765             tcg_out_st(s, TCG_TYPE_V128,
5766                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5767                        ts->mem_base->reg, ts->mem_offset);
5768         }
5769         /* fall through to mark all parts in memory */
5770 
5771     case TCG_CALL_RET_BY_REF:
5772         /* The callee has performed a write through the reference. */
5773         for (i = 0; i < nb_oargs; i++) {
5774             TCGTemp *ts = arg_temp(op->args[i]);
5775             ts->val_type = TEMP_VAL_MEM;
5776         }
5777         break;
5778 
5779     default:
5780         g_assert_not_reached();
5781     }
5782 
5783     /* Flush or discard output registers as needed. */
5784     for (i = 0; i < nb_oargs; i++) {
5785         TCGTemp *ts = arg_temp(op->args[i]);
5786         if (NEED_SYNC_ARG(i)) {
5787             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5788         } else if (IS_DEAD_ARG(i)) {
5789             temp_dead(s, ts);
5790         }
5791     }
5792 }
5793 
5794 /**
5795  * atom_and_align_for_opc:
5796  * @s: tcg context
5797  * @opc: memory operation code
5798  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5799  * @allow_two_ops: true if we are prepared to issue two operations
5800  *
5801  * Return the alignment and atomicity to use for the inline fast path
5802  * for the given memory operation.  The alignment may be larger than
5803  * that specified in @opc, and the correct alignment will be diagnosed
5804  * by the slow path helper.
5805  *
5806  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5807  * and issue two loads or stores for subalignment.
5808  */
5809 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5810                                            MemOp host_atom, bool allow_two_ops)
5811 {
5812     MemOp align = memop_alignment_bits(opc);
5813     MemOp size = opc & MO_SIZE;
5814     MemOp half = size ? size - 1 : 0;
5815     MemOp atom = opc & MO_ATOM_MASK;
5816     MemOp atmax;
5817 
5818     switch (atom) {
5819     case MO_ATOM_NONE:
5820         /* The operation requires no specific atomicity. */
5821         atmax = MO_8;
5822         break;
5823 
5824     case MO_ATOM_IFALIGN:
5825         atmax = size;
5826         break;
5827 
5828     case MO_ATOM_IFALIGN_PAIR:
5829         atmax = half;
5830         break;
5831 
5832     case MO_ATOM_WITHIN16:
5833         atmax = size;
5834         if (size == MO_128) {
5835             /* Misalignment implies !within16, and therefore no atomicity. */
5836         } else if (host_atom != MO_ATOM_WITHIN16) {
5837             /* The host does not implement within16, so require alignment. */
5838             align = MAX(align, size);
5839         }
5840         break;
5841 
5842     case MO_ATOM_WITHIN16_PAIR:
5843         atmax = size;
5844         /*
5845          * Misalignment implies !within16, and therefore half atomicity.
5846          * Any host prepared for two operations can implement this with
5847          * half alignment.
5848          */
5849         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5850             align = MAX(align, half);
5851         }
5852         break;
5853 
5854     case MO_ATOM_SUBALIGN:
5855         atmax = size;
5856         if (host_atom != MO_ATOM_SUBALIGN) {
5857             /* If unaligned but not odd, there are subobjects up to half. */
5858             if (allow_two_ops) {
5859                 align = MAX(align, half);
5860             } else {
5861                 align = MAX(align, size);
5862             }
5863         }
5864         break;
5865 
5866     default:
5867         g_assert_not_reached();
5868     }
5869 
5870     return (TCGAtomAlign){ .atom = atmax, .align = align };
5871 }
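/*
 * Worked examples (illustrative): for MO_64 | MO_ATOM_IFALIGN_PAIR,
 * size is MO_64, half is MO_32, and atmax becomes MO_32, so the fast
 * path guarantees only 4-byte atomicity for each half.  For
 * MO_64 | MO_ATOM_WITHIN16 on a host without within16 support, align
 * is raised to MO_64, so that any misaligned access is diverted to
 * the slow path helper.
 */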
5872 
5873 /*
5874  * Similarly for qemu_ld/st slow path helpers.
5875  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5876  * using only the provided backend tcg_out_* functions.
5877  */
5878 
5879 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5880 {
5881     int ofs = arg_slot_stk_ofs(slot);
5882 
5883     /*
5884      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5885      * require extension to uint64_t, adjust the address for uint32_t.
5886      */
5887     if (HOST_BIG_ENDIAN &&
5888         TCG_TARGET_REG_BITS == 64 &&
5889         type == TCG_TYPE_I32) {
5890         ofs += 4;
5891     }
5892     return ofs;
5893 }
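/*
 * For example, on a 64-bit big-endian host such as s390x, an I32
 * argument stored to a stack slot occupies the high-addressed half of
 * the 8-byte slot, hence the ofs += 4 above; on a little-endian host
 * the value already sits at the slot base and no adjustment is needed.
 */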
5894 
5895 static void tcg_out_helper_load_slots(TCGContext *s,
5896                                       unsigned nmov, TCGMovExtend *mov,
5897                                       const TCGLdstHelperParam *parm)
5898 {
5899     unsigned i;
5900     TCGReg dst3;
5901 
5902     /*
5903      * Start from the end, storing to the stack first.
5904      * This frees those registers, so we need not consider overlap.
5905      */
5906     for (i = nmov; i-- > 0; ) {
5907         unsigned slot = mov[i].dst;
5908 
5909         if (arg_slot_reg_p(slot)) {
5910             goto found_reg;
5911         }
5912 
5913         TCGReg src = mov[i].src;
5914         TCGType dst_type = mov[i].dst_type;
5915         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5916 
5917         /* The argument is going onto the stack; extend into scratch. */
5918         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5919             tcg_debug_assert(parm->ntmp != 0);
5920             mov[i].dst = src = parm->tmp[0];
5921             tcg_out_movext1(s, &mov[i]);
5922         }
5923 
5924         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5925                    tcg_out_helper_stk_ofs(dst_type, slot));
5926     }
5927     return;
5928 
5929  found_reg:
5930     /*
5931      * The remaining arguments are in registers.
5932      * Convert slot numbers to argument registers.
5933      */
5934     nmov = i + 1;
5935     for (i = 0; i < nmov; ++i) {
5936         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5937     }
5938 
5939     switch (nmov) {
5940     case 4:
5941         /* The backend must have provided enough temps for the worst case. */
5942         tcg_debug_assert(parm->ntmp >= 2);
5943 
5944         dst3 = mov[3].dst;
5945         for (unsigned j = 0; j < 3; ++j) {
5946             if (dst3 == mov[j].src) {
5947                 /*
5948                  * Conflict. Copy the source to a temporary, perform the
5949                  * remaining moves, then the extension from our scratch
5950                  * on the way out.
5951                  */
5952                 TCGReg scratch = parm->tmp[1];
5953 
5954                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5955                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5956                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5957                 return;
5958             }
5959         }
5960 
5961         /* No conflicts: perform this move and continue. */
5962         tcg_out_movext1(s, &mov[3]);
5963         /* fall through */
5964 
5965     case 3:
5966         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5967                         parm->ntmp ? parm->tmp[0] : -1);
5968         break;
5969     case 2:
5970         tcg_out_movext2(s, mov, mov + 1,
5971                         parm->ntmp ? parm->tmp[0] : -1);
5972         break;
5973     case 1:
5974         tcg_out_movext1(s, mov);
5975         break;
5976     default:
5977         g_assert_not_reached();
5978     }
5979 }
5980 
5981 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5982                                     TCGType type, tcg_target_long imm,
5983                                     const TCGLdstHelperParam *parm)
5984 {
5985     if (arg_slot_reg_p(slot)) {
5986         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5987     } else {
5988         int ofs = tcg_out_helper_stk_ofs(type, slot);
5989         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5990             tcg_debug_assert(parm->ntmp != 0);
5991             tcg_out_movi(s, type, parm->tmp[0], imm);
5992             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5993         }
5994     }
5995 }
5996 
5997 static void tcg_out_helper_load_common_args(TCGContext *s,
5998                                             const TCGLabelQemuLdst *ldst,
5999                                             const TCGLdstHelperParam *parm,
6000                                             const TCGHelperInfo *info,
6001                                             unsigned next_arg)
6002 {
6003     TCGMovExtend ptr_mov = {
6004         .dst_type = TCG_TYPE_PTR,
6005         .src_type = TCG_TYPE_PTR,
6006         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6007     };
6008     const TCGCallArgumentLoc *loc = &info->in[0];
6009     TCGType type;
6010     unsigned slot;
6011     tcg_target_ulong imm;
6012 
6013     /*
6014      * Handle env, which is always first.
6015      */
6016     ptr_mov.dst = loc->arg_slot;
6017     ptr_mov.src = TCG_AREG0;
6018     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6019 
6020     /*
6021      * Handle oi.
6022      */
6023     imm = ldst->oi;
6024     loc = &info->in[next_arg];
6025     type = TCG_TYPE_I32;
6026     switch (loc->kind) {
6027     case TCG_CALL_ARG_NORMAL:
6028         break;
6029     case TCG_CALL_ARG_EXTEND_U:
6030     case TCG_CALL_ARG_EXTEND_S:
6031         /* No extension required for MemOpIdx. */
6032         tcg_debug_assert(imm <= INT32_MAX);
6033         type = TCG_TYPE_REG;
6034         break;
6035     default:
6036         g_assert_not_reached();
6037     }
6038     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6039     next_arg++;
6040 
6041     /*
6042      * Handle ra.
6043      */
6044     loc = &info->in[next_arg];
6045     slot = loc->arg_slot;
6046     if (parm->ra_gen) {
6047         int arg_reg = -1;
6048         TCGReg ra_reg;
6049 
6050         if (arg_slot_reg_p(slot)) {
6051             arg_reg = tcg_target_call_iarg_regs[slot];
6052         }
6053         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6054 
6055         ptr_mov.dst = slot;
6056         ptr_mov.src = ra_reg;
6057         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6058     } else {
6059         imm = (uintptr_t)ldst->raddr;
6060         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6061     }
6062 }
6063 
6064 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6065                                        const TCGCallArgumentLoc *loc,
6066                                        TCGType dst_type, TCGType src_type,
6067                                        TCGReg lo, TCGReg hi)
6068 {
6069     MemOp reg_mo;
6070 
6071     if (dst_type <= TCG_TYPE_REG) {
6072         MemOp src_ext;
6073 
6074         switch (loc->kind) {
6075         case TCG_CALL_ARG_NORMAL:
6076             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6077             break;
6078         case TCG_CALL_ARG_EXTEND_U:
6079             dst_type = TCG_TYPE_REG;
6080             src_ext = MO_UL;
6081             break;
6082         case TCG_CALL_ARG_EXTEND_S:
6083             dst_type = TCG_TYPE_REG;
6084             src_ext = MO_SL;
6085             break;
6086         default:
6087             g_assert_not_reached();
6088         }
6089 
6090         mov[0].dst = loc->arg_slot;
6091         mov[0].dst_type = dst_type;
6092         mov[0].src = lo;
6093         mov[0].src_type = src_type;
6094         mov[0].src_ext = src_ext;
6095         return 1;
6096     }
6097 
6098     if (TCG_TARGET_REG_BITS == 32) {
6099         assert(dst_type == TCG_TYPE_I64);
6100         reg_mo = MO_32;
6101     } else {
6102         assert(dst_type == TCG_TYPE_I128);
6103         reg_mo = MO_64;
6104     }
6105 
6106     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6107     mov[0].src = lo;
6108     mov[0].dst_type = TCG_TYPE_REG;
6109     mov[0].src_type = TCG_TYPE_REG;
6110     mov[0].src_ext = reg_mo;
6111 
6112     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6113     mov[1].src = hi;
6114     mov[1].dst_type = TCG_TYPE_REG;
6115     mov[1].src_type = TCG_TYPE_REG;
6116     mov[1].src_ext = reg_mo;
6117 
6118     return 2;
6119 }
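/*
 * For example (illustrative), a 32-bit host passing an I64 value in
 * two consecutive slots takes the second path: reg_mo is MO_32 and
 * loc[HOST_BIG_ENDIAN] steers the low word (slot 0 on a little-endian
 * host, slot 1 on a big-endian one) while the high word fills the
 * other slot, for a total of two moves.
 */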
6120 
6121 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6122                                    const TCGLdstHelperParam *parm)
6123 {
6124     const TCGHelperInfo *info;
6125     const TCGCallArgumentLoc *loc;
6126     TCGMovExtend mov[2];
6127     unsigned next_arg, nmov;
6128     MemOp mop = get_memop(ldst->oi);
6129 
6130     switch (mop & MO_SIZE) {
6131     case MO_8:
6132     case MO_16:
6133     case MO_32:
6134         info = &info_helper_ld32_mmu;
6135         break;
6136     case MO_64:
6137         info = &info_helper_ld64_mmu;
6138         break;
6139     case MO_128:
6140         info = &info_helper_ld128_mmu;
6141         break;
6142     default:
6143         g_assert_not_reached();
6144     }
6145 
6146     /* Defer env argument. */
6147     next_arg = 1;
6148 
6149     loc = &info->in[next_arg];
6150     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6151         /*
6152          * 32-bit host with 32-bit guest: zero-extend the guest address
6153          * to 64 bits for the helper by storing the low part, then
6154          * load a zero for the high part.
6155          */
6156         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6157                                TCG_TYPE_I32, TCG_TYPE_I32,
6158                                ldst->addr_reg, -1);
6159         tcg_out_helper_load_slots(s, 1, mov, parm);
6160 
6161         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6162                                 TCG_TYPE_I32, 0, parm);
6163         next_arg += 2;
6164     } else {
6165         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6166                                       ldst->addr_reg, -1);
6167         tcg_out_helper_load_slots(s, nmov, mov, parm);
6168         next_arg += nmov;
6169     }
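    /*
     * Illustrative recap of the 32-bit-guest branch above: the helper's
     * 64-bit address argument spans two slots.  The single move places
     * addr_reg in the low-half slot (selected via HOST_BIG_ENDIAN), the
     * immediate store zeroes the high-half slot, and next_arg steps
     * past both.
     */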
6170 
6171     switch (info->out_kind) {
6172     case TCG_CALL_RET_NORMAL:
6173     case TCG_CALL_RET_BY_VEC:
6174         break;
6175     case TCG_CALL_RET_BY_REF:
6176         /*
6177          * The return reference is in the first argument slot.
6178          * We need memory in which to return: re-use the top of stack.
6179          */
6180         {
6181             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6182 
6183             if (arg_slot_reg_p(0)) {
6184                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6185                                  TCG_REG_CALL_STACK, ofs_slot0);
6186             } else {
6187                 tcg_debug_assert(parm->ntmp != 0);
6188                 tcg_out_addi_ptr(s, parm->tmp[0],
6189                                  TCG_REG_CALL_STACK, ofs_slot0);
6190                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6191                            TCG_REG_CALL_STACK, ofs_slot0);
6192             }
6193         }
6194         break;
6195     default:
6196         g_assert_not_reached();
6197     }
6198 
6199     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6200 }
6201 
6202 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6203                                   bool load_sign,
6204                                   const TCGLdstHelperParam *parm)
6205 {
6206     MemOp mop = get_memop(ldst->oi);
6207     TCGMovExtend mov[2];
6208     int ofs_slot0;
6209 
6210     switch (ldst->type) {
6211     case TCG_TYPE_I64:
6212         if (TCG_TARGET_REG_BITS == 32) {
6213             break;
6214         }
6215         /* fall through */
6216 
6217     case TCG_TYPE_I32:
6218         mov[0].dst = ldst->datalo_reg;
6219         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6220         mov[0].dst_type = ldst->type;
6221         mov[0].src_type = TCG_TYPE_REG;
6222 
6223         /*
6224          * If load_sign, then we allowed the helper to perform the
6225          * appropriate sign extension to tcg_target_ulong, and all
6226          * we need now is a plain move.
6227          *
6228          * If not, then we expect the relevant extension
6229          * instruction to be no more expensive than a move,
6230          * and we thus save icache space etc. by using only
6231          * one of the two helper functions.
6232          */
6233         if (load_sign || !(mop & MO_SIGN)) {
6234             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6235                 mov[0].src_ext = MO_32;
6236             } else {
6237                 mov[0].src_ext = MO_64;
6238             }
6239         } else {
6240             mov[0].src_ext = mop & MO_SSIZE;
6241         }
6242         tcg_out_movext1(s, mov);
6243         return;
6244 
6245     case TCG_TYPE_I128:
6246         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6247         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6248         switch (TCG_TARGET_CALL_RET_I128) {
6249         case TCG_CALL_RET_NORMAL:
6250             break;
6251         case TCG_CALL_RET_BY_VEC:
6252             tcg_out_st(s, TCG_TYPE_V128,
6253                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6254                        TCG_REG_CALL_STACK, ofs_slot0);
6255             /* fall through */
6256         case TCG_CALL_RET_BY_REF:
6257             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6258                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6259             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6260                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6261             return;
6262         default:
6263             g_assert_not_reached();
6264         }
6265         break;
6266 
6267     default:
6268         g_assert_not_reached();
6269     }
6270 
6271     mov[0].dst = ldst->datalo_reg;
6272     mov[0].src =
6273         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6274     mov[0].dst_type = TCG_TYPE_REG;
6275     mov[0].src_type = TCG_TYPE_REG;
6276     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6277 
6278     mov[1].dst = ldst->datahi_reg;
6279     mov[1].src =
6280         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6281     mov[1].dst_type = TCG_TYPE_REG;
6282     mov[1].src_type = TCG_TYPE_REG;
6283     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6284 
6285     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6286 }
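/*
 * Worked example: for a sign-extending byte load (MO_SB) on a 64-bit
 * host, a backend passing load_sign == false receives
 * mov[0].src_ext == MO_SB, so tcg_out_movext1() above emits the sign
 * extension locally from the unsigned helper's return register; with
 * load_sign == true the helper has already sign-extended the value to
 * tcg_target_ulong, so src_ext is a plain MO_32/MO_64 move.
 */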
6287 
6288 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6289                                    const TCGLdstHelperParam *parm)
6290 {
6291     const TCGHelperInfo *info;
6292     const TCGCallArgumentLoc *loc;
6293     TCGMovExtend mov[4];
6294     TCGType data_type;
6295     unsigned next_arg, nmov, n;
6296     MemOp mop = get_memop(ldst->oi);
6297 
6298     switch (mop & MO_SIZE) {
6299     case MO_8:
6300     case MO_16:
6301     case MO_32:
6302         info = &info_helper_st32_mmu;
6303         data_type = TCG_TYPE_I32;
6304         break;
6305     case MO_64:
6306         info = &info_helper_st64_mmu;
6307         data_type = TCG_TYPE_I64;
6308         break;
6309     case MO_128:
6310         info = &info_helper_st128_mmu;
6311         data_type = TCG_TYPE_I128;
6312         break;
6313     default:
6314         g_assert_not_reached();
6315     }
6316 
6317     /* Defer env argument. */
6318     next_arg = 1;
6319     nmov = 0;
6320 
6321     /* Handle addr argument. */
6322     loc = &info->in[next_arg];
6323     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6324     if (TCG_TARGET_REG_BITS == 32) {
6325         /*
6326          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6327          * to 64 bits for the helper by storing the low part.  Later,
6328          * after we have processed the register inputs, we will load a
6329          * zero for the high part.
6330          */
6331         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6332                                TCG_TYPE_I32, TCG_TYPE_I32,
6333                                ldst->addr_reg, -1);
6334         next_arg += 2;
6335         nmov += 1;
6336     } else {
6337         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6338                                    ldst->addr_reg, -1);
6339         next_arg += n;
6340         nmov += n;
6341     }
6342 
6343     /* Handle data argument. */
6344     loc = &info->in[next_arg];
6345     switch (loc->kind) {
6346     case TCG_CALL_ARG_NORMAL:
6347     case TCG_CALL_ARG_EXTEND_U:
6348     case TCG_CALL_ARG_EXTEND_S:
6349         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6350                                    ldst->datalo_reg, ldst->datahi_reg);
6351         next_arg += n;
6352         nmov += n;
6353         tcg_out_helper_load_slots(s, nmov, mov, parm);
6354         break;
6355 
6356     case TCG_CALL_ARG_BY_REF:
6357         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6358         tcg_debug_assert(data_type == TCG_TYPE_I128);
6359         tcg_out_st(s, TCG_TYPE_I64,
6360                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6361                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6362         tcg_out_st(s, TCG_TYPE_I64,
6363                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6364                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6365 
6366         tcg_out_helper_load_slots(s, nmov, mov, parm);
6367 
6368         if (arg_slot_reg_p(loc->arg_slot)) {
6369             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6370                              TCG_REG_CALL_STACK,
6371                              arg_slot_stk_ofs(loc->ref_slot));
6372         } else {
6373             tcg_debug_assert(parm->ntmp != 0);
6374             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6375                              arg_slot_stk_ofs(loc->ref_slot));
6376             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6377                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6378         }
6379         next_arg += 2;
6380         break;
6381 
6382     default:
6383         g_assert_not_reached();
6384     }
6385 
6386     if (TCG_TARGET_REG_BITS == 32) {
6387         /* Zero extend the address by loading a zero for the high part. */
6388         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6389         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6390     }
6391 
6392     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6393 }
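/*
 * A sketch of how a backend's slow path strings these helpers together.
 * This assumes a hypothetical backend: TCG_REG_TMP, qemu_ld_helpers[],
 * tcg_out_call_int() and tcg_out_goto() stand in for that backend's
 * own definitions:
 */
#if 0
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    /* After patching the fast-path branch to land here, make the call. */
    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    /* Return to the fast path. */
    tcg_out_goto(s, lb->raddr);
    return true;
}
#endif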
6394 
6395 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6396 {
6397     int i, start_words, num_insns;
6398     TCGOp *op;
6399 
6400     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6401                  && qemu_log_in_addr_range(pc_start))) {
6402         FILE *logfile = qemu_log_trylock();
6403         if (logfile) {
6404             fprintf(logfile, "OP:\n");
6405             tcg_dump_ops(s, logfile, false);
6406             fprintf(logfile, "\n");
6407             qemu_log_unlock(logfile);
6408         }
6409     }
6410 
6411 #ifdef CONFIG_DEBUG_TCG
6412     /* Ensure all labels referenced have been emitted.  */
6413     {
6414         TCGLabel *l;
6415         bool error = false;
6416 
6417         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6418             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6419                 qemu_log_mask(CPU_LOG_TB_OP,
6420                               "$L%d referenced but not present.\n", l->id);
6421                 error = true;
6422             }
6423         }
6424         assert(!error);
6425     }
6426 #endif
6427 
6428     /* Do not reuse any EBB that may be allocated within the TB. */
6429     tcg_temp_ebb_reset_freed(s);
6430 
6431     tcg_optimize(s);
6432 
6433     reachable_code_pass(s);
6434     liveness_pass_0(s);
6435     liveness_pass_1(s);
6436 
6437     if (s->nb_indirects > 0) {
6438         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6439                      && qemu_log_in_addr_range(pc_start))) {
6440             FILE *logfile = qemu_log_trylock();
6441             if (logfile) {
6442                 fprintf(logfile, "OP before indirect lowering:\n");
6443                 tcg_dump_ops(s, logfile, false);
6444                 fprintf(logfile, "\n");
6445                 qemu_log_unlock(logfile);
6446             }
6447         }
6448 
6449         /* Replace indirect temps with direct temps.  */
6450         if (liveness_pass_2(s)) {
6451             /* If changes were made, re-run liveness.  */
6452             liveness_pass_1(s);
6453         }
6454     }
6455 
6456     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6457                  && qemu_log_in_addr_range(pc_start))) {
6458         FILE *logfile = qemu_log_trylock();
6459         if (logfile) {
6460             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6461             tcg_dump_ops(s, logfile, true);
6462             fprintf(logfile, "\n");
6463             qemu_log_unlock(logfile);
6464         }
6465     }
6466 
6467     /* Initialize goto_tb jump offsets. */
6468     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6469     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6470     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6471     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6472 
6473     tcg_reg_alloc_start(s);
6474 
6475     /*
6476      * Reset the buffer pointers when restarting after overflow.
6477      * TODO: Move this into translate-all.c with the rest of the
6478      * buffer management.  Having only this done here is confusing.
6479      */
6480     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6481     s->code_ptr = s->code_buf;
6482     s->data_gen_ptr = NULL;
6483 
6484     QSIMPLEQ_INIT(&s->ldst_labels);
6485     s->pool_labels = NULL;
6486 
6487     start_words = s->insn_start_words;
6488     s->gen_insn_data =
6489         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6490 
6491     tcg_out_tb_start(s);
6492 
6493     num_insns = -1;
6494     QTAILQ_FOREACH(op, &s->ops, link) {
6495         TCGOpcode opc = op->opc;
6496 
6497         switch (opc) {
6498         case INDEX_op_mov:
6499         case INDEX_op_mov_vec:
6500             tcg_reg_alloc_mov(s, op);
6501             break;
6502         case INDEX_op_dup_vec:
6503             tcg_reg_alloc_dup(s, op);
6504             break;
6505         case INDEX_op_insn_start:
6506             if (num_insns >= 0) {
6507                 size_t off = tcg_current_code_size(s);
6508                 s->gen_insn_end_off[num_insns] = off;
6509                 /* Assert that we do not overflow our stored offset.  */
6510                 assert(s->gen_insn_end_off[num_insns] == off);
6511             }
6512             num_insns++;
6513             for (i = 0; i < start_words; ++i) {
6514                 s->gen_insn_data[num_insns * start_words + i] =
6515                     tcg_get_insn_start_param(op, i);
6516             }
6517             break;
6518         case INDEX_op_discard:
6519             temp_dead(s, arg_temp(op->args[0]));
6520             break;
6521         case INDEX_op_set_label:
6522             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6523             tcg_out_label(s, arg_label(op->args[0]));
6524             break;
6525         case INDEX_op_call:
6526             tcg_reg_alloc_call(s, op);
6527             break;
6528         case INDEX_op_exit_tb:
6529             tcg_out_exit_tb(s, op->args[0]);
6530             break;
6531         case INDEX_op_goto_tb:
6532             tcg_out_goto_tb(s, op->args[0]);
6533             break;
6534         case INDEX_op_dup2_vec:
6535             if (tcg_reg_alloc_dup2(s, op)) {
6536                 break;
6537             }
6538             /* fall through */
6539         default:
6540             /* Sanity check that we've not introduced any unhandled opcodes. */
6541             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6542                                               TCGOP_FLAGS(op)));
6543             /* Note: it would be much faster to have specialized
6544                register allocator functions for some common
6545                argument patterns.  */
6546             tcg_reg_alloc_op(s, op);
6547             break;
6548         }
6549         /* Test for (pending) buffer overflow.  The assumption is that any
6550            one operation beginning below the high water mark cannot overrun
6551            the buffer completely.  Thus we can test for overflow after
6552            generating code without having to check during generation.  */
6553         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6554             return -1;
6555         }
6556         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6557         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6558             return -2;
6559         }
6560     }
6561     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6562     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6563 
6564     /* Generate TB finalization at the end of block */
6565     i = tcg_out_ldst_finalize(s);
6566     if (i < 0) {
6567         return i;
6568     }
6569     i = tcg_out_pool_finalize(s);
6570     if (i < 0) {
6571         return i;
6572     }
6573     if (!tcg_resolve_relocs(s)) {
6574         return -2;
6575     }
6576 
6577 #ifndef CONFIG_TCG_INTERPRETER
6578     /* flush instruction cache */
6579     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6580                         (uintptr_t)s->code_buf,
6581                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6582 #endif
6583 
6584     return tcg_current_code_size(s);
6585 }
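/*
 * A sketch of the caller's side of this contract (the real retry loop
 * lives with the translator, e.g. tb_gen_code; names illustrative):
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
 *     if (unlikely(gen_code_size < 0)) {
 *         // -1: passed code_gen_highwater: restart in a fresh buffer.
 *         // -2: offsets no longer fit in 16 bits: retry a smaller TB.
 *     }
 */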
6586 
6587 #ifdef ELF_HOST_MACHINE
6588 /* In order to use this feature, the backend needs to do three things:
6589 
6590    (1) Define ELF_HOST_MACHINE to indicate both what value to
6591        put into the ELF image and to indicate support for the feature.
6592 
6593    (2) Define tcg_register_jit.  This should create a buffer containing
6594        the contents of a .debug_frame section that describes the post-
6595        prologue unwind info for the tcg machine.
6596 
6597    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6598 */
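/*
 * A minimal sketch of steps (2) and (3) for a hypothetical backend;
 * the trailing CFI bytes and their sizes are placeholders, not any
 * real target's values:
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_cfi[8];         /* target-specific DW_CFA_* opcodes */
} DebugFrame;

static const DebugFrame debug_frame = {
    /* ... CIE/FDE fields plus the CFI opcodes for this target ... */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif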
6599 
6600 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6601 typedef enum {
6602     JIT_NOACTION = 0,
6603     JIT_REGISTER_FN,
6604     JIT_UNREGISTER_FN
6605 } jit_actions_t;
6606 
6607 struct jit_code_entry {
6608     struct jit_code_entry *next_entry;
6609     struct jit_code_entry *prev_entry;
6610     const void *symfile_addr;
6611     uint64_t symfile_size;
6612 };
6613 
6614 struct jit_descriptor {
6615     uint32_t version;
6616     uint32_t action_flag;
6617     struct jit_code_entry *relevant_entry;
6618     struct jit_code_entry *first_entry;
6619 };
6620 
6621 void __jit_debug_register_code(void) __attribute__((noinline));
6622 void __jit_debug_register_code(void)
6623 {
6624     asm("");
6625 }
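/*
 * GDB plants a breakpoint on __jit_debug_register_code; the noinline
 * attribute and the empty asm keep the function and its calls from
 * being optimized away, so the breakpoint fires each time the
 * descriptor below is updated.
 */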
6626 
6627 /* Must statically initialize the version, because GDB may check
6628    the version before we can set it.  */
6629 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6630 
6631 /* End GDB interface.  */
6632 
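/* Return the offset of STR within STRTAB.  STR must be present:
   the loop does not terminate for strings missing from the table.  */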
6633 static int find_string(const char *strtab, const char *str)
6634 {
6635     const char *p = strtab + 1;
6636 
6637     while (1) {
6638         if (strcmp(p, str) == 0) {
6639             return p - strtab;
6640         }
6641         p += strlen(p) + 1;
6642     }
6643 }
6644 
6645 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6646                                  const void *debug_frame,
6647                                  size_t debug_frame_size)
6648 {
6649     struct __attribute__((packed)) DebugInfo {
6650         uint32_t  len;
6651         uint16_t  version;
6652         uint32_t  abbrev;
6653         uint8_t   ptr_size;
6654         uint8_t   cu_die;
6655         uint16_t  cu_lang;
6656         uintptr_t cu_low_pc;
6657         uintptr_t cu_high_pc;
6658         uint8_t   fn_die;
6659         char      fn_name[16];
6660         uintptr_t fn_low_pc;
6661         uintptr_t fn_high_pc;
6662         uint8_t   cu_eoc;
6663     };
6664 
6665     struct ElfImage {
6666         ElfW(Ehdr) ehdr;
6667         ElfW(Phdr) phdr;
6668         ElfW(Shdr) shdr[7];
6669         ElfW(Sym)  sym[2];
6670         struct DebugInfo di;
6671         uint8_t    da[24];
6672         char       str[80];
6673     };
6674 
6675     struct ElfImage *img;
6676 
6677     static const struct ElfImage img_template = {
6678         .ehdr = {
6679             .e_ident[EI_MAG0] = ELFMAG0,
6680             .e_ident[EI_MAG1] = ELFMAG1,
6681             .e_ident[EI_MAG2] = ELFMAG2,
6682             .e_ident[EI_MAG3] = ELFMAG3,
6683             .e_ident[EI_CLASS] = ELF_CLASS,
6684             .e_ident[EI_DATA] = ELF_DATA,
6685             .e_ident[EI_VERSION] = EV_CURRENT,
6686             .e_type = ET_EXEC,
6687             .e_machine = ELF_HOST_MACHINE,
6688             .e_version = EV_CURRENT,
6689             .e_phoff = offsetof(struct ElfImage, phdr),
6690             .e_shoff = offsetof(struct ElfImage, shdr),
6691             .e_ehsize = sizeof(ElfW(Ehdr)),
6692             .e_phentsize = sizeof(ElfW(Phdr)),
6693             .e_phnum = 1,
6694             .e_shentsize = sizeof(ElfW(Shdr)),
6695             .e_shnum = ARRAY_SIZE(img->shdr),
6696             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6697 #ifdef ELF_HOST_FLAGS
6698             .e_flags = ELF_HOST_FLAGS,
6699 #endif
6700 #ifdef ELF_OSABI
6701             .e_ident[EI_OSABI] = ELF_OSABI,
6702 #endif
6703         },
6704         .phdr = {
6705             .p_type = PT_LOAD,
6706             .p_flags = PF_X,
6707         },
6708         .shdr = {
6709             [0] = { .sh_type = SHT_NULL },
6710             /* Trick: The contents of code_gen_buffer are not present in
6711                this fake ELF file; that got allocated elsewhere.  Therefore
6712                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6713                will not look for contents.  We can record any address.  */
6714             [1] = { /* .text */
6715                 .sh_type = SHT_NOBITS,
6716                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6717             },
6718             [2] = { /* .debug_info */
6719                 .sh_type = SHT_PROGBITS,
6720                 .sh_offset = offsetof(struct ElfImage, di),
6721                 .sh_size = sizeof(struct DebugInfo),
6722             },
6723             [3] = { /* .debug_abbrev */
6724                 .sh_type = SHT_PROGBITS,
6725                 .sh_offset = offsetof(struct ElfImage, da),
6726                 .sh_size = sizeof(img->da),
6727             },
6728             [4] = { /* .debug_frame */
6729                 .sh_type = SHT_PROGBITS,
6730                 .sh_offset = sizeof(struct ElfImage),
6731             },
6732             [5] = { /* .symtab */
6733                 .sh_type = SHT_SYMTAB,
6734                 .sh_offset = offsetof(struct ElfImage, sym),
6735                 .sh_size = sizeof(img->sym),
6736                 .sh_info = 1,
6737                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6738                 .sh_entsize = sizeof(ElfW(Sym)),
6739             },
6740             [6] = { /* .strtab */
6741                 .sh_type = SHT_STRTAB,
6742                 .sh_offset = offsetof(struct ElfImage, str),
6743                 .sh_size = sizeof(img->str),
6744             }
6745         },
6746         .sym = {
6747             [1] = { /* code_gen_buffer */
6748                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6749                 .st_shndx = 1,
6750             }
6751         },
6752         .di = {
6753             .len = sizeof(struct DebugInfo) - 4,
6754             .version = 2,
6755             .ptr_size = sizeof(void *),
6756             .cu_die = 1,
6757             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6758             .fn_die = 2,
6759             .fn_name = "code_gen_buffer"
6760         },
6761         .da = {
6762             1,          /* abbrev number (the cu) */
6763             0x11, 1,    /* DW_TAG_compile_unit, has children */
6764             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6765             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6766             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6767             0, 0,       /* end of abbrev */
6768             2,          /* abbrev number (the fn) */
6769             0x2e, 0,    /* DW_TAG_subprogram, no children */
6770             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6771             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6772             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6773             0, 0,       /* end of abbrev */
6774             0           /* no more abbrev */
6775         },
6776         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6777                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6778     };
6779 
6780     /* We only need a single jit entry; statically allocate it.  */
6781     static struct jit_code_entry one_entry;
6782 
6783     uintptr_t buf = (uintptr_t)buf_ptr;
6784     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6785     DebugFrameHeader *dfh;
6786 
6787     img = g_malloc(img_size);
6788     *img = img_template;
6789 
6790     img->phdr.p_vaddr = buf;
6791     img->phdr.p_paddr = buf;
6792     img->phdr.p_memsz = buf_size;
6793 
6794     img->shdr[1].sh_name = find_string(img->str, ".text");
6795     img->shdr[1].sh_addr = buf;
6796     img->shdr[1].sh_size = buf_size;
6797 
6798     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6799     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6800 
6801     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6802     img->shdr[4].sh_size = debug_frame_size;
6803 
6804     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6805     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6806 
6807     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6808     img->sym[1].st_value = buf;
6809     img->sym[1].st_size = buf_size;
6810 
6811     img->di.cu_low_pc = buf;
6812     img->di.cu_high_pc = buf + buf_size;
6813     img->di.fn_low_pc = buf;
6814     img->di.fn_high_pc = buf + buf_size;
6815 
6816     dfh = (DebugFrameHeader *)(img + 1);
6817     memcpy(dfh, debug_frame, debug_frame_size);
6818     dfh->fde.func_start = buf;
6819     dfh->fde.func_len = buf_size;
6820 
6821 #ifdef DEBUG_JIT
6822     /* Enable this block to debug the creation of the ELF image file.
6823        One can use readelf, objdump, or other inspection utilities.  */
6824     {
6825         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6826         FILE *f = fopen(jit, "w+b");
6827         if (f) {
6828             if (fwrite(img, img_size, 1, f) != 1) {
6829                 /* fwrite returns the item count; failure is ignorable here.  */
6830             }
6831             fclose(f);
6832         }
6833     }
6834 #endif
6835 
6836     one_entry.symfile_addr = img;
6837     one_entry.symfile_size = img_size;
6838 
6839     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6840     __jit_debug_descriptor.relevant_entry = &one_entry;
6841     __jit_debug_descriptor.first_entry = &one_entry;
6842     __jit_debug_register_code();
6843 }
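/*
 * Usage note (illustrative): with the entry registered, a GDB attached
 * to the QEMU process can symbolize host PCs inside the JIT buffer, so
 * a backtrace frame appears as, e.g.:
 *
 *     #3  0x00007fffe9000040 in code_gen_buffer ()
 *
 * where the address shown is hypothetical.
 */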
6844 #else
6845 /* No support for the feature.  Provide the entry point expected by exec.c,
6846    and implement the internal function we declared earlier.  */
6847 
6848 static void tcg_register_jit_int(const void *buf, size_t size,
6849                                  const void *debug_frame,
6850                                  size_t debug_frame_size)
6851 {
6852 }
6853 
6854 void tcg_register_jit(const void *buf, size_t buf_size)
6855 {
6856 }
6857 #endif /* ELF_HOST_MACHINE */
6858 
6859 #if !TCG_TARGET_MAYBE_vec
6860 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6861 {
6862     g_assert_not_reached();
6863 }
6864 #endif
6865