xref: /openbmc/qemu/tcg/tcg.c (revision 9a6bc1840ec105902bda1a59c42e9e0c56a9ed05)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to jump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Length of the CIE, not counting this field itself. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;                /* distinguishes CIE from FDE */
    uint8_t version;            /* CIE format version */
    char augmentation[1];       /* NUL-terminated augmentation string */
    uint8_t code_align;         /* code alignment factor */
    uint8_t data_align;         /* data alignment factor (signed LEB128) */
    uint8_t return_column;      /* column holding the return address */
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    /* Length of the FDE, not counting this field itself. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;        /* offset back to the owning CIE */
    uintptr_t func_start;       /* start address of the described code */
    uintptr_t func_len;         /* length of the described code */
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE header, emitted as a unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * One deferred slow-path for a qemu_ld/qemu_st operation, queued on
 * TCGContext.ldst_labels and emitted by tcg_out_ldst_finalize().
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Stubs for hosts without vector support (TCG_TARGET_MAYBE_vec == 0).
 * The optimizer must never generate vector ops for such hosts, so
 * reaching any of these emitters is a programming error.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* Without host vector support, no vector op can ever be emitted. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
/* Backend-supplied parameters for building qemu_ld/st helper calls. */
typedef struct TCGLdstHelperParam {
    /* Generate the return-address argument; NULL per backend convention. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers usable while marshalling */
} TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
/* Slow-path load helpers, indexed by MemOp size|sign (MO_SSIZE). */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* Only 64-bit hosts need a distinct sign-extending 32-bit load,
       and only they can return a 128-bit value in registers. */
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
220 
/* Slow-path store helpers, indexed by MemOp size (MO_SIZE); stores
   need no sign distinction. */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte of host code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one byte of already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
273 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Append a 16-bit value at the current output position.  When the
   insn unit is smaller, use memcpy to avoid alignment assumptions. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 16-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
296 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Append a 32-bit value at the current output position.  When the
   insn unit is smaller, use memcpy to avoid alignment assumptions. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 32-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
319 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Append a 64-bit value at the current output position.  When the
   insn unit is smaller, use memcpy to avoid alignment assumptions. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 64-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
/* Bind label @l to the current output position (read-execute view). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    /* A label may be bound only once. */
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
/* Record the current code offset as the goto_tb reset point @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
403 
/* Record the current code offset as the patchable goto_tb insn @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
412 
/* Address of the indirect goto_tb jump-target slot @which. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
421 
/*
 * Offset of the softmmu TLB mask/table pair for mmu index @which,
 * relative to the env pointer (which sits just past the negative
 * offset state; hence the subtraction yields a negative offset).
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
428 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp in the translator's retry loop. */
    siglongjmp(s->jmp_trans, -2);
}
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate a argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
typedef struct TCGMovExtend {
    unsigned dst;           /* TCGReg, or an argument slot (see above) */
    TCGReg src;             /* source register */
    TCGType dst_type;       /* integral type of the destination */
    TCGType src_type;       /* integral type of the source */
    MemOp src_ext;          /* extension to apply while moving */
} TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: plain move, or truncate a 64-bit source. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen a 32-bit source into a 64-bit destination. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64-bit to 64-bit: extend from the low 32 bits. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* A 64-bit move requires a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
514 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, taking the description from @i but the source
   register from @src (used when the source has been relocated). */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all operands taken from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pair
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: the first move cannot clobber the second's source. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full cycle: each destination is the other's source. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No xchg on this host: bounce src1 through the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1->dst overlaps src2, so the second move must be emitted first. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pair
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If any move's destination clobbers no other source, emit it
       first and reduce to the two-move case. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: break the cycle by parking src1 in the scratch. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: break the cycle by parking src1 in the scratch. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
/* A constant-pool entry plus the relocation that references it. */
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;  /* singly-linked, sorted (see insert) */
    tcg_insn_unit *label;           /* insn to patch once placed */
    intptr_t addend;                /* relocation addend */
    int rtype;                      /* relocation type */
    unsigned nlong;                 /* number of words in data[] */
    tcg_target_ulong data[];        /* the constant itself */
} TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
704 
705 /* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    /* Single-word constant: allocate, fill, and insert sorted. */
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
/*
 * Emit the constant pool after the generated code, deduplicating
 * adjacent identical entries (the pool is sorted; see new_pool_insert)
 * and patching each referencing instruction.  Returns 0 on success,
 * -1 on buffer overflow, -2 on relocation failure.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;     /* last entry actually emitted */
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    /* Align to the widest entry, which sorts first. */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /* Emit the data only if it differs from the previous entry. */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        /* Patch the reference to point at the (possibly shared) data. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
/* One operand-constraint set: counts plus per-operand constraint strings. */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;             /* output/input operand counts */
    const char *args_ct_str[TCG_MAX_OP_ARGS];  /* outputs first, then inputs */
} TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
/* Expand the enumerator to be returned from tcg_target_op_def(). */
/* Same expansion as the first definition, minus the trailing comma. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constrains for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    /* Consulted only when @static_constraint is C_Dynamic. */
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

/* Binary opcode: one output, two inputs. */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    /* Emit with register output @a0 and register inputs @a1, @a2. */
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    /* Emit with register output @a0, register input @a1, immediate @a2. */
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/* Combined division/remainder opcodes. */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    /*
     * NOTE(review): per the naming, @a0/@a1 appear to be outputs 0/1 and
     * @a4 operand 4 of the div2 opcodes, with operands 2/3 fixed by the
     * constraint set -- confirm against the backend implementations.
     */
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

/* Unary opcode: register output @a0 from register input @a1. */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/*
 * Subtraction is not commutative, so in addition to the reg/reg form it
 * offers a form with an immediate *first* input.
 */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    /* Emit with register output @a0 and register inputs @a1, @a2. */
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    /* Emit with register output @a0, immediate @a1, register input @a2. */
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1007 
1008 #include "tcg-target.c.inc"
1009 
#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
/*
 * The expression is the (negative) offset of tlb.f[0] from the end of
 * CPUNegativeOffsetState; it must not be further away than the backend's
 * addressing mode can reach, as declared by MIN_TLB_MASK_TABLE_OFS.
 */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
1016 
/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 * (_Generic only has a T: association, so any other type fails to match.)
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/*
 * Register allocation descriptions for every TCGOpcode.
 * Opcodes without an entry here are left NULL by static initialization.
 */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP
1049 
1050 /*
1051  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1052  * and registered the target's TCG globals) must register with this function
1053  * before initiating translation.
1054  *
1055  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1056  * of tcg_region_init() for the reasoning behind this.
1057  *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
1065  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single init context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a bitwise copy of the fully-initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base must point into this context's own temps[] array. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /*
     * NOTE(review): only contexts after the first allocate an initial
     * region here; the first presumably inherits one via the copy of
     * tcg_init_ctx above -- confirm against tcg_region_init().
     */
    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    /* Publish as this thread's translation context. */
    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1100 
1101 /* pool based memory allocation */
1102 void *tcg_malloc_internal(TCGContext *s, int size)
1103 {
1104     TCGPool *p;
1105     int pool_size;
1106 
1107     if (size > TCG_POOL_CHUNK_SIZE) {
1108         /* big malloc: insert a new pool (XXX: could optimize) */
1109         p = g_malloc(sizeof(TCGPool) + size);
1110         p->size = size;
1111         p->next = s->pool_first_large;
1112         s->pool_first_large = p;
1113         return p->data;
1114     } else {
1115         p = s->pool_current;
1116         if (!p) {
1117             p = s->pool_first;
1118             if (!p)
1119                 goto new_pool;
1120         } else {
1121             if (!p->next) {
1122             new_pool:
1123                 pool_size = TCG_POOL_CHUNK_SIZE;
1124                 p = g_malloc(sizeof(TCGPool) + pool_size);
1125                 p->size = pool_size;
1126                 p->next = NULL;
1127                 if (s->pool_current) {
1128                     s->pool_current->next = p;
1129                 } else {
1130                     s->pool_first = p;
1131                 }
1132             } else {
1133                 p = p->next;
1134             }
1135         }
1136     }
1137     s->pool_current = p;
1138     s->pool_cur = p->data + size;
1139     s->pool_end = p->data + p->size;
1140     return p->data;
1141 }
1142 
1143 void tcg_pool_reset(TCGContext *s)
1144 {
1145     TCGPool *p, *t;
1146     for (p = s->pool_first_large; p; p = t) {
1147         t = p->next;
1148         g_free(p);
1149     }
1150     s->pool_first_large = NULL;
1151     s->pool_cur = s->pool_end = NULL;
1152     s->pool_current = NULL;
1153 }
1154 
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

/* "ttl" == tcg_target_ulong: 32 or 64 bits to match the host word size. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
1168 
/* Layout for load helpers returning tcg_target_ulong. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Layout for load helpers returning uint64_t. */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Layout for load helpers returning Int128. */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Layout for store helpers taking a 32-bit data value. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Layout for store helpers taking a 64-bit data value. */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Layout for store helpers taking an Int128 data value. */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1225 
1226 #ifdef CONFIG_TCG_INTERPRETER
1227 static ffi_type *typecode_to_ffi(int argmask)
1228 {
1229     /*
1230      * libffi does not support __int128_t, so we have forced Int128
1231      * to use the structure definition instead of the builtin type.
1232      */
1233     static ffi_type *ffi_type_i128_elements[3] = {
1234         &ffi_type_uint64,
1235         &ffi_type_uint64,
1236         NULL
1237     };
1238     static ffi_type ffi_type_i128 = {
1239         .size = 16,
1240         .alignment = __alignof__(Int128),
1241         .type = FFI_TYPE_STRUCT,
1242         .elements = ffi_type_i128_elements,
1243     };
1244 
1245     switch (argmask) {
1246     case dh_typecode_void:
1247         return &ffi_type_void;
1248     case dh_typecode_i32:
1249         return &ffi_type_uint32;
1250     case dh_typecode_s32:
1251         return &ffi_type_sint32;
1252     case dh_typecode_i64:
1253         return &ffi_type_uint64;
1254     case dh_typecode_s64:
1255         return &ffi_type_sint64;
1256     case dh_typecode_ptr:
1257         return &ffi_type_pointer;
1258     case dh_typecode_i128:
1259         return &ffi_type_i128;
1260     }
1261     g_assert_not_reached();
1262 }
1263 
/*
 * Build the libffi call descriptor for @info->typemask.
 *
 * The typemask packs one 3-bit typecode per position: bits [2:0] hold
 * the return type and bits [3n+2:3n] hold argument n-1.  The returned
 * allocation is never freed here; NOTE(review): assumed to live for the
 * lifetime of the process.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];  /* flexible array: one slot per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);  /* bit count -> 3-bit typecode count */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

/*
 * HELPER_INFO_INIT names the field that records one-time init state;
 * HELPER_INFO_INIT_VAL is the value stored there once initialized.
 * With the interpreter that state is the libffi cif; otherwise a flag.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1304 
1305 static inline bool arg_slot_reg_p(unsigned arg_slot)
1306 {
1307     /*
1308      * Split the sizeof away from the comparison to avoid Werror from
1309      * "unsigned < 0 is always false", when iarg_regs is empty.
1310      */
1311     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1312     return arg_slot < nreg;
1313 }
1314 
1315 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1316 {
1317     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1318     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1319 
1320     tcg_debug_assert(stk_slot < max);
1321     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1322 }
1323 
/* Running totals while laying out one helper call (see init_call_layout). */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1330 
1331 static void layout_arg_even(TCGCumulativeArgs *cum)
1332 {
1333     cum->arg_slot += cum->arg_slot & 1;
1334 }
1335 
1336 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1337                          TCGCallArgumentKind kind)
1338 {
1339     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1340 
1341     *loc = (TCGCallArgumentLoc){
1342         .kind = kind,
1343         .arg_idx = cum->arg_idx,
1344         .arg_slot = cum->arg_slot,
1345     };
1346     cum->info_in_idx++;
1347     cum->arg_slot++;
1348 }
1349 
1350 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1351                                 TCGHelperInfo *info, int n)
1352 {
1353     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1354 
1355     for (int i = 0; i < n; ++i) {
1356         /* Layout all using the same arg_idx, adjusting the subindex. */
1357         loc[i] = (TCGCallArgumentLoc){
1358             .kind = TCG_CALL_ARG_NORMAL,
1359             .arg_idx = cum->arg_idx,
1360             .tmp_subindex = i,
1361             .arg_slot = cum->arg_slot + i,
1362         };
1363     }
1364     cum->info_in_idx += n;
1365     cum->arg_slot += n;
1366 }
1367 
/*
 * Lay out one by-reference argument: a pointer placed in the normal
 * argument slots, plus a block of "ref" stack slots that will hold a
 * private copy of the value.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;  /* host words needed for Int128 */

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1399 
/*
 * Compute the register/stack placement of a helper call's return value
 * and arguments from @info->typemask, filling in @info->nr_out,
 * @info->out_kind, @info->nr_in and @info->in[].
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* May need two registers on a 32-bit host. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each loop iteration consumes one 3-bit typecode from the mask.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Dispatch on type, honoring the per-type backend ABI setting. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Bit 0 of the typecode selects _EXTEND_U vs _EXTEND_S. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            /* Start past the stack-resident portion of the arguments. */
            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            /* Apply the relocation to every by-reference slot. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1580 
/* State and forward declarations used by tcg_context_init() below. */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1585 
/*
 * One-time initialization of the parent context tcg_init_ctx: lay out
 * the mmu helper calls, initialize the backend, build the register
 * allocation orders, publish the context array, and create the "env"
 * global.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Pre-compute ABI layout for the load/store slow-path helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of call-saved registers at the front. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    /* Reserve TCG_AREG0 for the CPU env pointer and expose it as "env". */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1640 
/* One-time TCG initialization: context first, then code-buffer regions. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1646 
1647 /*
1648  * Allocate TBs right before their corresponding translated code, making
1649  * sure that TBs and code are on different cache lines.
1650  */
1651 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1652 {
1653     uintptr_t align = qemu_icache_linesize;
1654     TranslationBlock *tb;
1655     void *next;
1656 
1657  retry:
1658     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1659     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1660 
1661     if (unlikely(next > s->code_gen_highwater)) {
1662         if (tcg_region_alloc(s)) {
1663             return NULL;
1664         }
1665         goto retry;
1666     }
1667     qatomic_set(&s->code_gen_ptr, next);
1668     return tb;
1669 }
1670 
/*
 * Generate and install the prologue/epilogue at the start of the code
 * buffer, flush caches, optionally log the disassembly, then point the
 * region allocator past the generated code.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry into generated code begins at the prologue (execute view). */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly-written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    /* Optionally dump the generated prologue and any data pool. */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble the code part, then hex-dump the pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1747 
/*
 * Reset the per-translation-block state of the context so that
 * translation of a new TB can begin: release pool memory and drop all
 * non-global temps, cached constants, ops and labels.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Discard every temp allocated by the previous TB; globals persist. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    /* New ops are appended at the tail unless emit_before_op is set. */
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}
1779 
1780 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1781 {
1782     int n = s->nb_temps++;
1783 
1784     if (n >= TCG_MAX_TEMPS) {
1785         tcg_raise_tb_overflow(s);
1786     }
1787     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1788 }
1789 
1790 static TCGTemp *tcg_global_alloc(TCGContext *s)
1791 {
1792     TCGTemp *ts;
1793 
1794     tcg_debug_assert(s->nb_globals == s->nb_temps);
1795     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1796     s->nb_globals++;
1797     ts = tcg_temp_alloc(s);
1798     ts->kind = TEMP_GLOBAL;
1799 
1800     return ts;
1801 }
1802 
1803 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1804                                             TCGReg reg, const char *name)
1805 {
1806     TCGTemp *ts;
1807 
1808     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1809 
1810     ts = tcg_global_alloc(s);
1811     ts->base_type = type;
1812     ts->type = type;
1813     ts->kind = TEMP_FIXED;
1814     ts->reg = reg;
1815     ts->name = name;
1816     tcg_regset_set_reg(s->reserved_regs, reg);
1817 
1818     return ts;
1819 }
1820 
1821 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1822 {
1823     s->frame_start = start;
1824     s->frame_end = start + size;
1825     s->frame_temp
1826         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1827 }
1828 
/*
 * Allocate a global temp that lives in memory at BASE + OFFSET.
 * If BASE is itself a global (rather than a fixed register), the new
 * temp is marked indirect and BASE becomes an indirect base.  For
 * 64-bit values on a 32-bit host, two consecutive I32 temps named
 * "<name>_0"/"<name>_1" are allocated.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit value accounts for two indirect accesses. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Represent the 64-bit value as two adjacent 32-bit temps. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The second half must immediately follow the first. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1888 
1889 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1890 {
1891     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1892     return temp_tcgv_i32(ts);
1893 }
1894 
1895 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1896 {
1897     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1898     return temp_tcgv_i64(ts);
1899 }
1900 
1901 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1902 {
1903     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1904     return temp_tcgv_ptr(ts);
1905 }
1906 
/*
 * Allocate a new temporary of TYPE with lifetime KIND (TEMP_EBB or
 * TEMP_TB).  EBB temps are recycled through the per-type free_temps
 * bitmaps; TB temps are always newly allocated.  Types wider than a
 * host register occupy consecutive TCGTemp entries.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed to hold TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces contiguously after the first. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1970 
1971 TCGv_i32 tcg_temp_new_i32(void)
1972 {
1973     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1974 }
1975 
1976 TCGv_i32 tcg_temp_ebb_new_i32(void)
1977 {
1978     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1979 }
1980 
1981 TCGv_i64 tcg_temp_new_i64(void)
1982 {
1983     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1984 }
1985 
1986 TCGv_i64 tcg_temp_ebb_new_i64(void)
1987 {
1988     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1989 }
1990 
1991 TCGv_ptr tcg_temp_new_ptr(void)
1992 {
1993     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1994 }
1995 
1996 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1997 {
1998     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1999 }
2000 
2001 TCGv_i128 tcg_temp_new_i128(void)
2002 {
2003     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2004 }
2005 
2006 TCGv_i128 tcg_temp_ebb_new_i128(void)
2007 {
2008     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2009 }
2010 
2011 TCGv_vec tcg_temp_new_vec(TCGType type)
2012 {
2013     TCGTemp *t;
2014 
2015 #ifdef CONFIG_DEBUG_TCG
2016     switch (type) {
2017     case TCG_TYPE_V64:
2018         assert(TCG_TARGET_HAS_v64);
2019         break;
2020     case TCG_TYPE_V128:
2021         assert(TCG_TARGET_HAS_v128);
2022         break;
2023     case TCG_TYPE_V256:
2024         assert(TCG_TARGET_HAS_v256);
2025         break;
2026     default:
2027         g_assert_not_reached();
2028     }
2029 #endif
2030 
2031     t = tcg_temp_new_internal(type, TEMP_EBB);
2032     return temp_tcgv_vec(t);
2033 }
2034 
2035 /* Create a new temp of the same type as an existing temp.  */
2036 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2037 {
2038     TCGTemp *t = tcgv_vec_temp(match);
2039 
2040     tcg_debug_assert(t->temp_allocated != 0);
2041 
2042     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2043     return temp_tcgv_vec(t);
2044 }
2045 
2046 void tcg_temp_free_internal(TCGTemp *ts)
2047 {
2048     TCGContext *s = tcg_ctx;
2049 
2050     switch (ts->kind) {
2051     case TEMP_CONST:
2052     case TEMP_TB:
2053         /* Silently ignore free. */
2054         break;
2055     case TEMP_EBB:
2056         tcg_debug_assert(ts->temp_allocated != 0);
2057         ts->temp_allocated = 0;
2058         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2059         break;
2060     default:
2061         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2062         g_assert_not_reached();
2063     }
2064 }
2065 
2066 void tcg_temp_free_i32(TCGv_i32 arg)
2067 {
2068     tcg_temp_free_internal(tcgv_i32_temp(arg));
2069 }
2070 
2071 void tcg_temp_free_i64(TCGv_i64 arg)
2072 {
2073     tcg_temp_free_internal(tcgv_i64_temp(arg));
2074 }
2075 
2076 void tcg_temp_free_i128(TCGv_i128 arg)
2077 {
2078     tcg_temp_free_internal(tcgv_i128_temp(arg));
2079 }
2080 
2081 void tcg_temp_free_ptr(TCGv_ptr arg)
2082 {
2083     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2084 }
2085 
2086 void tcg_temp_free_vec(TCGv_vec arg)
2087 {
2088     tcg_temp_free_internal(tcgv_vec_temp(arg));
2089 }
2090 
/*
 * Return the interned constant temp for (TYPE, VAL), creating it on
 * first use.  Constants are cached in a per-type hash table keyed by
 * the 64-bit value, so each (type, value) pair has exactly one temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the table; keys point into the temps themselves. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* A 64-bit constant uses two adjacent 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2145 
2146 TCGv_i32 tcg_constant_i32(int32_t val)
2147 {
2148     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2149 }
2150 
2151 TCGv_i64 tcg_constant_i64(int64_t val)
2152 {
2153     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2154 }
2155 
2156 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2157 {
2158     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2159 }
2160 
2161 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2162 {
2163     val = dup_const(vece, val);
2164     return temp_tcgv_vec(tcg_constant_internal(type, val));
2165 }
2166 
2167 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2168 {
2169     TCGTemp *t = tcgv_vec_temp(match);
2170 
2171     tcg_debug_assert(t->temp_allocated != 0);
2172     return tcg_constant_vec(t->base_type, vece, val);
2173 }
2174 
2175 #ifdef CONFIG_DEBUG_TCG
2176 size_t temp_idx(TCGTemp *ts)
2177 {
2178     ptrdiff_t n = ts - tcg_ctx->temps;
2179     assert(n >= 0 && n < tcg_ctx->nb_temps);
2180     return n;
2181 }
2182 
/*
 * Debug build: a TCGv is encoded as the byte offset of its TCGTemp
 * from the start of tcg_ctx.  Validate that offset before converting
 * it back into a TCGTemp pointer.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    /* The offset must land exactly on an allocated temps[] entry. */
    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2192 #endif /* CONFIG_DEBUG_TCG */
2193 
2194 /*
2195  * Return true if OP may appear in the opcode stream with TYPE.
2196  * Test the runtime variable that controls each opcode.
2197  */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /* First determine whether the host supports TYPE at all. */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    switch (op) {
    /* Opcodes that every backend provides unconditionally. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Basic arithmetic: gated only on type support. */
    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* 32-bit opcodes all backends implement. */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* Optional 32-bit opcodes, per-backend feature macros. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit opcodes require a 64-bit host. */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes, per-backend feature macros. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;

    /* Vector opcodes: mandatory ones need only type support. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    /* Optional vector opcodes need type support AND the feature. */
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Remaining generic opcodes are looked up via the outop table. */
        if (op < INDEX_op_last_generic) {
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            /* Constraints may be computed at runtime from type/flags. */
            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        /* Backend-specific opcodes past last_generic are supported. */
        tcg_debug_assert(op < NB_OPS);
        return true;

    case INDEX_op_last_generic:
        g_assert_not_reached();
    }
}
2456 
2457 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2458 {
2459     unsigned width;
2460 
2461     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2462     width = (type == TCG_TYPE_I32 ? 32 : 64);
2463 
2464     tcg_debug_assert(ofs < width);
2465     tcg_debug_assert(len > 0);
2466     tcg_debug_assert(len <= width - ofs);
2467 
2468     return TCG_TARGET_deposit_valid(type, ofs, len);
2469 }
2470 
2471 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2472 
/*
 * Emit an INDEX_op_call to helper FUNC described by INFO, with
 * optional return temp RET and input temps ARGS.  Lazily initializes
 * the helper's call layout and widens 32-bit inputs to 64 bits where
 * the ABI requires it.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout for this helper exactly once. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + trailing func and info pointers. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Wide results occupy 2 or 4 consecutive temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI requires this 32-bit input widened to 64 bits. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                /* Remember the temp so it can be freed after emission. */
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* Release the widening temps now that the call op references them. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2565 
2566 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2567 {
2568     tcg_gen_callN(func, info, ret, NULL);
2569 }
2570 
2571 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2572 {
2573     tcg_gen_callN(func, info, ret, &t1);
2574 }
2575 
2576 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2577                    TCGTemp *t1, TCGTemp *t2)
2578 {
2579     TCGTemp *args[2] = { t1, t2 };
2580     tcg_gen_callN(func, info, ret, args);
2581 }
2582 
2583 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2584                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2585 {
2586     TCGTemp *args[3] = { t1, t2, t3 };
2587     tcg_gen_callN(func, info, ret, args);
2588 }
2589 
2590 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2591                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2592 {
2593     TCGTemp *args[4] = { t1, t2, t3, t4 };
2594     tcg_gen_callN(func, info, ret, args);
2595 }
2596 
2597 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2598                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2599 {
2600     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2601     tcg_gen_callN(func, info, ret, args);
2602 }
2603 
2604 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2605                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2606                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2607 {
2608     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2609     tcg_gen_callN(func, info, ret, args);
2610 }
2611 
2612 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2613                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2614                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2615 {
2616     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2617     tcg_gen_callN(func, info, ret, args);
2618 }
2619 
2620 static void tcg_reg_alloc_start(TCGContext *s)
2621 {
2622     int i, n;
2623 
2624     for (i = 0, n = s->nb_temps; i < n; i++) {
2625         TCGTemp *ts = &s->temps[i];
2626         TCGTempVal val = TEMP_VAL_MEM;
2627 
2628         switch (ts->kind) {
2629         case TEMP_CONST:
2630             val = TEMP_VAL_CONST;
2631             break;
2632         case TEMP_FIXED:
2633             val = TEMP_VAL_REG;
2634             break;
2635         case TEMP_GLOBAL:
2636             break;
2637         case TEMP_EBB:
2638             val = TEMP_VAL_DEAD;
2639             /* fall through */
2640         case TEMP_TB:
2641             ts->mem_allocated = 0;
2642             break;
2643         default:
2644             g_assert_not_reached();
2645         }
2646         ts->val_type = val;
2647     }
2648 
2649     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2650 }
2651 
/*
 * Format a human-readable name for TS into BUF: globals print their
 * name, TB/EBB temps an index, constants their value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants print the vector width in bits too. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2691 
2692 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2693                              int buf_size, TCGArg arg)
2694 {
2695     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2696 }
2697 
/* Printable names for TCG comparison conditions, indexed by TCGCond. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2715 
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2733 
/* Printable names for MemOp alignment requirements. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2744 
/* Names for the atomicity portion of a MemOp, indexed by the MO_ATOM field. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2753 
/*
 * Names for bswap flag combinations; element size 6 fits the longest
 * entry ("iz,oz" / "iz,os" plus NUL).  Unlisted combinations yield an
 * empty string, which the dump code treats as "print in hex".
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2761 
#ifdef CONFIG_PLUGIN
/* Names for plugin callback insertion points, in argument-value order. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2770 
2771 static inline bool tcg_regset_single(TCGRegSet d)
2772 {
2773     return (d & (d - 1)) == 0;
2774 }
2775 
2776 static inline TCGReg tcg_regset_first(TCGRegSet d)
2777 {
2778     if (TCG_TARGET_NB_REGS <= 32) {
2779         return ctz32(d);
2780     } else {
2781         return ctz64(d);
2782     }
2783 }
2784 
/*
 * Return only the number of characters output -- no error return.
 * fprintf returns negative on error; clamping that to 0 keeps the
 * column accumulation in tcg_dump_ops from going backwards.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2788 
/*
 * Dump the op list of context @s to stream @f, one line per op.
 * Liveness annotations (sync/dead) are appended when op->life is set;
 * when @have_prefs is true, output register preferences are appended
 * for each output argument as well.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;  /* characters printed so far, for annotation alignment */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start params in hex. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Regular op: name, decorated with type width or vector shape. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* Print output arguments, then input arguments.  k walks args[]. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * Decode the first constant argument symbolically, per opcode.
             * On exit, i counts how many constant args were consumed.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    /* Empty-string table entries are falsy only via NULL;
                       unlisted combinations print as raw hex. */
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Label and memory-barrier constant arguments. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength: acquire/release/sequential. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access combination: r=load, w=store. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant arguments, printed in hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Outputs that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3121 
3122 /* we give more priority to constraints with less registers */
3123 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3124 {
3125     int n;
3126 
3127     arg_ct += k;
3128     n = ctpop64(arg_ct->regs);
3129 
3130     /*
3131      * Sort constraints of a single register first, which includes output
3132      * aliases (which must exactly match the input already allocated).
3133      */
3134     if (n == 1 || arg_ct->oalias) {
3135         return INT_MAX;
3136     }
3137 
3138     /*
3139      * Sort register pairs next, first then second immediately after.
3140      * Arbitrarily sort multiple pairs by the index of the first reg;
3141      * there shouldn't be many pairs.
3142      */
3143     switch (arg_ct->pair) {
3144     case 1:
3145     case 3:
3146         return (k + 1) * 2;
3147     case 2:
3148         return (arg_ct->pair_index + 1) * 2 - 1;
3149     }
3150 
3151     /* Finally, sort by decreasing register count. */
3152     assert(n > 1);
3153     return -n;
3154 }
3155 
3156 /* sort from highest priority to lowest */
3157 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3158 {
3159     int i, j;
3160 
3161     for (i = 0; i < n; i++) {
3162         a[start + i].sort_index = start + i;
3163     }
3164     if (n <= 1) {
3165         return;
3166     }
3167     for (i = 0; i < n - 1; i++) {
3168         for (j = i + 1; j < n; j++) {
3169             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3170             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3171             if (p1 < p2) {
3172                 int tmp = a[start + i].sort_index;
3173                 a[start + i].sort_index = a[start + j].sort_index;
3174                 a[start + j].sort_index = tmp;
3175             }
3176         }
3177     }
3178 }
3179 
/* All-zero constraints, returned for opcodes marked TCG_OPF_NOT_PRESENT. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Parsed form of constraint_sets[], populated by process_constraint_sets(). */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3182 
/*
 * Parse every textual constraint set in constraint_sets[] into the
 * binary TCGArgConstraint form in all_cts[].  Handles output aliases
 * ('0'-'9'), new-register outputs ('&'), register pairs ('p'/'m'),
 * the generic 'i'/'z' constraints, and the target-specific letters
 * from tcg-target-con-str.h; finally sorts each set by priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in the string array, then inputs. */
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output register o. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must get a register distinct from all inputs. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining letters may be combined, e.g. "ri". */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3373 
3374 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3375 {
3376     TCGOpcode opc = op->opc;
3377     TCGType type = TCGOP_TYPE(op);
3378     unsigned flags = TCGOP_FLAGS(op);
3379     const TCGOpDef *def = &tcg_op_defs[opc];
3380     const TCGOutOp *outop = all_outop[opc];
3381     TCGConstraintSetIndex con_set;
3382 
3383     if (def->flags & TCG_OPF_NOT_PRESENT) {
3384         return empty_cts;
3385     }
3386 
3387     if (outop) {
3388         con_set = outop->static_constraint;
3389         if (con_set == C_Dynamic) {
3390             con_set = outop->dynamic_constraint(type, flags);
3391         }
3392     } else {
3393         con_set = tcg_target_op_def(opc, type, flags);
3394     }
3395     tcg_debug_assert(con_set >= 0);
3396     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3397 
3398     /* The constraint arguments must match TCGOpcode arguments. */
3399     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3400     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3401 
3402     return all_cts[con_set];
3403 }
3404 
3405 static void remove_label_use(TCGOp *op, int idx)
3406 {
3407     TCGLabel *label = arg_label(op->args[idx]);
3408     TCGLabelUse *use;
3409 
3410     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3411         if (use->op == op) {
3412             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3413             return;
3414         }
3415     }
3416     g_assert_not_reached();
3417 }
3418 
3419 void tcg_op_remove(TCGContext *s, TCGOp *op)
3420 {
3421     switch (op->opc) {
3422     case INDEX_op_br:
3423         remove_label_use(op, 0);
3424         break;
3425     case INDEX_op_brcond_i32:
3426     case INDEX_op_brcond_i64:
3427         remove_label_use(op, 3);
3428         break;
3429     case INDEX_op_brcond2_i32:
3430         remove_label_use(op, 5);
3431         break;
3432     default:
3433         break;
3434     }
3435 
3436     QTAILQ_REMOVE(&s->ops, op, link);
3437     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3438     s->nb_ops--;
3439 }
3440 
3441 void tcg_remove_ops_after(TCGOp *op)
3442 {
3443     TCGContext *s = tcg_ctx;
3444 
3445     while (true) {
3446         TCGOp *last = tcg_last_op();
3447         if (last == op) {
3448             return;
3449         }
3450         tcg_op_remove(s, last);
3451     }
3452 }
3453 
3454 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3455 {
3456     TCGContext *s = tcg_ctx;
3457     TCGOp *op = NULL;
3458 
3459     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3460         QTAILQ_FOREACH(op, &s->free_ops, link) {
3461             if (nargs <= op->nargs) {
3462                 QTAILQ_REMOVE(&s->free_ops, op, link);
3463                 nargs = op->nargs;
3464                 goto found;
3465             }
3466         }
3467     }
3468 
3469     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3470     nargs = MAX(4, nargs);
3471     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3472 
3473  found:
3474     memset(op, 0, offsetof(TCGOp, link));
3475     op->opc = opc;
3476     op->nargs = nargs;
3477 
3478     /* Check for bitfield overflow. */
3479     tcg_debug_assert(op->nargs == nargs);
3480 
3481     s->nb_ops++;
3482     return op;
3483 }
3484 
3485 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3486 {
3487     TCGOp *op = tcg_op_alloc(opc, nargs);
3488 
3489     if (tcg_ctx->emit_before_op) {
3490         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3491     } else {
3492         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3493     }
3494     return op;
3495 }
3496 
3497 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3498                             TCGOpcode opc, TCGType type, unsigned nargs)
3499 {
3500     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3501 
3502     TCGOP_TYPE(new_op) = type;
3503     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3504     return new_op;
3505 }
3506 
3507 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3508                            TCGOpcode opc, TCGType type, unsigned nargs)
3509 {
3510     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3511 
3512     TCGOP_TYPE(new_op) = type;
3513     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3514     return new_op;
3515 }
3516 
3517 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3518 {
3519     TCGLabelUse *u;
3520 
3521     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3522         TCGOp *op = u->op;
3523         switch (op->opc) {
3524         case INDEX_op_br:
3525             op->args[0] = label_arg(to);
3526             break;
3527         case INDEX_op_brcond_i32:
3528         case INDEX_op_brcond_i64:
3529             op->args[3] = label_arg(to);
3530             break;
3531         case INDEX_op_brcond2_i32:
3532             op->args[5] = label_arg(to);
3533             break;
3534         default:
3535             g_assert_not_reached();
3536         }
3537     }
3538 
3539     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3540 }
3541 
3542 /* Reachable analysis : remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    /*
     * Single forward pass: "dead" is set after an op that ends control
     * flow unconditionally, and cleared again at the next label that is
     * still referenced (or reached by fallthrough).
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            /* Ordinary ops and conditional branches: reachability unchanged. */
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3633 
3634 #define TS_DEAD  1
3635 #define TS_MEM   2
3636 
3637 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3638 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3639 
3640 /* For liveness_pass_1, the register preferences for a given temp.  */
3641 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3642 {
3643     return ts->state_ptr;
3644 }
3645 
3646 /* For liveness_pass_1, reset the preferences for a given temp to the
3647  * maximal regset for its type.
3648  */
3649 static inline void la_reset_pref(TCGTemp *ts)
3650 {
3651     *la_temp_pref(ts)
3652         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3653 }
3654 
3655 /* liveness analysis: end of function: all temps are dead, and globals
3656    should be in memory. */
3657 static void la_func_end(TCGContext *s, int ng, int nt)
3658 {
3659     int i;
3660 
3661     for (i = 0; i < ng; ++i) {
3662         s->temps[i].state = TS_DEAD | TS_MEM;
3663         la_reset_pref(&s->temps[i]);
3664     }
3665     for (i = ng; i < nt; ++i) {
3666         s->temps[i].state = TS_DEAD;
3667         la_reset_pref(&s->temps[i]);
3668     }
3669 }
3670 
3671 /* liveness analysis: end of basic block: all temps are dead, globals
3672    and local temps should be in memory. */
3673 static void la_bb_end(TCGContext *s, int ng, int nt)
3674 {
3675     int i;
3676 
3677     for (i = 0; i < nt; ++i) {
3678         TCGTemp *ts = &s->temps[i];
3679         int state;
3680 
3681         switch (ts->kind) {
3682         case TEMP_FIXED:
3683         case TEMP_GLOBAL:
3684         case TEMP_TB:
3685             state = TS_DEAD | TS_MEM;
3686             break;
3687         case TEMP_EBB:
3688         case TEMP_CONST:
3689             state = TS_DEAD;
3690             break;
3691         default:
3692             g_assert_not_reached();
3693         }
3694         ts->state = state;
3695         la_reset_pref(ts);
3696     }
3697 }
3698 
3699 /* liveness analysis: sync globals back to memory.  */
3700 static void la_global_sync(TCGContext *s, int ng)
3701 {
3702     int i;
3703 
3704     for (i = 0; i < ng; ++i) {
3705         int state = s->temps[i].state;
3706         s->temps[i].state = state | TS_MEM;
3707         if (state == TS_DEAD) {
3708             /* If the global was previously dead, reset prefs.  */
3709             la_reset_pref(&s->temps[i]);
3710         }
3711     }
3712 }
3713 
3714 /*
3715  * liveness analysis: conditional branch: all temps are dead unless
3716  * explicitly live-across-conditional-branch, globals and local temps
3717  * should be synced.
3718  */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    /* Globals are synced exactly as for an unconditional branch. */
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            /* TB-lifetime temps must be synced to memory here. */
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live across the branch: keep its prefs as-is. */
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            /* EBB temps and constants need neither sync nor pref reset. */
            continue;
        default:
            g_assert_not_reached();
        }
        /* Only reached for a TEMP_TB that was already dead. */
        la_reset_pref(&s->temps[i]);
    }
}
3744 
3745 /* liveness analysis: sync globals back to memory and kill.  */
3746 static void la_global_kill(TCGContext *s, int ng)
3747 {
3748     int i;
3749 
3750     for (i = 0; i < ng; i++) {
3751         s->temps[i].state = TS_DEAD | TS_MEM;
3752         la_reset_pref(&s->temps[i]);
3753     }
3754 }
3755 
3756 /* liveness analysis: note live globals crossing calls.  */
3757 static void la_cross_call(TCGContext *s, int nt)
3758 {
3759     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3760     int i;
3761 
3762     for (i = 0; i < nt; i++) {
3763         TCGTemp *ts = &s->temps[i];
3764         if (!(ts->state & TS_DEAD)) {
3765             TCGRegSet *pset = la_temp_pref(ts);
3766             TCGRegSet set = *pset;
3767 
3768             set &= mask;
3769             /* If the combination is not possible, restart.  */
3770             if (set == 0) {
3771                 set = tcg_target_available_regs[ts->type] & mask;
3772             }
3773             *pset = set;
3774         }
3775     }
3776 }
3777 
3778 /*
3779  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3780  * to TEMP_EBB, if possible.
3781  */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel meaning "used in more than one EBB". */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    /* Start each non-global temp as "never used" (NULL). */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            /* A label starts a new EBB. */
            ebb = op;
            continue;
        case INDEX_op_discard:
            /* Discard operands are not counted as uses. */
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}
3846 
3847 /* Liveness analysis : update the opc_arg_life array to tell if a
3848    given input arguments is dead. Instructions updating dead
3849    temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reached via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk backward so each use is processed before its defining op. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* No temp operands processed here; no liveness effect.  */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2_i32:
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2_i32:
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
4175 
4176 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /*
     * Forward pass.  state tracks the direct temp: TS_DEAD = not loaded,
     * TS_MEM = loaded and in sync with memory, 0 = live and modified.
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a load from memory into the direct temp. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            /* Special case for mov: fold the store into the mov itself
               when the destination must be synced and the source dies. */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dest dead: store the source directly, drop mov. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4367 
/*
 * Allocate a slot in the TB stack frame for temporary @ts, setting
 * mem_base/mem_offset/mem_allocated.  If the temp's base type was
 * subdivided into parts, all parts receive consecutive slots.
 * Restarts translation with a smaller TB if the frame is exhausted.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC stack slots are addressed with a bias added to the pointer. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4436 
4437 /* Assign @reg to @ts, and update reg_to_temp[]. */
4438 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4439 {
4440     if (ts->val_type == TEMP_VAL_REG) {
4441         TCGReg old = ts->reg;
4442         tcg_debug_assert(s->reg_to_temp[old] == ts);
4443         if (old == reg) {
4444             return;
4445         }
4446         s->reg_to_temp[old] = NULL;
4447     }
4448     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4449     s->reg_to_temp[reg] = ts;
4450     ts->val_type = TEMP_VAL_REG;
4451     ts->reg = reg;
4452 }
4453 
4454 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4455 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4456 {
4457     tcg_debug_assert(type != TEMP_VAL_REG);
4458     if (ts->val_type == TEMP_VAL_REG) {
4459         TCGReg reg = ts->reg;
4460         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4461         s->reg_to_temp[reg] = NULL;
4462     }
4463     ts->val_type = type;
4464 }
4465 
/* Forward declaration: temp_sync (below) needs temp_load before its body. */
static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4467 
4468 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4469    mark it free; otherwise mark it dead.  */
4470 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4471 {
4472     TCGTempVal new_type;
4473 
4474     switch (ts->kind) {
4475     case TEMP_FIXED:
4476         return;
4477     case TEMP_GLOBAL:
4478     case TEMP_TB:
4479         new_type = TEMP_VAL_MEM;
4480         break;
4481     case TEMP_EBB:
4482         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4483         break;
4484     case TEMP_CONST:
4485         new_type = TEMP_VAL_CONST;
4486         break;
4487     default:
4488         g_assert_not_reached();
4489     }
4490     set_temp_val_nonreg(s, ts, new_type);
4491 }
4492 
4493 /* Mark a temporary as dead.  */
4494 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4495 {
4496     temp_free_or_dead(s, ts, 1);
4497 }
4498 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent ones already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register ... */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            /* ... and store that register to the canonical slot. */
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4542 
4543 /* free register 'reg' by spilling the corresponding temporary if necessary */
4544 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4545 {
4546     TCGTemp *ts = s->reg_to_temp[reg];
4547     if (ts != NULL) {
4548         temp_sync(s, ts, allocated_regs, 0, -1);
4549     }
4550 }
4551 
4552 /**
4553  * tcg_reg_alloc:
4554  * @required_regs: Set of registers in which we must allocate.
4555  * @allocated_regs: Set of registers which must be avoided.
4556  * @preferred_regs: Set of registers we should prefer.
4557  * @rev: True if we search the registers in "indirect" order.
4558  *
4559  * The allocated register must be in @required_regs & ~@allocated_regs,
4560  * but if we can put it in @preferred_regs we may save a move later.
4561  */
4562 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4563                             TCGRegSet allocated_regs,
4564                             TCGRegSet preferred_regs, bool rev)
4565 {
4566     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4567     TCGRegSet reg_ct[2];
4568     const int *order;
4569 
4570     reg_ct[1] = required_regs & ~allocated_regs;
4571     tcg_debug_assert(reg_ct[1] != 0);
4572     reg_ct[0] = reg_ct[1] & preferred_regs;
4573 
4574     /* Skip the preferred_regs option if it cannot be satisfied,
4575        or if the preference made no difference.  */
4576     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4577 
4578     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4579 
4580     /* Try free registers, preferences first.  */
4581     for (j = f; j < 2; j++) {
4582         TCGRegSet set = reg_ct[j];
4583 
4584         if (tcg_regset_single(set)) {
4585             /* One register in the set.  */
4586             TCGReg reg = tcg_regset_first(set);
4587             if (s->reg_to_temp[reg] == NULL) {
4588                 return reg;
4589             }
4590         } else {
4591             for (i = 0; i < n; i++) {
4592                 TCGReg reg = order[i];
4593                 if (s->reg_to_temp[reg] == NULL &&
4594                     tcg_regset_test_reg(set, reg)) {
4595                     return reg;
4596                 }
4597             }
4598         }
4599     }
4600 
4601     /* We must spill something.  */
4602     for (j = f; j < 2; j++) {
4603         TCGRegSet set = reg_ct[j];
4604 
4605         if (tcg_regset_single(set)) {
4606             /* One register in the set.  */
4607             TCGReg reg = tcg_regset_first(set);
4608             tcg_reg_free(s, reg, allocated_regs);
4609             return reg;
4610         } else {
4611             for (i = 0; i < n; i++) {
4612                 TCGReg reg = order[i];
4613                 if (tcg_regset_test_reg(set, reg)) {
4614                     tcg_reg_free(s, reg, allocated_regs);
4615                     return reg;
4616                 }
4617             }
4618         }
4619     }
4620 
4621     g_assert_not_reached();
4622 }
4623 
/*
 * As tcg_reg_alloc, but allocate an adjacent register pair (reg, reg + 1),
 * returning the lower register.  @required_regs is indexed by the first
 * register of the pair.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the two registers are free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* The fmin == 0 pass accepts any candidate, so we cannot get here. */
    g_assert_not_reached();
}
4669 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            /* Vector constant: materialize via dup of an element. */
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register copy is newer than any memory slot contents. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4721 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   Note that no code is emitted here: liveness analysis has already
   forced the value back to memory, so only the assert remains. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4730 
4731 /* save globals to their canonical location and assume they can be
4732    modified be the following code. 'allocated_regs' is used in case a
4733    temporary registers needs to be allocated to store a constant. */
4734 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4735 {
4736     int i, n;
4737 
4738     for (i = 0, n = s->nb_globals; i < n; i++) {
4739         temp_save(s, &s->temps[i], allocated_regs);
4740     }
4741 }
4742 
4743 /* sync globals to their canonical location and assume they can be
4744    read by the following code. 'allocated_regs' is used in case a
4745    temporary registers needs to be allocated to store a constant. */
4746 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4747 {
4748     int i, n;
4749 
4750     for (i = 0, n = s->nb_globals; i < n; i++) {
4751         TCGTemp *ts = &s->temps[i];
4752         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4753                          || ts->kind == TEMP_FIXED
4754                          || ts->mem_coherent);
4755     }
4756 }
4757 
4758 /* at the end of a basic block, we assume all temporaries are dead and
4759    all globals are stored at their canonical location. */
4760 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4761 {
4762     int i;
4763 
4764     for (i = s->nb_globals; i < s->nb_temps; i++) {
4765         TCGTemp *ts = &s->temps[i];
4766 
4767         switch (ts->kind) {
4768         case TEMP_TB:
4769             temp_save(s, ts, allocated_regs);
4770             break;
4771         case TEMP_EBB:
4772             /* The liveness analysis already ensures that temps are dead.
4773                Keep an tcg_debug_assert for safety. */
4774             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4775             break;
4776         case TEMP_CONST:
4777             /* Similarly, we should have freed any allocated register. */
4778             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4779             break;
4780         default:
4781             g_assert_not_reached();
4782         }
4783     }
4784 
4785     save_globals(s, allocated_regs);
4786 }
4787 
4788 /*
4789  * At a conditional branch, we assume all temporaries are dead unless
4790  * explicitly live-across-conditional-branch; all globals and local
4791  * temps are synced to their location.
4792  */
4793 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4794 {
4795     sync_globals(s, allocated_regs);
4796 
4797     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4798         TCGTemp *ts = &s->temps[i];
4799         /*
4800          * The liveness analysis already ensures that temps are dead.
4801          * Keep tcg_debug_asserts for safety.
4802          */
4803         switch (ts->kind) {
4804         case TEMP_TB:
4805             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4806             break;
4807         case TEMP_EBB:
4808         case TEMP_CONST:
4809             break;
4810         default:
4811             g_assert_not_reached();
4812         }
4813     }
4814 }
4815 
4816 /*
4817  * Specialized code generation for INDEX_op_mov_* with a constant.
4818  */
4819 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4820                                   tcg_target_ulong val, TCGLifeData arg_life,
4821                                   TCGRegSet preferred_regs)
4822 {
4823     /* ENV should not be modified.  */
4824     tcg_debug_assert(!temp_readonly(ots));
4825 
4826     /* The movi is not explicitly generated here.  */
4827     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4828     ots->val = val;
4829     ots->mem_coherent = 0;
4830     if (NEED_SYNC_ARG(0)) {
4831         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4832     } else if (IS_DEAD_ARG(0)) {
4833         temp_dead(s, ots);
4834     }
4835 }
4836 
4837 /*
4838  * Specialized code generation for INDEX_op_mov_*.
4839  */
4840 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4841 {
4842     const TCGLifeData arg_life = op->life;
4843     TCGRegSet allocated_regs, preferred_regs;
4844     TCGTemp *ts, *ots;
4845     TCGType otype, itype;
4846     TCGReg oreg, ireg;
4847 
4848     allocated_regs = s->reserved_regs;
4849     preferred_regs = output_pref(op, 0);
4850     ots = arg_temp(op->args[0]);
4851     ts = arg_temp(op->args[1]);
4852 
4853     /* ENV should not be modified.  */
4854     tcg_debug_assert(!temp_readonly(ots));
4855 
4856     /* Note that otype != itype for no-op truncation.  */
4857     otype = ots->type;
4858     itype = ts->type;
4859 
4860     if (ts->val_type == TEMP_VAL_CONST) {
4861         /* propagate constant or generate sti */
4862         tcg_target_ulong val = ts->val;
4863         if (IS_DEAD_ARG(1)) {
4864             temp_dead(s, ts);
4865         }
4866         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4867         return;
4868     }
4869 
4870     /* If the source value is in memory we're going to be forced
4871        to have it in a register in order to perform the copy.  Copy
4872        the SOURCE value into its own register first, that way we
4873        don't have to reload SOURCE the next time it is used. */
4874     if (ts->val_type == TEMP_VAL_MEM) {
4875         temp_load(s, ts, tcg_target_available_regs[itype],
4876                   allocated_regs, preferred_regs);
4877     }
4878     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4879     ireg = ts->reg;
4880 
4881     if (IS_DEAD_ARG(0)) {
4882         /* mov to a non-saved dead register makes no sense (even with
4883            liveness analysis disabled). */
4884         tcg_debug_assert(NEED_SYNC_ARG(0));
4885         if (!ots->mem_allocated) {
4886             temp_allocate_frame(s, ots);
4887         }
4888         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4889         if (IS_DEAD_ARG(1)) {
4890             temp_dead(s, ts);
4891         }
4892         temp_dead(s, ots);
4893         return;
4894     }
4895 
4896     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4897         /*
4898          * The mov can be suppressed.  Kill input first, so that it
4899          * is unlinked from reg_to_temp, then set the output to the
4900          * reg that we saved from the input.
4901          */
4902         temp_dead(s, ts);
4903         oreg = ireg;
4904     } else {
4905         if (ots->val_type == TEMP_VAL_REG) {
4906             oreg = ots->reg;
4907         } else {
4908             /* Make sure to not spill the input register during allocation. */
4909             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4910                                  allocated_regs | ((TCGRegSet)1 << ireg),
4911                                  preferred_regs, ots->indirect_base);
4912         }
4913         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4914             /*
4915              * Cross register class move not supported.
4916              * Store the source register into the destination slot
4917              * and leave the destination temp as TEMP_VAL_MEM.
4918              */
4919             assert(!temp_readonly(ots));
4920             if (!ts->mem_allocated) {
4921                 temp_allocate_frame(s, ots);
4922             }
4923             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4924             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4925             ots->mem_coherent = 1;
4926             return;
4927         }
4928     }
4929     set_temp_val_reg(s, ots, oreg);
4930     ots->mem_coherent = 0;
4931 
4932     if (NEED_SYNC_ARG(0)) {
4933         temp_sync(s, ots, allocated_regs, 0, 0);
4934     }
4935 }
4936 
4937 /*
4938  * Specialized code generation for INDEX_op_dup_vec.
4939  */
4940 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4941 {
4942     const TCGLifeData arg_life = op->life;
4943     TCGRegSet dup_out_regs, dup_in_regs;
4944     const TCGArgConstraint *dup_args_ct;
4945     TCGTemp *its, *ots;
4946     TCGType itype, vtype;
4947     unsigned vece;
4948     int lowpart_ofs;
4949     bool ok;
4950 
4951     ots = arg_temp(op->args[0]);
4952     its = arg_temp(op->args[1]);
4953 
4954     /* ENV should not be modified.  */
4955     tcg_debug_assert(!temp_readonly(ots));
4956 
4957     itype = its->type;
4958     vece = TCGOP_VECE(op);
4959     vtype = TCGOP_TYPE(op);
4960 
4961     if (its->val_type == TEMP_VAL_CONST) {
4962         /* Propagate constant via movi -> dupi.  */
4963         tcg_target_ulong val = its->val;
4964         if (IS_DEAD_ARG(1)) {
4965             temp_dead(s, its);
4966         }
4967         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4968         return;
4969     }
4970 
4971     dup_args_ct = opcode_args_ct(op);
4972     dup_out_regs = dup_args_ct[0].regs;
4973     dup_in_regs = dup_args_ct[1].regs;
4974 
4975     /* Allocate the output register now.  */
4976     if (ots->val_type != TEMP_VAL_REG) {
4977         TCGRegSet allocated_regs = s->reserved_regs;
4978         TCGReg oreg;
4979 
4980         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4981             /* Make sure to not spill the input register. */
4982             tcg_regset_set_reg(allocated_regs, its->reg);
4983         }
4984         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4985                              output_pref(op, 0), ots->indirect_base);
4986         set_temp_val_reg(s, ots, oreg);
4987     }
4988 
4989     switch (its->val_type) {
4990     case TEMP_VAL_REG:
4991         /*
4992          * The dup constriaints must be broad, covering all possible VECE.
4993          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4994          * to fail, indicating that extra moves are required for that case.
4995          */
4996         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4997             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4998                 goto done;
4999             }
5000             /* Try again from memory or a vector input register.  */
5001         }
5002         if (!its->mem_coherent) {
5003             /*
5004              * The input register is not synced, and so an extra store
5005              * would be required to use memory.  Attempt an integer-vector
5006              * register move first.  We do not have a TCGRegSet for this.
5007              */
5008             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5009                 break;
5010             }
5011             /* Sync the temp back to its slot and load from there.  */
5012             temp_sync(s, its, s->reserved_regs, 0, 0);
5013         }
5014         /* fall through */
5015 
5016     case TEMP_VAL_MEM:
5017         lowpart_ofs = 0;
5018         if (HOST_BIG_ENDIAN) {
5019             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5020         }
5021         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5022                              its->mem_offset + lowpart_ofs)) {
5023             goto done;
5024         }
5025         /* Load the input into the destination vector register. */
5026         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5027         break;
5028 
5029     default:
5030         g_assert_not_reached();
5031     }
5032 
5033     /* We now have a vector input register, so dup must succeed. */
5034     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5035     tcg_debug_assert(ok);
5036 
5037  done:
5038     ots->mem_coherent = 0;
5039     if (IS_DEAD_ARG(1)) {
5040         temp_dead(s, its);
5041     }
5042     if (NEED_SYNC_ARG(0)) {
5043         temp_sync(s, ots, s->reserved_regs, 0, 0);
5044     }
5045     if (IS_DEAD_ARG(0)) {
5046         temp_dead(s, ots);
5047     }
5048 }
5049 
5050 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5051 {
5052     const TCGLifeData arg_life = op->life;
5053     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5054     TCGRegSet i_allocated_regs;
5055     TCGRegSet o_allocated_regs;
5056     int i, k, nb_iargs, nb_oargs;
5057     TCGReg reg;
5058     TCGArg arg;
5059     const TCGArgConstraint *args_ct;
5060     const TCGArgConstraint *arg_ct;
5061     TCGTemp *ts;
5062     TCGArg new_args[TCG_MAX_OP_ARGS];
5063     int const_args[TCG_MAX_OP_ARGS];
5064     TCGCond op_cond;
5065 
5066     nb_oargs = def->nb_oargs;
5067     nb_iargs = def->nb_iargs;
5068 
5069     /* copy constants */
5070     memcpy(new_args + nb_oargs + nb_iargs,
5071            op->args + nb_oargs + nb_iargs,
5072            sizeof(TCGArg) * def->nb_cargs);
5073 
5074     i_allocated_regs = s->reserved_regs;
5075     o_allocated_regs = s->reserved_regs;
5076 
5077     switch (op->opc) {
5078     case INDEX_op_brcond_i32:
5079     case INDEX_op_brcond_i64:
5080         op_cond = op->args[2];
5081         break;
5082     case INDEX_op_setcond_i32:
5083     case INDEX_op_setcond_i64:
5084     case INDEX_op_negsetcond_i32:
5085     case INDEX_op_negsetcond_i64:
5086     case INDEX_op_cmp_vec:
5087         op_cond = op->args[3];
5088         break;
5089     case INDEX_op_brcond2_i32:
5090         op_cond = op->args[4];
5091         break;
5092     case INDEX_op_movcond_i32:
5093     case INDEX_op_movcond_i64:
5094     case INDEX_op_setcond2_i32:
5095     case INDEX_op_cmpsel_vec:
5096         op_cond = op->args[5];
5097         break;
5098     default:
5099         /* No condition within opcode. */
5100         op_cond = TCG_COND_ALWAYS;
5101         break;
5102     }
5103 
5104     args_ct = opcode_args_ct(op);
5105 
5106     /* satisfy input constraints */
5107     for (k = 0; k < nb_iargs; k++) {
5108         TCGRegSet i_preferred_regs, i_required_regs;
5109         bool allocate_new_reg, copyto_new_reg;
5110         TCGTemp *ts2;
5111         int i1, i2;
5112 
5113         i = args_ct[nb_oargs + k].sort_index;
5114         arg = op->args[i];
5115         arg_ct = &args_ct[i];
5116         ts = arg_temp(arg);
5117 
5118         if (ts->val_type == TEMP_VAL_CONST) {
5119 #ifdef TCG_REG_ZERO
5120             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5121                 /* Hardware zero register: indicate register via non-const. */
5122                 const_args[i] = 0;
5123                 new_args[i] = TCG_REG_ZERO;
5124                 continue;
5125             }
5126 #endif
5127 
5128             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5129                                        op_cond, TCGOP_VECE(op))) {
5130                 /* constant is OK for instruction */
5131                 const_args[i] = 1;
5132                 new_args[i] = ts->val;
5133                 continue;
5134             }
5135         }
5136 
5137         reg = ts->reg;
5138         i_preferred_regs = 0;
5139         i_required_regs = arg_ct->regs;
5140         allocate_new_reg = false;
5141         copyto_new_reg = false;
5142 
5143         switch (arg_ct->pair) {
5144         case 0: /* not paired */
5145             if (arg_ct->ialias) {
5146                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5147 
5148                 /*
5149                  * If the input is readonly, then it cannot also be an
5150                  * output and aliased to itself.  If the input is not
5151                  * dead after the instruction, we must allocate a new
5152                  * register and move it.
5153                  */
5154                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5155                     || args_ct[arg_ct->alias_index].newreg) {
5156                     allocate_new_reg = true;
5157                 } else if (ts->val_type == TEMP_VAL_REG) {
5158                     /*
5159                      * Check if the current register has already been
5160                      * allocated for another input.
5161                      */
5162                     allocate_new_reg =
5163                         tcg_regset_test_reg(i_allocated_regs, reg);
5164                 }
5165             }
5166             if (!allocate_new_reg) {
5167                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5168                           i_preferred_regs);
5169                 reg = ts->reg;
5170                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5171             }
5172             if (allocate_new_reg) {
5173                 /*
5174                  * Allocate a new register matching the constraint
5175                  * and move the temporary register into it.
5176                  */
5177                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5178                           i_allocated_regs, 0);
5179                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5180                                     i_preferred_regs, ts->indirect_base);
5181                 copyto_new_reg = true;
5182             }
5183             break;
5184 
5185         case 1:
5186             /* First of an input pair; if i1 == i2, the second is an output. */
5187             i1 = i;
5188             i2 = arg_ct->pair_index;
5189             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5190 
5191             /*
5192              * It is easier to default to allocating a new pair
5193              * and to identify a few cases where it's not required.
5194              */
5195             if (arg_ct->ialias) {
5196                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5197                 if (IS_DEAD_ARG(i1) &&
5198                     IS_DEAD_ARG(i2) &&
5199                     !temp_readonly(ts) &&
5200                     ts->val_type == TEMP_VAL_REG &&
5201                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5202                     tcg_regset_test_reg(i_required_regs, reg) &&
5203                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5204                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5205                     (ts2
5206                      ? ts2->val_type == TEMP_VAL_REG &&
5207                        ts2->reg == reg + 1 &&
5208                        !temp_readonly(ts2)
5209                      : s->reg_to_temp[reg + 1] == NULL)) {
5210                     break;
5211                 }
5212             } else {
5213                 /* Without aliasing, the pair must also be an input. */
5214                 tcg_debug_assert(ts2);
5215                 if (ts->val_type == TEMP_VAL_REG &&
5216                     ts2->val_type == TEMP_VAL_REG &&
5217                     ts2->reg == reg + 1 &&
5218                     tcg_regset_test_reg(i_required_regs, reg)) {
5219                     break;
5220                 }
5221             }
5222             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5223                                      0, ts->indirect_base);
5224             goto do_pair;
5225 
5226         case 2: /* pair second */
5227             reg = new_args[arg_ct->pair_index] + 1;
5228             goto do_pair;
5229 
5230         case 3: /* ialias with second output, no first input */
5231             tcg_debug_assert(arg_ct->ialias);
5232             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5233 
5234             if (IS_DEAD_ARG(i) &&
5235                 !temp_readonly(ts) &&
5236                 ts->val_type == TEMP_VAL_REG &&
5237                 reg > 0 &&
5238                 s->reg_to_temp[reg - 1] == NULL &&
5239                 tcg_regset_test_reg(i_required_regs, reg) &&
5240                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5241                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5242                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5243                 break;
5244             }
5245             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5246                                      i_allocated_regs, 0,
5247                                      ts->indirect_base);
5248             tcg_regset_set_reg(i_allocated_regs, reg);
5249             reg += 1;
5250             goto do_pair;
5251 
5252         do_pair:
5253             /*
5254              * If an aliased input is not dead after the instruction,
5255              * we must allocate a new register and move it.
5256              */
5257             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5258                 TCGRegSet t_allocated_regs = i_allocated_regs;
5259 
5260                 /*
5261                  * Because of the alias, and the continued life, make sure
5262                  * that the temp is somewhere *other* than the reg pair,
5263                  * and we get a copy in reg.
5264                  */
5265                 tcg_regset_set_reg(t_allocated_regs, reg);
5266                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5267                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5268                     /* If ts was already in reg, copy it somewhere else. */
5269                     TCGReg nr;
5270                     bool ok;
5271 
5272                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5273                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5274                                        t_allocated_regs, 0, ts->indirect_base);
5275                     ok = tcg_out_mov(s, ts->type, nr, reg);
5276                     tcg_debug_assert(ok);
5277 
5278                     set_temp_val_reg(s, ts, nr);
5279                 } else {
5280                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5281                               t_allocated_regs, 0);
5282                     copyto_new_reg = true;
5283                 }
5284             } else {
5285                 /* Preferably allocate to reg, otherwise copy. */
5286                 i_required_regs = (TCGRegSet)1 << reg;
5287                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5288                           i_preferred_regs);
5289                 copyto_new_reg = ts->reg != reg;
5290             }
5291             break;
5292 
5293         default:
5294             g_assert_not_reached();
5295         }
5296 
5297         if (copyto_new_reg) {
5298             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5299                 /*
5300                  * Cross register class move not supported.  Sync the
5301                  * temp back to its slot and load from there.
5302                  */
5303                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5304                 tcg_out_ld(s, ts->type, reg,
5305                            ts->mem_base->reg, ts->mem_offset);
5306             }
5307         }
5308         new_args[i] = reg;
5309         const_args[i] = 0;
5310         tcg_regset_set_reg(i_allocated_regs, reg);
5311     }
5312 
5313     /* mark dead temporaries and free the associated registers */
5314     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5315         if (IS_DEAD_ARG(i)) {
5316             temp_dead(s, arg_temp(op->args[i]));
5317         }
5318     }
5319 
5320     if (def->flags & TCG_OPF_COND_BRANCH) {
5321         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5322     } else if (def->flags & TCG_OPF_BB_END) {
5323         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5324     } else {
5325         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5326             /* XXX: permit generic clobber register list ? */
5327             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5328                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5329                     tcg_reg_free(s, i, i_allocated_regs);
5330                 }
5331             }
5332         }
5333         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5334             /* sync globals if the op has side effects and might trigger
5335                an exception. */
5336             sync_globals(s, i_allocated_regs);
5337         }
5338 
5339         /* satisfy the output constraints */
5340         for (k = 0; k < nb_oargs; k++) {
5341             i = args_ct[k].sort_index;
5342             arg = op->args[i];
5343             arg_ct = &args_ct[i];
5344             ts = arg_temp(arg);
5345 
5346             /* ENV should not be modified.  */
5347             tcg_debug_assert(!temp_readonly(ts));
5348 
5349             switch (arg_ct->pair) {
5350             case 0: /* not paired */
5351                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5352                     reg = new_args[arg_ct->alias_index];
5353                 } else if (arg_ct->newreg) {
5354                     reg = tcg_reg_alloc(s, arg_ct->regs,
5355                                         i_allocated_regs | o_allocated_regs,
5356                                         output_pref(op, k), ts->indirect_base);
5357                 } else {
5358                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5359                                         output_pref(op, k), ts->indirect_base);
5360                 }
5361                 break;
5362 
5363             case 1: /* first of pair */
5364                 if (arg_ct->oalias) {
5365                     reg = new_args[arg_ct->alias_index];
5366                 } else if (arg_ct->newreg) {
5367                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5368                                              i_allocated_regs | o_allocated_regs,
5369                                              output_pref(op, k),
5370                                              ts->indirect_base);
5371                 } else {
5372                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5373                                              output_pref(op, k),
5374                                              ts->indirect_base);
5375                 }
5376                 break;
5377 
5378             case 2: /* second of pair */
5379                 if (arg_ct->oalias) {
5380                     reg = new_args[arg_ct->alias_index];
5381                 } else {
5382                     reg = new_args[arg_ct->pair_index] + 1;
5383                 }
5384                 break;
5385 
5386             case 3: /* first of pair, aliasing with a second input */
5387                 tcg_debug_assert(!arg_ct->newreg);
5388                 reg = new_args[arg_ct->pair_index] - 1;
5389                 break;
5390 
5391             default:
5392                 g_assert_not_reached();
5393             }
5394             tcg_regset_set_reg(o_allocated_regs, reg);
5395             set_temp_val_reg(s, ts, reg);
5396             ts->mem_coherent = 0;
5397             new_args[i] = reg;
5398         }
5399     }
5400 
5401     /* emit instruction */
5402     TCGType type = TCGOP_TYPE(op);
5403     switch (op->opc) {
5404     case INDEX_op_ext_i32_i64:
5405         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5406         break;
5407     case INDEX_op_extu_i32_i64:
5408         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5409         break;
5410     case INDEX_op_extrl_i64_i32:
5411         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5412         break;
5413 
5414     case INDEX_op_add:
5415     case INDEX_op_and:
5416     case INDEX_op_andc:
5417     case INDEX_op_divs:
5418     case INDEX_op_divu:
5419     case INDEX_op_eqv:
5420     case INDEX_op_mul:
5421     case INDEX_op_mulsh:
5422     case INDEX_op_muluh:
5423     case INDEX_op_nand:
5424     case INDEX_op_nor:
5425     case INDEX_op_or:
5426     case INDEX_op_orc:
5427     case INDEX_op_rems:
5428     case INDEX_op_xor:
5429         {
5430             const TCGOutOpBinary *out =
5431                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5432 
5433             /* Constants should never appear in the first source operand. */
5434             tcg_debug_assert(!const_args[1]);
5435             if (const_args[2]) {
5436                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5437             } else {
5438                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5439             }
5440         }
5441         break;
5442 
5443     case INDEX_op_sub:
5444         {
5445             const TCGOutOpSubtract *out = &outop_sub;
5446 
5447             /*
5448              * Constants should never appear in the second source operand.
5449              * These are folded to add with negative constant.
5450              */
5451             tcg_debug_assert(!const_args[2]);
5452             if (const_args[1]) {
5453                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     case INDEX_op_neg:
5461     case INDEX_op_not:
5462         {
5463             const TCGOutOpUnary *out =
5464                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5465 
5466             /* Constants should have been folded. */
5467             tcg_debug_assert(!const_args[1]);
5468             out->out_rr(s, type, new_args[0], new_args[1]);
5469         }
5470         break;
5471 
5472     case INDEX_op_divs2:
5473     case INDEX_op_divu2:
5474         {
5475             const TCGOutOpDivRem *out =
5476                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5477 
5478             /* Only used by x86 and s390x, which use matching constraints. */
5479             tcg_debug_assert(new_args[0] == new_args[2]);
5480             tcg_debug_assert(new_args[1] == new_args[3]);
5481             tcg_debug_assert(!const_args[4]);
5482             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5483         }
5484         break;
5485 
5486     default:
5487         if (def->flags & TCG_OPF_VECTOR) {
5488             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5489                            TCGOP_VECE(op), new_args, const_args);
5490         } else {
5491             tcg_out_op(s, op->opc, type, new_args, const_args);
5492         }
5493         break;
5494     }
5495 
5496     /* move the outputs in the correct register if needed */
5497     for(i = 0; i < nb_oargs; i++) {
5498         ts = arg_temp(op->args[i]);
5499 
5500         /* ENV should not be modified.  */
5501         tcg_debug_assert(!temp_readonly(ts));
5502 
5503         if (NEED_SYNC_ARG(i)) {
5504             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5505         } else if (IS_DEAD_ARG(i)) {
5506             temp_dead(s, ts);
5507         }
5508     }
5509 }
5510 
/*
 * Allocate registers and emit code for dup2_vec, which builds a 64-bit
 * vector element from two 32-bit input temps (32-bit hosts only).
 * Returns true if the operation was emitted here, false if the caller
 * must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* input, low 32 bits */
    itsh = arg_temp(op->args[2]);   /* input, high 32 bits */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Combine the two 32-bit constants into the 64-bit element. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size whose replication equals val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points at the lower-addressed half of the pair. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to memory so dupm can load the whole value. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register copy of the output is now the only up-to-date copy. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5597 
5598 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5599                          TCGRegSet allocated_regs)
5600 {
5601     if (ts->val_type == TEMP_VAL_REG) {
5602         if (ts->reg != reg) {
5603             tcg_reg_free(s, reg, allocated_regs);
5604             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5605                 /*
5606                  * Cross register class move not supported.  Sync the
5607                  * temp back to its slot and load from there.
5608                  */
5609                 temp_sync(s, ts, allocated_regs, 0, 0);
5610                 tcg_out_ld(s, ts->type, reg,
5611                            ts->mem_base->reg, ts->mem_offset);
5612             }
5613         }
5614     } else {
5615         TCGRegSet arg_set = 0;
5616 
5617         tcg_reg_free(s, reg, allocated_regs);
5618         tcg_regset_set_reg(arg_set, reg);
5619         temp_load(s, ts, arg_set, allocated_regs, 0);
5620     }
5621 }
5622 
5623 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5624                          TCGRegSet allocated_regs)
5625 {
5626     /*
5627      * When the destination is on the stack, load up the temp and store.
5628      * If there are many call-saved registers, the temp might live to
5629      * see another use; otherwise it'll be discarded.
5630      */
5631     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5632     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5633                arg_slot_stk_ofs(arg_slot));
5634 }
5635 
5636 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5637                             TCGTemp *ts, TCGRegSet *allocated_regs)
5638 {
5639     if (arg_slot_reg_p(l->arg_slot)) {
5640         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5641         load_arg_reg(s, reg, ts, *allocated_regs);
5642         tcg_regset_set_reg(*allocated_regs, reg);
5643     } else {
5644         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5645     }
5646 }
5647 
5648 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5649                          intptr_t ref_off, TCGRegSet *allocated_regs)
5650 {
5651     TCGReg reg;
5652 
5653     if (arg_slot_reg_p(arg_slot)) {
5654         reg = tcg_target_call_iarg_regs[arg_slot];
5655         tcg_reg_free(s, reg, *allocated_regs);
5656         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5657         tcg_regset_set_reg(*allocated_regs, reg);
5658     } else {
5659         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5660                             *allocated_regs, 0, false);
5661         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5662         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5663                    arg_slot_stk_ofs(arg_slot));
5664     }
5665 }
5666 
/*
 * Allocate registers and emit code for a helper call.  Inputs are moved
 * into their ABI locations, call-clobbered registers are freed, globals
 * are saved or synced according to the helper's flags, and the outputs
 * are bound to the ABI return registers.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value into its stack slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent part of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part lands in the corresponding ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The 128-bit value was returned in a vector register; spill it. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5797 
5798 /**
5799  * atom_and_align_for_opc:
5800  * @s: tcg context
5801  * @opc: memory operation code
5802  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5803  * @allow_two_ops: true if we are prepared to issue two operations
5804  *
5805  * Return the alignment and atomicity to use for the inline fast path
5806  * for the given memory operation.  The alignment may be larger than
5807  * that specified in @opc, and the correct alignment will be diagnosed
5808  * by the slow path helper.
5809  *
5810  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5811  * and issue two loads or stores for subalignment.
5812  */
5813 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5814                                            MemOp host_atom, bool allow_two_ops)
5815 {
5816     MemOp align = memop_alignment_bits(opc);
5817     MemOp size = opc & MO_SIZE;
5818     MemOp half = size ? size - 1 : 0;
5819     MemOp atom = opc & MO_ATOM_MASK;
5820     MemOp atmax;
5821 
5822     switch (atom) {
5823     case MO_ATOM_NONE:
5824         /* The operation requires no specific atomicity. */
5825         atmax = MO_8;
5826         break;
5827 
5828     case MO_ATOM_IFALIGN:
5829         atmax = size;
5830         break;
5831 
5832     case MO_ATOM_IFALIGN_PAIR:
5833         atmax = half;
5834         break;
5835 
5836     case MO_ATOM_WITHIN16:
5837         atmax = size;
5838         if (size == MO_128) {
5839             /* Misalignment implies !within16, and therefore no atomicity. */
5840         } else if (host_atom != MO_ATOM_WITHIN16) {
5841             /* The host does not implement within16, so require alignment. */
5842             align = MAX(align, size);
5843         }
5844         break;
5845 
5846     case MO_ATOM_WITHIN16_PAIR:
5847         atmax = size;
5848         /*
5849          * Misalignment implies !within16, and therefore half atomicity.
5850          * Any host prepared for two operations can implement this with
5851          * half alignment.
5852          */
5853         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5854             align = MAX(align, half);
5855         }
5856         break;
5857 
5858     case MO_ATOM_SUBALIGN:
5859         atmax = size;
5860         if (host_atom != MO_ATOM_SUBALIGN) {
5861             /* If unaligned but not odd, there are subobjects up to half. */
5862             if (allow_two_ops) {
5863                 align = MAX(align, half);
5864             } else {
5865                 align = MAX(align, size);
5866             }
5867         }
5868         break;
5869 
5870     default:
5871         g_assert_not_reached();
5872     }
5873 
5874     return (TCGAtomAlign){ .atom = atmax, .align = align };
5875 }
5876 
5877 /*
5878  * Similarly for qemu_ld/st slow path helpers.
5879  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5880  * using only the provided backend tcg_out_* functions.
5881  */
5882 
5883 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5884 {
5885     int ofs = arg_slot_stk_ofs(slot);
5886 
5887     /*
5888      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5889      * require extension to uint64_t, adjust the address for uint32_t.
5890      */
5891     if (HOST_BIG_ENDIAN &&
5892         TCG_TARGET_REG_BITS == 64 &&
5893         type == TCG_TYPE_I32) {
5894         ofs += 4;
5895     }
5896     return ofs;
5897 }
5898 
5899 static void tcg_out_helper_load_slots(TCGContext *s,
5900                                       unsigned nmov, TCGMovExtend *mov,
5901                                       const TCGLdstHelperParam *parm)
5902 {
5903     unsigned i;
5904     TCGReg dst3;
5905 
5906     /*
5907      * Start from the end, storing to the stack first.
5908      * This frees those registers, so we need not consider overlap.
5909      */
5910     for (i = nmov; i-- > 0; ) {
5911         unsigned slot = mov[i].dst;
5912 
5913         if (arg_slot_reg_p(slot)) {
5914             goto found_reg;
5915         }
5916 
5917         TCGReg src = mov[i].src;
5918         TCGType dst_type = mov[i].dst_type;
5919         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5920 
5921         /* The argument is going onto the stack; extend into scratch. */
5922         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5923             tcg_debug_assert(parm->ntmp != 0);
5924             mov[i].dst = src = parm->tmp[0];
5925             tcg_out_movext1(s, &mov[i]);
5926         }
5927 
5928         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5929                    tcg_out_helper_stk_ofs(dst_type, slot));
5930     }
5931     return;
5932 
5933  found_reg:
5934     /*
5935      * The remaining arguments are in registers.
5936      * Convert slot numbers to argument registers.
5937      */
5938     nmov = i + 1;
5939     for (i = 0; i < nmov; ++i) {
5940         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5941     }
5942 
5943     switch (nmov) {
5944     case 4:
5945         /* The backend must have provided enough temps for the worst case. */
5946         tcg_debug_assert(parm->ntmp >= 2);
5947 
5948         dst3 = mov[3].dst;
5949         for (unsigned j = 0; j < 3; ++j) {
5950             if (dst3 == mov[j].src) {
5951                 /*
5952                  * Conflict. Copy the source to a temporary, perform the
5953                  * remaining moves, then the extension from our scratch
5954                  * on the way out.
5955                  */
5956                 TCGReg scratch = parm->tmp[1];
5957 
5958                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5959                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5960                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5961                 break;
5962             }
5963         }
5964 
5965         /* No conflicts: perform this move and continue. */
5966         tcg_out_movext1(s, &mov[3]);
5967         /* fall through */
5968 
5969     case 3:
5970         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5971                         parm->ntmp ? parm->tmp[0] : -1);
5972         break;
5973     case 2:
5974         tcg_out_movext2(s, mov, mov + 1,
5975                         parm->ntmp ? parm->tmp[0] : -1);
5976         break;
5977     case 1:
5978         tcg_out_movext1(s, mov);
5979         break;
5980     default:
5981         g_assert_not_reached();
5982     }
5983 }
5984 
5985 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5986                                     TCGType type, tcg_target_long imm,
5987                                     const TCGLdstHelperParam *parm)
5988 {
5989     if (arg_slot_reg_p(slot)) {
5990         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5991     } else {
5992         int ofs = tcg_out_helper_stk_ofs(type, slot);
5993         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5994             tcg_debug_assert(parm->ntmp != 0);
5995             tcg_out_movi(s, type, parm->tmp[0], imm);
5996             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5997         }
5998     }
5999 }
6000 
/*
 * Load the arguments common to all qemu_ld/st slow-path helpers:
 * env (always the first argument), the MemOpIdx @oi, and the return
 * address, starting at argument index @next_arg.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* Let the backend generate the return address into a register. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded return address as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6067 
6068 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6069                                        const TCGCallArgumentLoc *loc,
6070                                        TCGType dst_type, TCGType src_type,
6071                                        TCGReg lo, TCGReg hi)
6072 {
6073     MemOp reg_mo;
6074 
6075     if (dst_type <= TCG_TYPE_REG) {
6076         MemOp src_ext;
6077 
6078         switch (loc->kind) {
6079         case TCG_CALL_ARG_NORMAL:
6080             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6081             break;
6082         case TCG_CALL_ARG_EXTEND_U:
6083             dst_type = TCG_TYPE_REG;
6084             src_ext = MO_UL;
6085             break;
6086         case TCG_CALL_ARG_EXTEND_S:
6087             dst_type = TCG_TYPE_REG;
6088             src_ext = MO_SL;
6089             break;
6090         default:
6091             g_assert_not_reached();
6092         }
6093 
6094         mov[0].dst = loc->arg_slot;
6095         mov[0].dst_type = dst_type;
6096         mov[0].src = lo;
6097         mov[0].src_type = src_type;
6098         mov[0].src_ext = src_ext;
6099         return 1;
6100     }
6101 
6102     if (TCG_TARGET_REG_BITS == 32) {
6103         assert(dst_type == TCG_TYPE_I64);
6104         reg_mo = MO_32;
6105     } else {
6106         assert(dst_type == TCG_TYPE_I128);
6107         reg_mo = MO_64;
6108     }
6109 
6110     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6111     mov[0].src = lo;
6112     mov[0].dst_type = TCG_TYPE_REG;
6113     mov[0].src_type = TCG_TYPE_REG;
6114     mov[0].src_ext = reg_mo;
6115 
6116     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6117     mov[1].src = hi;
6118     mov[1].dst_type = TCG_TYPE_REG;
6119     mov[1].src_type = TCG_TYPE_REG;
6120     mov[1].src_ext = reg_mo;
6121 
6122     return 2;
6123 }
6124 
/*
 * Emit code that loads the arguments for a qemu_ld slow-path helper call
 * described by @ldst, using the backend-provided conventions in @parm.
 *
 * This function places the guest address argument and, when the helper
 * returns Int128 by reference, the return-memory pointer.  The env, oi
 * and return-address arguments are emitted by
 * tcg_out_helper_load_common_args at the end.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size; 8/16/32-bit loads
       share the 32-bit helper, which widens in the return register. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* Otherwise the address is one or two moves, as computed by
           tcg_out_helper_add_mov (two on 32-bit host / 64-bit guest). */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                /* Pass the address of the stack scratch area directly. */
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* First argument is itself on the stack: compute the
                   scratch address in a temp, then spill it to slot 0. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Emit env, oi, and (if required) the return address argument. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6205 
/*
 * Emit code that moves the return value of a qemu_ld slow-path helper
 * into ldst->datalo_reg (and datahi_reg for two-register results).
 *
 * @load_sign: true if the helper itself performed any required sign
 * extension to tcg_target_ulong, in which case only a plain move is
 * needed here; otherwise emit the extension matching the MemOp.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On a 32-bit host an I64 result occupies two registers:
           handled by the two-move code after the switch. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        /* Single-register result: one move, possibly extending. */
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Result is in two registers: use the two-move code below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Result is in a vector register: spill it to the stack
               scratch area, then reload as two I64 pieces (the BY_REF
               code, via fall through). */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* Result was written to the stack scratch area passed as
               the return reference; load both halves from there. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result in the normal output registers; the pair of
       moves may need a temp if the registers overlap (movext2). */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6291 
/*
 * Emit code that loads the arguments for a qemu_st slow-path helper call
 * described by @ldst, using the backend-provided conventions in @parm.
 *
 * Places the guest address and the data value (or a reference to it for
 * Int128 passed by reference).  On 32-bit hosts, the zero for the high
 * half of the widened address is deliberately loaded last, after all
 * register inputs have been consumed.  The env, oi and return-address
 * arguments are emitted by tcg_out_helper_load_common_args.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size; 8/16/32-bit stores
       share the 32-bit helper. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed in registers/stack slots: batch its moves with
           the address moves so the slot loader can resolve conflicts. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* Int128 data passed by reference: store both halves to the
           reserved stack slots first (datalo/hi still live), ... */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        /* ... then load the address argument into its slots ... */
        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* ... and finally pass the address of the stored value. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* Pointer argument itself goes on the stack: compute it in
               a temp and spill. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    /* Emit env, oi, and (if required) the return address argument. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6398 
/*
 * Translate the TCG ops accumulated in @s into host machine code for @tb.
 *
 * Runs the optimizer and liveness passes, then the register allocator /
 * backend emitter over each op in order, and finalizes out-of-line
 * slow paths and constant pools.
 *
 * Returns the generated code size in bytes on success, or:
 *   -1  if the code buffer high-water mark was crossed (caller must
 *       restart with a new/flushed buffer);
 *   -2  if the TB exceeds the 16-bit insn-offset limit or relocations
 *       could not be resolved (caller must restart with a smaller TB).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the pre-optimization opcode stream, if requested. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* -1 so that the first insn_start records data at index 0 without
       writing an end offset for a non-existent previous insn. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* End of extended basic block: sync all temps to memory. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Every guest insn must have produced exactly one insn_start op. */
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6590 
6591 #ifdef ELF_HOST_MACHINE
6592 /* In order to use this feature, the backend needs to do three things:
6593 
6594    (1) Define ELF_HOST_MACHINE to indicate both what value to
6595        put into the ELF image and to indicate support for the feature.
6596 
6597    (2) Define tcg_register_jit.  This should create a buffer containing
6598        the contents of a .debug_frame section that describes the post-
6599        prologue unwind info for the tcg machine.
6600 
6601    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6602 */
6603 
6604 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action requested of GDB via __jit_debug_descriptor.action_flag.
   Values are fixed by the GDB JIT interface; do not change. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,    /* relevant_entry was just registered */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
6610 
/* One node in GDB's doubly-linked list of in-memory symbol files.
   Layout is fixed by the GDB JIT interface; do not change. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
6617 
/* Root descriptor that GDB locates by name (__jit_debug_descriptor).
   Layout is fixed by the GDB JIT interface; do not change. */
struct jit_descriptor {
    uint32_t version;                       /* must be 1 */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action refers to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
6624 
/* GDB sets a breakpoint on this function by name; calling it notifies
   the debugger of a descriptor update.  The noinline attribute and the
   empty asm keep the compiler from inlining or eliding the call. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6630 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6634 
6635 /* End GDB interface.  */
6636 
/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab.  The table starts with an empty string at offset 0, which
 * is skipped.  @str must be present: the table is a static template
 * and callers only look up known entries (no not-found path).
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
6648 
6649 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6650                                  const void *debug_frame,
6651                                  size_t debug_frame_size)
6652 {
6653     struct __attribute__((packed)) DebugInfo {
6654         uint32_t  len;
6655         uint16_t  version;
6656         uint32_t  abbrev;
6657         uint8_t   ptr_size;
6658         uint8_t   cu_die;
6659         uint16_t  cu_lang;
6660         uintptr_t cu_low_pc;
6661         uintptr_t cu_high_pc;
6662         uint8_t   fn_die;
6663         char      fn_name[16];
6664         uintptr_t fn_low_pc;
6665         uintptr_t fn_high_pc;
6666         uint8_t   cu_eoc;
6667     };
6668 
6669     struct ElfImage {
6670         ElfW(Ehdr) ehdr;
6671         ElfW(Phdr) phdr;
6672         ElfW(Shdr) shdr[7];
6673         ElfW(Sym)  sym[2];
6674         struct DebugInfo di;
6675         uint8_t    da[24];
6676         char       str[80];
6677     };
6678 
6679     struct ElfImage *img;
6680 
6681     static const struct ElfImage img_template = {
6682         .ehdr = {
6683             .e_ident[EI_MAG0] = ELFMAG0,
6684             .e_ident[EI_MAG1] = ELFMAG1,
6685             .e_ident[EI_MAG2] = ELFMAG2,
6686             .e_ident[EI_MAG3] = ELFMAG3,
6687             .e_ident[EI_CLASS] = ELF_CLASS,
6688             .e_ident[EI_DATA] = ELF_DATA,
6689             .e_ident[EI_VERSION] = EV_CURRENT,
6690             .e_type = ET_EXEC,
6691             .e_machine = ELF_HOST_MACHINE,
6692             .e_version = EV_CURRENT,
6693             .e_phoff = offsetof(struct ElfImage, phdr),
6694             .e_shoff = offsetof(struct ElfImage, shdr),
6695             .e_ehsize = sizeof(ElfW(Shdr)),
6696             .e_phentsize = sizeof(ElfW(Phdr)),
6697             .e_phnum = 1,
6698             .e_shentsize = sizeof(ElfW(Shdr)),
6699             .e_shnum = ARRAY_SIZE(img->shdr),
6700             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6701 #ifdef ELF_HOST_FLAGS
6702             .e_flags = ELF_HOST_FLAGS,
6703 #endif
6704 #ifdef ELF_OSABI
6705             .e_ident[EI_OSABI] = ELF_OSABI,
6706 #endif
6707         },
6708         .phdr = {
6709             .p_type = PT_LOAD,
6710             .p_flags = PF_X,
6711         },
6712         .shdr = {
6713             [0] = { .sh_type = SHT_NULL },
6714             /* Trick: The contents of code_gen_buffer are not present in
6715                this fake ELF file; that got allocated elsewhere.  Therefore
6716                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6717                will not look for contents.  We can record any address.  */
6718             [1] = { /* .text */
6719                 .sh_type = SHT_NOBITS,
6720                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6721             },
6722             [2] = { /* .debug_info */
6723                 .sh_type = SHT_PROGBITS,
6724                 .sh_offset = offsetof(struct ElfImage, di),
6725                 .sh_size = sizeof(struct DebugInfo),
6726             },
6727             [3] = { /* .debug_abbrev */
6728                 .sh_type = SHT_PROGBITS,
6729                 .sh_offset = offsetof(struct ElfImage, da),
6730                 .sh_size = sizeof(img->da),
6731             },
6732             [4] = { /* .debug_frame */
6733                 .sh_type = SHT_PROGBITS,
6734                 .sh_offset = sizeof(struct ElfImage),
6735             },
6736             [5] = { /* .symtab */
6737                 .sh_type = SHT_SYMTAB,
6738                 .sh_offset = offsetof(struct ElfImage, sym),
6739                 .sh_size = sizeof(img->sym),
6740                 .sh_info = 1,
6741                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6742                 .sh_entsize = sizeof(ElfW(Sym)),
6743             },
6744             [6] = { /* .strtab */
6745                 .sh_type = SHT_STRTAB,
6746                 .sh_offset = offsetof(struct ElfImage, str),
6747                 .sh_size = sizeof(img->str),
6748             }
6749         },
6750         .sym = {
6751             [1] = { /* code_gen_buffer */
6752                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6753                 .st_shndx = 1,
6754             }
6755         },
6756         .di = {
6757             .len = sizeof(struct DebugInfo) - 4,
6758             .version = 2,
6759             .ptr_size = sizeof(void *),
6760             .cu_die = 1,
6761             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6762             .fn_die = 2,
6763             .fn_name = "code_gen_buffer"
6764         },
6765         .da = {
6766             1,          /* abbrev number (the cu) */
6767             0x11, 1,    /* DW_TAG_compile_unit, has children */
6768             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6769             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6770             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6771             0, 0,       /* end of abbrev */
6772             2,          /* abbrev number (the fn) */
6773             0x2e, 0,    /* DW_TAG_subprogram, no children */
6774             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6775             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6776             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6777             0, 0,       /* end of abbrev */
6778             0           /* no more abbrev */
6779         },
6780         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6781                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6782     };
6783 
6784     /* We only need a single jit entry; statically allocate it.  */
6785     static struct jit_code_entry one_entry;
6786 
6787     uintptr_t buf = (uintptr_t)buf_ptr;
6788     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6789     DebugFrameHeader *dfh;
6790 
6791     img = g_malloc(img_size);
6792     *img = img_template;
6793 
6794     img->phdr.p_vaddr = buf;
6795     img->phdr.p_paddr = buf;
6796     img->phdr.p_memsz = buf_size;
6797 
6798     img->shdr[1].sh_name = find_string(img->str, ".text");
6799     img->shdr[1].sh_addr = buf;
6800     img->shdr[1].sh_size = buf_size;
6801 
6802     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6803     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6804 
6805     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6806     img->shdr[4].sh_size = debug_frame_size;
6807 
6808     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6809     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6810 
6811     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6812     img->sym[1].st_value = buf;
6813     img->sym[1].st_size = buf_size;
6814 
6815     img->di.cu_low_pc = buf;
6816     img->di.cu_high_pc = buf + buf_size;
6817     img->di.fn_low_pc = buf;
6818     img->di.fn_high_pc = buf + buf_size;
6819 
6820     dfh = (DebugFrameHeader *)(img + 1);
6821     memcpy(dfh, debug_frame, debug_frame_size);
6822     dfh->fde.func_start = buf;
6823     dfh->fde.func_len = buf_size;
6824 
6825 #ifdef DEBUG_JIT
6826     /* Enable this block to be able to debug the ELF image file creation.
6827        One can use readelf, objdump, or other inspection utilities.  */
6828     {
6829         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6830         FILE *f = fopen(jit, "w+b");
6831         if (f) {
6832             if (fwrite(img, img_size, 1, f) != img_size) {
6833                 /* Avoid stupid unused return value warning for fwrite.  */
6834             }
6835             fclose(f);
6836         }
6837     }
6838 #endif
6839 
6840     one_entry.symfile_addr = img;
6841     one_entry.symfile_size = img_size;
6842 
6843     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6844     __jit_debug_descriptor.relevant_entry = &one_entry;
6845     __jit_debug_descriptor.first_entry = &one_entry;
6846     __jit_debug_register_code();
6847 }
6848 #else
6849 /* No support for the feature.  Provide the entry point expected by exec.c,
6850    and implement the internal function we declared earlier.  */
6851 
/* No-op stub: this host has no ELF_HOST_MACHINE definition, so the
   GDB JIT registration feature is unavailable.  Parameters ignored. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6857 
/* No-op stub for the public entry point expected by exec.c when the
   GDB JIT feature is not supported on this host.  Parameters ignored. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6861 #endif /* ELF_HOST_MACHINE */
6862 
#if !TCG_TARGET_MAYBE_vec
/* Backends without vector support never generate vec ops that require
   expansion, so reaching this stub indicates a front-end bug. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
6869