/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
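
/*
 * Illustrative note: on a host with 1-byte insn units (e.g. x86),
 * tcg_out32() takes the memcpy path and advances code_ptr by four
 * units, while a 4-byte-unit host (e.g. aarch64) stores the value
 * with a single "*s->code_ptr++ = v".  The tcg_patch* variants
 * rewrite an already-emitted unit in place, typically from a
 * backend's patch_reloc().
 */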

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
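
/*
 * Illustrative sketch of the label lifecycle for a forward branch
 * (R_EXAMPLE_PCREL stands in for a backend-specific relocation type):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE_PCREL, l, 0); // branch site
 *     ...                                                   // emit body
 *     tcg_out_label(s, l);                                  // target known
 *
 * The branch site is only fixed up later, when tcg_resolve_relocs()
 * walks every label and applies patch_reloc() to each recorded site.
 */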

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
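
/*
 * Example (illustrative): sign-extending a byte into a 64-bit register,
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SB, src);
 *
 * dispatches to tcg_out_ext8s(s, TCG_TYPE_I64, dst, src), while MO_UQ
 * with a 64-bit destination degenerates to a plain tcg_out_mov.
 */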

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers; now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
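
/*
 * Example (illustrative): with i1 = {dst=R1, src=R0} and
 * i2 = {dst=R0, src=R1} the two moves form a swap: i1->dst == src2 and
 * i2->dst == src1, so the code above first tries tcg_out_xchg(), and
 * only if the backend lacks an exchange instruction does it spill one
 * source to @scratch, which the caller must then have provided.
 */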

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchgs.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
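
/*
 * Example (illustrative): the rotation R0 -> R1 -> R2 -> R0, i.e.
 * i1 = {dst=R1, src=R0}, i2 = {dst=R2, src=R1}, i3 = {dst=R0, src=R2},
 * is the "clockwise" case above: two exchanges (or one copy through
 * @scratch) place each value in its destination before the extensions
 * are applied.
 */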

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
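
/*
 * Note: the sort above (descending by nlong, then by data) places
 * identical constants adjacent to one another, so tcg_out_pool_finalize()
 * below can deduplicate entries by comparing each one only against its
 * predecessor.
 */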

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
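
/*
 * Illustrative example: an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands three ways.  The first definitions
 * above turn it into the enumerator c_o1_i2_r_r_ri; the second set
 * turns it into the table entry { 1, 2, { "r", "r", "ri" } } in
 * constraint_sets[]; and the redefinitions just above turn it back
 * into c_o1_i2_r_r_ri so that tcg_target_op_def() can return it.
 */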

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host ISA.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
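
/*
 * Example (illustrative, hypothetical backend code): a backend defining
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,    // hypothetical emitters
 *         .out_rri = tgen_addi,
 *     };
 *
 * is registered below as OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add).
 * Had outop_add been declared with the wrong structure type, the
 * _Generic selection would have no matching association and the build
 * would fail at compile time.
 */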

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
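
/*
 * Illustrative note: a small tcg_malloc() is served by bumping pool_cur
 * within the current TCG_POOL_CHUNK_SIZE chunk; a request larger than
 * that gets a dedicated entry on the pool_first_large list.
 * tcg_pool_reset() below frees only the large pools and rewinds the
 * chunk pointers, so regular chunks are reused for the next translation.
 */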

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
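
/*
 * Worked example (illustrative): info_helper_ld32_mmu above packs five
 * 3-bit typecodes (return, env, addr, oi, ra).  After dropping the
 * return type with "typemask >> 3", the most significant non-zero
 * field (ptr ra, slot 4) occupies bits 9..11, so 32 - clz32(...) is
 * between 10 and 12 and DIV_ROUND_UP(..., 3) yields nargs = 4.
 */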

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
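
/*
 * Example (illustrative, assuming a host ABI with six integer argument
 * registers): slots 0..5 are register slots (arg_slot_reg_p() is true),
 * while slot 7 maps to stack slot 1, i.e. an offset of
 * TCG_TARGET_CALL_STACK_OFFSET + sizeof(tcg_target_long).
 */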

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
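
/*
 * layout_arg_even() models ABIs (TCG_CALL_ARG_EVEN) that pass 64-bit or
 * 128-bit values in aligned slot pairs: if the next slot index is odd,
 * one slot is skipped so that the value starts on an even boundary.
 */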

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
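
/*
 * Example (illustrative): an Int128 argument on a 64-bit host gives
 * n = 2.  One TCG_CALL_ARG_BY_REF slot carries the pointer in the
 * regular argument sequence, and two words of copy space are reserved
 * at ref_slot; init_call_layout() below relocates that copy space to
 * follow the stack parameters.
 */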
1406 
1407 static void init_call_layout(TCGHelperInfo *info)
1408 {
1409     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1410     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1411     unsigned typemask = info->typemask;
1412     unsigned typecode;
1413     TCGCumulativeArgs cum = { };
1414 
1415     /*
1416      * Parse and place any function return value.
1417      */
1418     typecode = typemask & 7;
1419     switch (typecode) {
1420     case dh_typecode_void:
1421         info->nr_out = 0;
1422         break;
1423     case dh_typecode_i32:
1424     case dh_typecode_s32:
1425     case dh_typecode_ptr:
1426         info->nr_out = 1;
1427         info->out_kind = TCG_CALL_RET_NORMAL;
1428         break;
1429     case dh_typecode_i64:
1430     case dh_typecode_s64:
1431         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1432         info->out_kind = TCG_CALL_RET_NORMAL;
1433         /* Query the last register now to trigger any assert early. */
1434         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1435         break;
1436     case dh_typecode_i128:
1437         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1438         info->out_kind = TCG_TARGET_CALL_RET_I128;
1439         switch (TCG_TARGET_CALL_RET_I128) {
1440         case TCG_CALL_RET_NORMAL:
1441             /* Query the last register now to trigger any assert early. */
1442             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1443             break;
1444         case TCG_CALL_RET_BY_VEC:
1445             /* Query the single register now to trigger any assert early. */
1446             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1447             break;
1448         case TCG_CALL_RET_BY_REF:
1449             /*
1450              * Allocate the first argument to the output.
1451              * We don't need to store this anywhere, just make it
1452              * unavailable for use in the input loop below.
1453              */
1454             cum.arg_slot = 1;
1455             break;
1456         default:
1457             qemu_build_not_reached();
1458         }
1459         break;
1460     default:
1461         g_assert_not_reached();
1462     }
1463 
1464     /*
1465      * Parse and place function arguments.
1466      */
1467     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1468         TCGCallArgumentKind kind;
1469         TCGType type;
1470 
1471         typecode = typemask & 7;
1472         switch (typecode) {
1473         case dh_typecode_i32:
1474         case dh_typecode_s32:
1475             type = TCG_TYPE_I32;
1476             break;
1477         case dh_typecode_i64:
1478         case dh_typecode_s64:
1479             type = TCG_TYPE_I64;
1480             break;
1481         case dh_typecode_ptr:
1482             type = TCG_TYPE_PTR;
1483             break;
1484         case dh_typecode_i128:
1485             type = TCG_TYPE_I128;
1486             break;
1487         default:
1488             g_assert_not_reached();
1489         }
1490 
1491         switch (type) {
1492         case TCG_TYPE_I32:
1493             switch (TCG_TARGET_CALL_ARG_I32) {
1494             case TCG_CALL_ARG_EVEN:
1495                 layout_arg_even(&cum);
1496                 /* fall through */
1497             case TCG_CALL_ARG_NORMAL:
1498                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1499                 break;
1500             case TCG_CALL_ARG_EXTEND:
1501                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1502                 layout_arg_1(&cum, info, kind);
1503                 break;
1504             default:
1505                 qemu_build_not_reached();
1506             }
1507             break;
1508 
1509         case TCG_TYPE_I64:
1510             switch (TCG_TARGET_CALL_ARG_I64) {
1511             case TCG_CALL_ARG_EVEN:
1512                 layout_arg_even(&cum);
1513                 /* fall through */
1514             case TCG_CALL_ARG_NORMAL:
1515                 if (TCG_TARGET_REG_BITS == 32) {
1516                     layout_arg_normal_n(&cum, info, 2);
1517                 } else {
1518                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1519                 }
1520                 break;
1521             default:
1522                 qemu_build_not_reached();
1523             }
1524             break;
1525 
1526         case TCG_TYPE_I128:
1527             switch (TCG_TARGET_CALL_ARG_I128) {
1528             case TCG_CALL_ARG_EVEN:
1529                 layout_arg_even(&cum);
1530                 /* fall through */
1531             case TCG_CALL_ARG_NORMAL:
1532                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1533                 break;
1534             case TCG_CALL_ARG_BY_REF:
1535                 layout_arg_by_ref(&cum, info);
1536                 break;
1537             default:
1538                 qemu_build_not_reached();
1539             }
1540             break;
1541 
1542         default:
1543             g_assert_not_reached();
1544         }
1545     }
1546     info->nr_in = cum.info_in_idx;
1547 
1548     /* Validate that we didn't overrun the input array. */
1549     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1550     /* Validate the backend has enough argument space. */
1551     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1552 
1553     /*
1554      * Relocate the "ref_slot" area to the end of the parameters.
1555      * Minimizing this stack offset helps code size for x86,
1556      * which has a signed 8-bit offset encoding.
1557      */
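    /*
     * A hypothetical layout, purely for illustration: with 6 register
     * slots and 7 stack slots of ordinary arguments (cum.arg_slot == 13),
     * the stack-relative ref_base starts at 7, rounds up to 8 for Int128
     * alignment on a 64-bit host, and is then biased by max_reg_slots so
     * that the by-reference area occupies absolute slots 14 and up, after
     * every other parameter.
     */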
1558     if (cum.ref_slot != 0) {
1559         int ref_base = 0;
1560 
1561         if (cum.arg_slot > max_reg_slots) {
1562             int align = __alignof(Int128) / sizeof(tcg_target_long);
1563 
1564             ref_base = cum.arg_slot - max_reg_slots;
1565             if (align > 1) {
1566                 ref_base = ROUND_UP(ref_base, align);
1567             }
1568         }
1569         assert(ref_base + cum.ref_slot <= max_stk_slots);
1570         ref_base += max_reg_slots;
1571 
1572         if (ref_base != 0) {
1573             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1574                 TCGCallArgumentLoc *loc = &info->in[i];
1575                 switch (loc->kind) {
1576                 case TCG_CALL_ARG_BY_REF:
1577                 case TCG_CALL_ARG_BY_REF_N:
1578                     loc->ref_slot += ref_base;
1579                     break;
1580                 default:
1581                     break;
1582                 }
1583             }
1584         }
1585     }
1586 }
1587 
1588 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1589 static void process_constraint_sets(void);
1590 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1591                                             TCGReg reg, const char *name);
1592 
1593 static void tcg_context_init(unsigned max_threads)
1594 {
1595     TCGContext *s = &tcg_init_ctx;
1596     int n, i;
1597     TCGTemp *ts;
1598 
1599     memset(s, 0, sizeof(*s));
1600     s->nb_globals = 0;
1601 
1602     init_call_layout(&info_helper_ld32_mmu);
1603     init_call_layout(&info_helper_ld64_mmu);
1604     init_call_layout(&info_helper_ld128_mmu);
1605     init_call_layout(&info_helper_st32_mmu);
1606     init_call_layout(&info_helper_st64_mmu);
1607     init_call_layout(&info_helper_st128_mmu);
1608 
1609     tcg_target_init(s);
1610     process_constraint_sets();
1611 
1612     /* Reverse the order of the saved registers, assuming they're all at
1613        the start of tcg_target_reg_alloc_order.  */
1614     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1615         int r = tcg_target_reg_alloc_order[n];
1616         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1617             break;
1618         }
1619     }
1620     for (i = 0; i < n; ++i) {
1621         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1622     }
1623     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1624         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1625     }
1626 
1627     tcg_ctx = s;
1628     /*
1629      * In user-mode we simply share the init context among threads, since we
1630      * use a single region. See the documentation of tcg_region_init() for the
1631      * reasoning behind this.
1632      * In system-mode we will have at most max_threads TCG threads.
1633      */
1634 #ifdef CONFIG_USER_ONLY
1635     tcg_ctxs = &tcg_ctx;
1636     tcg_cur_ctxs = 1;
1637     tcg_max_ctxs = 1;
1638 #else
1639     tcg_max_ctxs = max_threads;
1640     tcg_ctxs = g_new0(TCGContext *, max_threads);
1641 #endif
1642 
1643     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1644     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1645     tcg_env = temp_tcgv_ptr(ts);
1646 }
1647 
1648 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1649 {
1650     tcg_context_init(max_threads);
1651     tcg_region_init(tb_size, splitwx, max_threads);
1652 }
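
/*
 * Sketch of the boot-time call path (approximate; the exact call site
 * varies by version): the tcg accelerator invokes
 *
 *     tcg_init(tb_size, splitwx, max_cpus);
 *
 * exactly once, after which each vCPU thread announces itself with
 * tcg_register_thread() before translating anything.
 */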
1653 
1654 /*
1655  * Allocate TBs right before their corresponding translated code, making
1656  * sure that TBs and code are on different cache lines.
1657  */
1658 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1659 {
1660     uintptr_t align = qemu_icache_linesize;
1661     TranslationBlock *tb;
1662     void *next;
1663 
1664  retry:
1665     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1666     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1667 
1668     if (unlikely(next > s->code_gen_highwater)) {
1669         if (tcg_region_alloc(s)) {
1670             return NULL;
1671         }
1672         goto retry;
1673     }
1674     qatomic_set(&s->code_gen_ptr, next);
1675     return tb;
1676 }
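
/*
 * Resulting layout within the region, informally:
 *
 *     code_gen_ptr -> |pad| TranslationBlock |pad| host code ...
 *                          ^tb                   ^next
 *
 * Both pads round up to qemu_icache_linesize, so the TB descriptor and
 * its translated code never share an instruction cache line.
 */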
1677 
1678 void tcg_prologue_init(void)
1679 {
1680     TCGContext *s = tcg_ctx;
1681     size_t prologue_size;
1682 
1683     s->code_ptr = s->code_gen_ptr;
1684     s->code_buf = s->code_gen_ptr;
1685     s->data_gen_ptr = NULL;
1686 
1687 #ifndef CONFIG_TCG_INTERPRETER
1688     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1689 #endif
1690 
1691     s->pool_labels = NULL;
1692 
1693     qemu_thread_jit_write();
1694     /* Generate the prologue.  */
1695     tcg_target_qemu_prologue(s);
1696 
1697     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1698     {
1699         int result = tcg_out_pool_finalize(s);
1700         tcg_debug_assert(result == 0);
1701     }
1702 
1703     prologue_size = tcg_current_code_size(s);
1704     perf_report_prologue(s->code_gen_ptr, prologue_size);
1705 
1706 #ifndef CONFIG_TCG_INTERPRETER
1707     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1708                         (uintptr_t)s->code_buf, prologue_size);
1709 #endif
1710 
1711     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1712         FILE *logfile = qemu_log_trylock();
1713         if (logfile) {
1714             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1715             if (s->data_gen_ptr) {
1716                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1717                 size_t data_size = prologue_size - code_size;
1718                 size_t i;
1719 
1720                 disas(logfile, s->code_gen_ptr, code_size);
1721 
1722                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1723                     if (sizeof(tcg_target_ulong) == 8) {
1724                         fprintf(logfile,
1725                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1726                                 (uintptr_t)s->data_gen_ptr + i,
1727                                 *(uint64_t *)(s->data_gen_ptr + i));
1728                     } else {
1729                         fprintf(logfile,
1730                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1731                                 (uintptr_t)s->data_gen_ptr + i,
1732                                 *(uint32_t *)(s->data_gen_ptr + i));
1733                     }
1734                 }
1735             } else {
1736                 disas(logfile, s->code_gen_ptr, prologue_size);
1737             }
1738             fprintf(logfile, "\n");
1739             qemu_log_unlock(logfile);
1740         }
1741     }
1742 
1743 #ifndef CONFIG_TCG_INTERPRETER
1744     /*
1745      * Assert that goto_ptr is implemented completely, i.e. an epilogue is set.
1746      * For tci, we use NULL as the signal to return from the interpreter,
1747      * so skip this check.
1748      */
1749     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1750 #endif
1751 
1752     tcg_region_prologue_set(s);
1753 }
1754 
1755 void tcg_func_start(TCGContext *s)
1756 {
1757     tcg_pool_reset(s);
1758     s->nb_temps = s->nb_globals;
1759 
1760     /* No temps have been previously allocated for size or locality.  */
1761     tcg_temp_ebb_reset_freed(s);
1762 
1763     /* No constant temps have been previously allocated. */
1764     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1765         if (s->const_table[i]) {
1766             g_hash_table_remove_all(s->const_table[i]);
1767         }
1768     }
1769 
1770     s->nb_ops = 0;
1771     s->nb_labels = 0;
1772     s->current_frame_offset = s->frame_start;
1773 
1774 #ifdef CONFIG_DEBUG_TCG
1775     s->goto_tb_issue_mask = 0;
1776 #endif
1777 
1778     QTAILQ_INIT(&s->ops);
1779     QTAILQ_INIT(&s->free_ops);
1780     s->emit_before_op = NULL;
1781     QSIMPLEQ_INIT(&s->labels);
1782 
1783     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1784     tcg_debug_assert(s->insn_start_words > 0);
1785 }
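
/*
 * tcg_func_start() runs once per translated TB, before the translator
 * emits any opcode; everything reset above is per-translation state,
 * while globals and the context itself persist.
 */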
1786 
1787 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1788 {
1789     int n = s->nb_temps++;
1790 
1791     if (n >= TCG_MAX_TEMPS) {
1792         tcg_raise_tb_overflow(s);
1793     }
1794     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1795 }
1796 
1797 static TCGTemp *tcg_global_alloc(TCGContext *s)
1798 {
1799     TCGTemp *ts;
1800 
1801     tcg_debug_assert(s->nb_globals == s->nb_temps);
1802     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1803     s->nb_globals++;
1804     ts = tcg_temp_alloc(s);
1805     ts->kind = TEMP_GLOBAL;
1806 
1807     return ts;
1808 }
1809 
1810 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1811                                             TCGReg reg, const char *name)
1812 {
1813     TCGTemp *ts;
1814 
1815     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1816 
1817     ts = tcg_global_alloc(s);
1818     ts->base_type = type;
1819     ts->type = type;
1820     ts->kind = TEMP_FIXED;
1821     ts->reg = reg;
1822     ts->name = name;
1823     tcg_regset_set_reg(s->reserved_regs, reg);
1824 
1825     return ts;
1826 }
1827 
1828 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1829 {
1830     s->frame_start = start;
1831     s->frame_end = start + size;
1832     s->frame_temp
1833         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1834 }
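
/*
 * Backends call tcg_set_frame() from tcg_target_qemu_prologue().  A
 * representative invocation (the offset name is hypothetical) reserving
 * a spill buffer on the C stack:
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, frame_off,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 */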
1835 
1836 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1837                                             const char *name, TCGType type)
1838 {
1839     TCGContext *s = tcg_ctx;
1840     TCGTemp *base_ts = tcgv_ptr_temp(base);
1841     TCGTemp *ts = tcg_global_alloc(s);
1842     int indirect_reg = 0;
1843 
1844     switch (base_ts->kind) {
1845     case TEMP_FIXED:
1846         break;
1847     case TEMP_GLOBAL:
1848         /* We do not support double-indirect registers.  */
1849         tcg_debug_assert(!base_ts->indirect_reg);
1850         base_ts->indirect_base = 1;
1851         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1852                             ? 2 : 1);
1853         indirect_reg = 1;
1854         break;
1855     default:
1856         g_assert_not_reached();
1857     }
1858 
1859     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1860         TCGTemp *ts2 = tcg_global_alloc(s);
1861         char buf[64];
1862 
1863         ts->base_type = TCG_TYPE_I64;
1864         ts->type = TCG_TYPE_I32;
1865         ts->indirect_reg = indirect_reg;
1866         ts->mem_allocated = 1;
1867         ts->mem_base = base_ts;
1868         ts->mem_offset = offset;
1869         pstrcpy(buf, sizeof(buf), name);
1870         pstrcat(buf, sizeof(buf), "_0");
1871         ts->name = strdup(buf);
1872 
1873         tcg_debug_assert(ts2 == ts + 1);
1874         ts2->base_type = TCG_TYPE_I64;
1875         ts2->type = TCG_TYPE_I32;
1876         ts2->indirect_reg = indirect_reg;
1877         ts2->mem_allocated = 1;
1878         ts2->mem_base = base_ts;
1879         ts2->mem_offset = offset + 4;
1880         ts2->temp_subindex = 1;
1881         pstrcpy(buf, sizeof(buf), name);
1882         pstrcat(buf, sizeof(buf), "_1");
1883         ts2->name = strdup(buf);
1884     } else {
1885         ts->base_type = type;
1886         ts->type = type;
1887         ts->indirect_reg = indirect_reg;
1888         ts->mem_allocated = 1;
1889         ts->mem_base = base_ts;
1890         ts->mem_offset = offset;
1891         ts->name = name;
1892     }
1893     return ts;
1894 }
1895 
1896 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1897 {
1898     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1899     return temp_tcgv_i32(ts);
1900 }
1901 
1902 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1903 {
1904     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1905     return temp_tcgv_i64(ts);
1906 }
1907 
1908 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1909 {
1910     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1911     return temp_tcgv_ptr(ts);
1912 }
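
/*
 * Front ends use the three constructors above to name pieces of guest
 * CPU state once at translator init, e.g. (the field name is
 * hypothetical):
 *
 *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                     offsetof(CPUArchState, pc), "pc");
 */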
1913 
1914 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1915 {
1916     TCGContext *s = tcg_ctx;
1917     TCGTemp *ts;
1918     int n;
1919 
1920     if (kind == TEMP_EBB) {
1921         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1922 
1923         if (idx < TCG_MAX_TEMPS) {
1924             /* There is already an available temp with the right type.  */
1925             clear_bit(idx, s->free_temps[type].l);
1926 
1927             ts = &s->temps[idx];
1928             ts->temp_allocated = 1;
1929             tcg_debug_assert(ts->base_type == type);
1930             tcg_debug_assert(ts->kind == kind);
1931             return ts;
1932         }
1933     } else {
1934         tcg_debug_assert(kind == TEMP_TB);
1935     }
1936 
1937     switch (type) {
1938     case TCG_TYPE_I32:
1939     case TCG_TYPE_V64:
1940     case TCG_TYPE_V128:
1941     case TCG_TYPE_V256:
1942         n = 1;
1943         break;
1944     case TCG_TYPE_I64:
1945         n = 64 / TCG_TARGET_REG_BITS;
1946         break;
1947     case TCG_TYPE_I128:
1948         n = 128 / TCG_TARGET_REG_BITS;
1949         break;
1950     default:
1951         g_assert_not_reached();
1952     }
1953 
1954     ts = tcg_temp_alloc(s);
1955     ts->base_type = type;
1956     ts->temp_allocated = 1;
1957     ts->kind = kind;
1958 
1959     if (n == 1) {
1960         ts->type = type;
1961     } else {
1962         ts->type = TCG_TYPE_REG;
1963 
1964         for (int i = 1; i < n; ++i) {
1965             TCGTemp *ts2 = tcg_temp_alloc(s);
1966 
1967             tcg_debug_assert(ts2 == ts + i);
1968             ts2->base_type = type;
1969             ts2->type = TCG_TYPE_REG;
1970             ts2->temp_allocated = 1;
1971             ts2->temp_subindex = i;
1972             ts2->kind = kind;
1973         }
1974     }
1975     return ts;
1976 }
1977 
1978 TCGv_i32 tcg_temp_new_i32(void)
1979 {
1980     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1981 }
1982 
1983 TCGv_i32 tcg_temp_ebb_new_i32(void)
1984 {
1985     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1986 }
1987 
1988 TCGv_i64 tcg_temp_new_i64(void)
1989 {
1990     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1991 }
1992 
1993 TCGv_i64 tcg_temp_ebb_new_i64(void)
1994 {
1995     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1996 }
1997 
1998 TCGv_ptr tcg_temp_new_ptr(void)
1999 {
2000     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2001 }
2002 
2003 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2004 {
2005     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2006 }
2007 
2008 TCGv_i128 tcg_temp_new_i128(void)
2009 {
2010     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2011 }
2012 
2013 TCGv_i128 tcg_temp_ebb_new_i128(void)
2014 {
2015     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2016 }
2017 
2018 TCGv_vec tcg_temp_new_vec(TCGType type)
2019 {
2020     TCGTemp *t;
2021 
2022 #ifdef CONFIG_DEBUG_TCG
2023     switch (type) {
2024     case TCG_TYPE_V64:
2025         assert(TCG_TARGET_HAS_v64);
2026         break;
2027     case TCG_TYPE_V128:
2028         assert(TCG_TARGET_HAS_v128);
2029         break;
2030     case TCG_TYPE_V256:
2031         assert(TCG_TARGET_HAS_v256);
2032         break;
2033     default:
2034         g_assert_not_reached();
2035     }
2036 #endif
2037 
2038     t = tcg_temp_new_internal(type, TEMP_EBB);
2039     return temp_tcgv_vec(t);
2040 }
2041 
2042 /* Create a new temp of the same type as an existing temp.  */
2043 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2044 {
2045     TCGTemp *t = tcgv_vec_temp(match);
2046 
2047     tcg_debug_assert(t->temp_allocated != 0);
2048 
2049     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2050     return temp_tcgv_vec(t);
2051 }
2052 
2053 void tcg_temp_free_internal(TCGTemp *ts)
2054 {
2055     TCGContext *s = tcg_ctx;
2056 
2057     switch (ts->kind) {
2058     case TEMP_CONST:
2059     case TEMP_TB:
2060         /* Silently ignore free. */
2061         break;
2062     case TEMP_EBB:
2063         tcg_debug_assert(ts->temp_allocated != 0);
2064         ts->temp_allocated = 0;
2065         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2066         break;
2067     default:
2068         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2069         g_assert_not_reached();
2070     }
2071 }
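
/*
 * TEMP_TB frees are ignored above because such temps are reclaimed by
 * liveness analysis instead; TEMP_CONST temps are dropped wholesale when
 * tcg_func_start() resets the temp count and constant tables for the
 * next translation.
 */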
2072 
2073 void tcg_temp_free_i32(TCGv_i32 arg)
2074 {
2075     tcg_temp_free_internal(tcgv_i32_temp(arg));
2076 }
2077 
2078 void tcg_temp_free_i64(TCGv_i64 arg)
2079 {
2080     tcg_temp_free_internal(tcgv_i64_temp(arg));
2081 }
2082 
2083 void tcg_temp_free_i128(TCGv_i128 arg)
2084 {
2085     tcg_temp_free_internal(tcgv_i128_temp(arg));
2086 }
2087 
2088 void tcg_temp_free_ptr(TCGv_ptr arg)
2089 {
2090     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2091 }
2092 
2093 void tcg_temp_free_vec(TCGv_vec arg)
2094 {
2095     tcg_temp_free_internal(tcgv_vec_temp(arg));
2096 }
2097 
2098 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2099 {
2100     TCGContext *s = tcg_ctx;
2101     GHashTable *h = s->const_table[type];
2102     TCGTemp *ts;
2103 
2104     if (h == NULL) {
2105         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2106         s->const_table[type] = h;
2107     }
2108 
2109     ts = g_hash_table_lookup(h, &val);
2110     if (ts == NULL) {
2111         int64_t *val_ptr;
2112 
2113         ts = tcg_temp_alloc(s);
2114 
2115         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2116             TCGTemp *ts2 = tcg_temp_alloc(s);
2117 
2118             tcg_debug_assert(ts2 == ts + 1);
2119 
2120             ts->base_type = TCG_TYPE_I64;
2121             ts->type = TCG_TYPE_I32;
2122             ts->kind = TEMP_CONST;
2123             ts->temp_allocated = 1;
2124 
2125             ts2->base_type = TCG_TYPE_I64;
2126             ts2->type = TCG_TYPE_I32;
2127             ts2->kind = TEMP_CONST;
2128             ts2->temp_allocated = 1;
2129             ts2->temp_subindex = 1;
2130 
2131             /*
2132              * Retain the full value of the 64-bit constant in the low
2133              * part, so that the hash table works.  Actual uses will
2134              * truncate the value to the low part.
2135              */
2136             ts[HOST_BIG_ENDIAN].val = val;
2137             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2138             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2139         } else {
2140             ts->base_type = type;
2141             ts->type = type;
2142             ts->kind = TEMP_CONST;
2143             ts->temp_allocated = 1;
2144             ts->val = val;
2145             val_ptr = &ts->val;
2146         }
2147         g_hash_table_insert(h, val_ptr, ts);
2148     }
2149 
2150     return ts;
2151 }
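
/*
 * Constants are interned per (type, value) and shared by every use
 * within the translation: freeing one is a no-op (see
 * tcg_temp_free_internal) and it must never be used as an output.
 * Typical use:
 *
 *     tcg_gen_add_i32(dst, src, tcg_constant_i32(1));
 */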
2152 
2153 TCGv_i32 tcg_constant_i32(int32_t val)
2154 {
2155     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2156 }
2157 
2158 TCGv_i64 tcg_constant_i64(int64_t val)
2159 {
2160     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2161 }
2162 
2163 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2164 {
2165     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2166 }
2167 
2168 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2169 {
2170     val = dup_const(vece, val);
2171     return temp_tcgv_vec(tcg_constant_internal(type, val));
2172 }
2173 
2174 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2175 {
2176     TCGTemp *t = tcgv_vec_temp(match);
2177 
2178     tcg_debug_assert(t->temp_allocated != 0);
2179     return tcg_constant_vec(t->base_type, vece, val);
2180 }
2181 
2182 #ifdef CONFIG_DEBUG_TCG
2183 size_t temp_idx(TCGTemp *ts)
2184 {
2185     ptrdiff_t n = ts - tcg_ctx->temps;
2186     assert(n >= 0 && n < tcg_ctx->nb_temps);
2187     return n;
2188 }
2189 
2190 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2191 {
2192     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2193 
2194     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2195     assert(o % sizeof(TCGTemp) == 0);
2196 
2197     return (void *)tcg_ctx + (uintptr_t)v;
2198 }
2199 #endif /* CONFIG_DEBUG_TCG */
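
/*
 * The checks above rely on the handle encoding: a TCGv_i32 (and friends)
 * is an opaque pointer whose value is the byte offset of its TCGTemp
 * from the start of TCGContext, so handle <-> temp conversion is a
 * single addition; the debug build additionally validates range and
 * alignment.
 */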
2200 
2201 /*
2202  * Return true if OP may appear in the opcode stream with TYPE.
2203  * Test the runtime variable that controls each opcode.
2204  */
2205 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2206 {
2207     bool has_type;
2208 
2209     switch (type) {
2210     case TCG_TYPE_I32:
2211         has_type = true;
2212         break;
2213     case TCG_TYPE_I64:
2214         has_type = TCG_TARGET_REG_BITS == 64;
2215         break;
2216     case TCG_TYPE_V64:
2217         has_type = TCG_TARGET_HAS_v64;
2218         break;
2219     case TCG_TYPE_V128:
2220         has_type = TCG_TARGET_HAS_v128;
2221         break;
2222     case TCG_TYPE_V256:
2223         has_type = TCG_TARGET_HAS_v256;
2224         break;
2225     default:
2226         has_type = false;
2227         break;
2228     }
2229 
2230     switch (op) {
2231     case INDEX_op_discard:
2232     case INDEX_op_set_label:
2233     case INDEX_op_call:
2234     case INDEX_op_br:
2235     case INDEX_op_mb:
2236     case INDEX_op_insn_start:
2237     case INDEX_op_exit_tb:
2238     case INDEX_op_goto_tb:
2239     case INDEX_op_goto_ptr:
2240     case INDEX_op_qemu_ld_i32:
2241     case INDEX_op_qemu_st_i32:
2242     case INDEX_op_qemu_ld_i64:
2243     case INDEX_op_qemu_st_i64:
2244         return true;
2245 
2246     case INDEX_op_qemu_st8_i32:
2247         return TCG_TARGET_HAS_qemu_st8_i32;
2248 
2249     case INDEX_op_qemu_ld_i128:
2250     case INDEX_op_qemu_st_i128:
2251         return TCG_TARGET_HAS_qemu_ldst_i128;
2252 
2253     case INDEX_op_add:
2254     case INDEX_op_and:
2255     case INDEX_op_mov:
2256     case INDEX_op_or:
2257     case INDEX_op_xor:
2258         return has_type;
2259 
2260     case INDEX_op_setcond_i32:
2261     case INDEX_op_brcond_i32:
2262     case INDEX_op_movcond_i32:
2263     case INDEX_op_ld8u_i32:
2264     case INDEX_op_ld8s_i32:
2265     case INDEX_op_ld16u_i32:
2266     case INDEX_op_ld16s_i32:
2267     case INDEX_op_ld_i32:
2268     case INDEX_op_st8_i32:
2269     case INDEX_op_st16_i32:
2270     case INDEX_op_st_i32:
2271     case INDEX_op_extract_i32:
2272     case INDEX_op_sextract_i32:
2273     case INDEX_op_deposit_i32:
2274         return true;
2275 
2276     case INDEX_op_negsetcond_i32:
2277         return TCG_TARGET_HAS_negsetcond_i32;
2278     case INDEX_op_extract2_i32:
2279         return TCG_TARGET_HAS_extract2_i32;
2280     case INDEX_op_add2_i32:
2281         return TCG_TARGET_HAS_add2_i32;
2282     case INDEX_op_sub2_i32:
2283         return TCG_TARGET_HAS_sub2_i32;
2284     case INDEX_op_mulu2_i32:
2285         return TCG_TARGET_HAS_mulu2_i32;
2286     case INDEX_op_muls2_i32:
2287         return TCG_TARGET_HAS_muls2_i32;
2288     case INDEX_op_bswap16_i32:
2289         return TCG_TARGET_HAS_bswap16_i32;
2290     case INDEX_op_bswap32_i32:
2291         return TCG_TARGET_HAS_bswap32_i32;
2292     case INDEX_op_ctz_i32:
2293         return TCG_TARGET_HAS_ctz_i32;
2294     case INDEX_op_ctpop_i32:
2295         return TCG_TARGET_HAS_ctpop_i32;
2296 
2297     case INDEX_op_brcond2_i32:
2298     case INDEX_op_setcond2_i32:
2299         return TCG_TARGET_REG_BITS == 32;
2300 
2301     case INDEX_op_setcond_i64:
2302     case INDEX_op_brcond_i64:
2303     case INDEX_op_movcond_i64:
2304     case INDEX_op_ld8u_i64:
2305     case INDEX_op_ld8s_i64:
2306     case INDEX_op_ld16u_i64:
2307     case INDEX_op_ld16s_i64:
2308     case INDEX_op_ld32u_i64:
2309     case INDEX_op_ld32s_i64:
2310     case INDEX_op_ld_i64:
2311     case INDEX_op_st8_i64:
2312     case INDEX_op_st16_i64:
2313     case INDEX_op_st32_i64:
2314     case INDEX_op_st_i64:
2315     case INDEX_op_ext_i32_i64:
2316     case INDEX_op_extu_i32_i64:
2317     case INDEX_op_extract_i64:
2318     case INDEX_op_sextract_i64:
2319     case INDEX_op_deposit_i64:
2320         return TCG_TARGET_REG_BITS == 64;
2321 
2322     case INDEX_op_negsetcond_i64:
2323         return TCG_TARGET_HAS_negsetcond_i64;
2324     case INDEX_op_extract2_i64:
2325         return TCG_TARGET_HAS_extract2_i64;
2326     case INDEX_op_extrl_i64_i32:
2327     case INDEX_op_extrh_i64_i32:
2328         return TCG_TARGET_HAS_extr_i64_i32;
2329     case INDEX_op_bswap16_i64:
2330         return TCG_TARGET_HAS_bswap16_i64;
2331     case INDEX_op_bswap32_i64:
2332         return TCG_TARGET_HAS_bswap32_i64;
2333     case INDEX_op_bswap64_i64:
2334         return TCG_TARGET_HAS_bswap64_i64;
2335     case INDEX_op_ctz_i64:
2336         return TCG_TARGET_HAS_ctz_i64;
2337     case INDEX_op_ctpop_i64:
2338         return TCG_TARGET_HAS_ctpop_i64;
2339     case INDEX_op_add2_i64:
2340         return TCG_TARGET_HAS_add2_i64;
2341     case INDEX_op_sub2_i64:
2342         return TCG_TARGET_HAS_sub2_i64;
2343     case INDEX_op_mulu2_i64:
2344         return TCG_TARGET_HAS_mulu2_i64;
2345     case INDEX_op_muls2_i64:
2346         return TCG_TARGET_HAS_muls2_i64;
2347 
2348     case INDEX_op_mov_vec:
2349     case INDEX_op_dup_vec:
2350     case INDEX_op_dupm_vec:
2351     case INDEX_op_ld_vec:
2352     case INDEX_op_st_vec:
2353     case INDEX_op_add_vec:
2354     case INDEX_op_sub_vec:
2355     case INDEX_op_and_vec:
2356     case INDEX_op_or_vec:
2357     case INDEX_op_xor_vec:
2358     case INDEX_op_cmp_vec:
2359         return has_type;
2360     case INDEX_op_dup2_vec:
2361         return has_type && TCG_TARGET_REG_BITS == 32;
2362     case INDEX_op_not_vec:
2363         return has_type && TCG_TARGET_HAS_not_vec;
2364     case INDEX_op_neg_vec:
2365         return has_type && TCG_TARGET_HAS_neg_vec;
2366     case INDEX_op_abs_vec:
2367         return has_type && TCG_TARGET_HAS_abs_vec;
2368     case INDEX_op_andc_vec:
2369         return has_type && TCG_TARGET_HAS_andc_vec;
2370     case INDEX_op_orc_vec:
2371         return has_type && TCG_TARGET_HAS_orc_vec;
2372     case INDEX_op_nand_vec:
2373         return has_type && TCG_TARGET_HAS_nand_vec;
2374     case INDEX_op_nor_vec:
2375         return has_type && TCG_TARGET_HAS_nor_vec;
2376     case INDEX_op_eqv_vec:
2377         return has_type && TCG_TARGET_HAS_eqv_vec;
2378     case INDEX_op_mul_vec:
2379         return has_type && TCG_TARGET_HAS_mul_vec;
2380     case INDEX_op_shli_vec:
2381     case INDEX_op_shri_vec:
2382     case INDEX_op_sari_vec:
2383         return has_type && TCG_TARGET_HAS_shi_vec;
2384     case INDEX_op_shls_vec:
2385     case INDEX_op_shrs_vec:
2386     case INDEX_op_sars_vec:
2387         return has_type && TCG_TARGET_HAS_shs_vec;
2388     case INDEX_op_shlv_vec:
2389     case INDEX_op_shrv_vec:
2390     case INDEX_op_sarv_vec:
2391         return has_type && TCG_TARGET_HAS_shv_vec;
2392     case INDEX_op_rotli_vec:
2393         return has_type && TCG_TARGET_HAS_roti_vec;
2394     case INDEX_op_rotls_vec:
2395         return has_type && TCG_TARGET_HAS_rots_vec;
2396     case INDEX_op_rotlv_vec:
2397     case INDEX_op_rotrv_vec:
2398         return has_type && TCG_TARGET_HAS_rotv_vec;
2399     case INDEX_op_ssadd_vec:
2400     case INDEX_op_usadd_vec:
2401     case INDEX_op_sssub_vec:
2402     case INDEX_op_ussub_vec:
2403         return has_type && TCG_TARGET_HAS_sat_vec;
2404     case INDEX_op_smin_vec:
2405     case INDEX_op_umin_vec:
2406     case INDEX_op_smax_vec:
2407     case INDEX_op_umax_vec:
2408         return has_type && TCG_TARGET_HAS_minmax_vec;
2409     case INDEX_op_bitsel_vec:
2410         return has_type && TCG_TARGET_HAS_bitsel_vec;
2411     case INDEX_op_cmpsel_vec:
2412         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2413 
2414     default:
2415         if (op < INDEX_op_last_generic) {
2416             const TCGOutOp *outop;
2417             TCGConstraintSetIndex con_set;
2418 
2419             if (!has_type) {
2420                 return false;
2421             }
2422 
2423             outop = all_outop[op];
2424             tcg_debug_assert(outop != NULL);
2425 
2426             con_set = outop->static_constraint;
2427             if (con_set == C_Dynamic) {
2428                 con_set = outop->dynamic_constraint(type, flags);
2429             }
2430             if (con_set >= 0) {
2431                 return true;
2432             }
2433             tcg_debug_assert(con_set == C_NotImplemented);
2434             return false;
2435         }
2436         tcg_debug_assert(op < NB_OPS);
2437         return true;
2438 
2439     case INDEX_op_last_generic:
2440         g_assert_not_reached();
2441     }
2442 }
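
/*
 * Expanders typically probe tcg_op_supported() before emitting an opcode
 * and fall back to a generic sequence otherwise.  As a sketch:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         ... emit the single host op ...
 *     } else {
 *         ... expand via shifts, masks and adds ...
 *     }
 */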
2443 
2444 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2445 {
2446     unsigned width;
2447 
2448     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2449     width = (type == TCG_TYPE_I32 ? 32 : 64);
2450 
2451     tcg_debug_assert(ofs < width);
2452     tcg_debug_assert(len > 0);
2453     tcg_debug_assert(len <= width - ofs);
2454 
2455     return TCG_TARGET_deposit_valid(type, ofs, len);
2456 }
2457 
2458 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2459 
2460 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2461                           TCGTemp *ret, TCGTemp **args)
2462 {
2463     TCGv_i64 extend_free[MAX_CALL_IARGS];
2464     int n_extend = 0;
2465     TCGOp *op;
2466     int i, n, pi = 0, total_args;
2467 
2468     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2469         init_call_layout(info);
2470         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2471     }
2472 
2473     total_args = info->nr_out + info->nr_in + 2;
2474     op = tcg_op_alloc(INDEX_op_call, total_args);
2475 
2476 #ifdef CONFIG_PLUGIN
2477     /* Flag helpers that may affect guest state */
2478     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2479         tcg_ctx->plugin_insn->calls_helpers = true;
2480     }
2481 #endif
2482 
2483     TCGOP_CALLO(op) = n = info->nr_out;
2484     switch (n) {
2485     case 0:
2486         tcg_debug_assert(ret == NULL);
2487         break;
2488     case 1:
2489         tcg_debug_assert(ret != NULL);
2490         op->args[pi++] = temp_arg(ret);
2491         break;
2492     case 2:
2493     case 4:
2494         tcg_debug_assert(ret != NULL);
2495         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2496         tcg_debug_assert(ret->temp_subindex == 0);
2497         for (i = 0; i < n; ++i) {
2498             op->args[pi++] = temp_arg(ret + i);
2499         }
2500         break;
2501     default:
2502         g_assert_not_reached();
2503     }
2504 
2505     TCGOP_CALLI(op) = n = info->nr_in;
2506     for (i = 0; i < n; i++) {
2507         const TCGCallArgumentLoc *loc = &info->in[i];
2508         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2509 
2510         switch (loc->kind) {
2511         case TCG_CALL_ARG_NORMAL:
2512         case TCG_CALL_ARG_BY_REF:
2513         case TCG_CALL_ARG_BY_REF_N:
2514             op->args[pi++] = temp_arg(ts);
2515             break;
2516 
2517         case TCG_CALL_ARG_EXTEND_U:
2518         case TCG_CALL_ARG_EXTEND_S:
2519             {
2520                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2521                 TCGv_i32 orig = temp_tcgv_i32(ts);
2522 
2523                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2524                     tcg_gen_ext_i32_i64(temp, orig);
2525                 } else {
2526                     tcg_gen_extu_i32_i64(temp, orig);
2527                 }
2528                 op->args[pi++] = tcgv_i64_arg(temp);
2529                 extend_free[n_extend++] = temp;
2530             }
2531             break;
2532 
2533         default:
2534             g_assert_not_reached();
2535         }
2536     }
2537     op->args[pi++] = (uintptr_t)func;
2538     op->args[pi++] = (uintptr_t)info;
2539     tcg_debug_assert(pi == total_args);
2540 
2541     if (tcg_ctx->emit_before_op) {
2542         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2543     } else {
2544         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2545     }
2546 
2547     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2548     for (i = 0; i < n_extend; ++i) {
2549         tcg_temp_free_i64(extend_free[i]);
2550     }
2551 }
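
/*
 * The call op assembled above lays out its operands as
 *
 *     args[0 .. nr_out-1]                 output temps
 *     args[nr_out .. nr_out+nr_in-1]      input temps (EXTEND arguments
 *                                         first copied into EBB temps)
 *     args[total_args-2], args[total_args-1]  function pointer, then info
 *
 * which is exactly what tcg_call_func() and tcg_call_info() read back.
 */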
2552 
2553 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2554 {
2555     tcg_gen_callN(func, info, ret, NULL);
2556 }
2557 
2558 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2559 {
2560     tcg_gen_callN(func, info, ret, &t1);
2561 }
2562 
2563 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2564                    TCGTemp *t1, TCGTemp *t2)
2565 {
2566     TCGTemp *args[2] = { t1, t2 };
2567     tcg_gen_callN(func, info, ret, args);
2568 }
2569 
2570 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2571                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2572 {
2573     TCGTemp *args[3] = { t1, t2, t3 };
2574     tcg_gen_callN(func, info, ret, args);
2575 }
2576 
2577 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2578                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2579 {
2580     TCGTemp *args[4] = { t1, t2, t3, t4 };
2581     tcg_gen_callN(func, info, ret, args);
2582 }
2583 
2584 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2585                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2586 {
2587     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2588     tcg_gen_callN(func, info, ret, args);
2589 }
2590 
2591 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2592                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2593                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2594 {
2595     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2600                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2601                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2602 {
2603     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
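
/*
 * These fixed-arity entry points are rarely called by hand: the
 * DEF_HELPER_* machinery expands each helper declaration into a
 * gen_helper_<name>() wrapper that routes here with the matching
 * TCGHelperInfo already constructed.
 */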
2606 
2607 static void tcg_reg_alloc_start(TCGContext *s)
2608 {
2609     int i, n;
2610 
2611     for (i = 0, n = s->nb_temps; i < n; i++) {
2612         TCGTemp *ts = &s->temps[i];
2613         TCGTempVal val = TEMP_VAL_MEM;
2614 
2615         switch (ts->kind) {
2616         case TEMP_CONST:
2617             val = TEMP_VAL_CONST;
2618             break;
2619         case TEMP_FIXED:
2620             val = TEMP_VAL_REG;
2621             break;
2622         case TEMP_GLOBAL:
2623             break;
2624         case TEMP_EBB:
2625             val = TEMP_VAL_DEAD;
2626             /* fall through */
2627         case TEMP_TB:
2628             ts->mem_allocated = 0;
2629             break;
2630         default:
2631             g_assert_not_reached();
2632         }
2633         ts->val_type = val;
2634     }
2635 
2636     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2637 }
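
/*
 * Summary of the initial locations assigned above, by temp kind:
 *
 *     TEMP_CONST    TEMP_VAL_CONST
 *     TEMP_FIXED    TEMP_VAL_REG
 *     TEMP_GLOBAL   TEMP_VAL_MEM
 *     TEMP_EBB      TEMP_VAL_DEAD  (mem_allocated cleared)
 *     TEMP_TB       TEMP_VAL_MEM   (mem_allocated cleared)
 */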
2638 
2639 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2640                                  TCGTemp *ts)
2641 {
2642     int idx = temp_idx(ts);
2643 
2644     switch (ts->kind) {
2645     case TEMP_FIXED:
2646     case TEMP_GLOBAL:
2647         pstrcpy(buf, buf_size, ts->name);
2648         break;
2649     case TEMP_TB:
2650         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2651         break;
2652     case TEMP_EBB:
2653         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2654         break;
2655     case TEMP_CONST:
2656         switch (ts->type) {
2657         case TCG_TYPE_I32:
2658             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2659             break;
2660 #if TCG_TARGET_REG_BITS > 32
2661         case TCG_TYPE_I64:
2662             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2663             break;
2664 #endif
2665         case TCG_TYPE_V64:
2666         case TCG_TYPE_V128:
2667         case TCG_TYPE_V256:
2668             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2669                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2670             break;
2671         default:
2672             g_assert_not_reached();
2673         }
2674         break;
2675     }
2676     return buf;
2677 }
2678 
2679 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2680                              int buf_size, TCGArg arg)
2681 {
2682     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2683 }
2684 
2685 static const char * const cond_name[] =
2686 {
2687     [TCG_COND_NEVER] = "never",
2688     [TCG_COND_ALWAYS] = "always",
2689     [TCG_COND_EQ] = "eq",
2690     [TCG_COND_NE] = "ne",
2691     [TCG_COND_LT] = "lt",
2692     [TCG_COND_GE] = "ge",
2693     [TCG_COND_LE] = "le",
2694     [TCG_COND_GT] = "gt",
2695     [TCG_COND_LTU] = "ltu",
2696     [TCG_COND_GEU] = "geu",
2697     [TCG_COND_LEU] = "leu",
2698     [TCG_COND_GTU] = "gtu",
2699     [TCG_COND_TSTEQ] = "tsteq",
2700     [TCG_COND_TSTNE] = "tstne",
2701 };
2702 
2703 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2704 {
2705     [MO_UB]   = "ub",
2706     [MO_SB]   = "sb",
2707     [MO_LEUW] = "leuw",
2708     [MO_LESW] = "lesw",
2709     [MO_LEUL] = "leul",
2710     [MO_LESL] = "lesl",
2711     [MO_LEUQ] = "leq",
2712     [MO_BEUW] = "beuw",
2713     [MO_BESW] = "besw",
2714     [MO_BEUL] = "beul",
2715     [MO_BESL] = "besl",
2716     [MO_BEUQ] = "beq",
2717     [MO_128 + MO_BE] = "beo",
2718     [MO_128 + MO_LE] = "leo",
2719 };
2720 
2721 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2722     [MO_UNALN >> MO_ASHIFT]    = "un+",
2723     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2724     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2725     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2726     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2727     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2728     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2729     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2730 };
2731 
2732 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2733     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2734     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2735     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2736     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2737     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2738     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2739 };
2740 
2741 static const char bswap_flag_name[][6] = {
2742     [TCG_BSWAP_IZ] = "iz",
2743     [TCG_BSWAP_OZ] = "oz",
2744     [TCG_BSWAP_OS] = "os",
2745     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2746     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2747 };
2748 
2749 #ifdef CONFIG_PLUGIN
2750 static const char * const plugin_from_name[] = {
2751     "from-tb",
2752     "from-insn",
2753     "after-insn",
2754     "after-tb",
2755 };
2756 #endif
2757 
2758 static inline bool tcg_regset_single(TCGRegSet d)
2759 {
2760     return (d & (d - 1)) == 0;
2761 }
2762 
2763 static inline TCGReg tcg_regset_first(TCGRegSet d)
2764 {
2765     if (TCG_TARGET_NB_REGS <= 32) {
2766         return ctz32(d);
2767     } else {
2768         return ctz64(d);
2769     }
2770 }
2771 
2772 /* Return only the number of characters output -- no error return. */
2773 #define ne_fprintf(...) \
2774     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2775 
2776 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2777 {
2778     char buf[128];
2779     TCGOp *op;
2780 
2781     QTAILQ_FOREACH(op, &s->ops, link) {
2782         int i, k, nb_oargs, nb_iargs, nb_cargs;
2783         const TCGOpDef *def;
2784         TCGOpcode c;
2785         int col = 0;
2786 
2787         c = op->opc;
2788         def = &tcg_op_defs[c];
2789 
2790         if (c == INDEX_op_insn_start) {
2791             nb_oargs = 0;
2792             col += ne_fprintf(f, "\n ----");
2793 
2794             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2795                 col += ne_fprintf(f, " %016" PRIx64,
2796                                   tcg_get_insn_start_param(op, i));
2797             }
2798         } else if (c == INDEX_op_call) {
2799             const TCGHelperInfo *info = tcg_call_info(op);
2800             void *func = tcg_call_func(op);
2801 
2802             /* variable number of arguments */
2803             nb_oargs = TCGOP_CALLO(op);
2804             nb_iargs = TCGOP_CALLI(op);
2805             nb_cargs = def->nb_cargs;
2806 
2807             col += ne_fprintf(f, " %s ", def->name);
2808 
2809             /*
2810              * Print the function name from TCGHelperInfo, if available.
2811              * Note that plugins have a template function for the info,
2812              * but the actual function pointer comes from the plugin.
2813              */
2814             if (func == info->func) {
2815                 col += ne_fprintf(f, "%s", info->name);
2816             } else {
2817                 col += ne_fprintf(f, "plugin(%p)", func);
2818             }
2819 
2820             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2821             for (i = 0; i < nb_oargs; i++) {
2822                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2823                                                             op->args[i]));
2824             }
2825             for (i = 0; i < nb_iargs; i++) {
2826                 TCGArg arg = op->args[nb_oargs + i];
2827                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2828                 col += ne_fprintf(f, ",%s", t);
2829             }
2830         } else {
2831             if (def->flags & TCG_OPF_INT) {
2832                 col += ne_fprintf(f, " %s_i%d ",
2833                                   def->name,
2834                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2835             } else if (def->flags & TCG_OPF_VECTOR) {
2836                 col += ne_fprintf(f, "%s v%d,e%d,",
2837                                   def->name,
2838                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2839                                   8 << TCGOP_VECE(op));
2840             } else {
2841                 col += ne_fprintf(f, " %s ", def->name);
2842             }
2843 
2844             nb_oargs = def->nb_oargs;
2845             nb_iargs = def->nb_iargs;
2846             nb_cargs = def->nb_cargs;
2847 
2848             k = 0;
2849             for (i = 0; i < nb_oargs; i++) {
2850                 const char *sep = k ? "," : "";
2851                 col += ne_fprintf(f, "%s%s", sep,
2852                                   tcg_get_arg_str(s, buf, sizeof(buf),
2853                                                   op->args[k++]));
2854             }
2855             for (i = 0; i < nb_iargs; i++) {
2856                 const char *sep = k ? "," : "";
2857                 col += ne_fprintf(f, "%s%s", sep,
2858                                   tcg_get_arg_str(s, buf, sizeof(buf),
2859                                                   op->args[k++]));
2860             }
2861             switch (c) {
2862             case INDEX_op_brcond_i32:
2863             case INDEX_op_setcond_i32:
2864             case INDEX_op_negsetcond_i32:
2865             case INDEX_op_movcond_i32:
2866             case INDEX_op_brcond2_i32:
2867             case INDEX_op_setcond2_i32:
2868             case INDEX_op_brcond_i64:
2869             case INDEX_op_setcond_i64:
2870             case INDEX_op_negsetcond_i64:
2871             case INDEX_op_movcond_i64:
2872             case INDEX_op_cmp_vec:
2873             case INDEX_op_cmpsel_vec:
2874                 if (op->args[k] < ARRAY_SIZE(cond_name)
2875                     && cond_name[op->args[k]]) {
2876                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2877                 } else {
2878                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2879                 }
2880                 i = 1;
2881                 break;
2882             case INDEX_op_qemu_ld_i32:
2883             case INDEX_op_qemu_st_i32:
2884             case INDEX_op_qemu_st8_i32:
2885             case INDEX_op_qemu_ld_i64:
2886             case INDEX_op_qemu_st_i64:
2887             case INDEX_op_qemu_ld_i128:
2888             case INDEX_op_qemu_st_i128:
2889                 {
2890                     const char *s_al, *s_op, *s_at;
2891                     MemOpIdx oi = op->args[k++];
2892                     MemOp mop = get_memop(oi);
2893                     unsigned ix = get_mmuidx(oi);
2894 
2895                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2896                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2897                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2898                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2899 
2900                     /* If all fields are accounted for, print symbolically. */
2901                     if (!mop && s_al && s_op && s_at) {
2902                         col += ne_fprintf(f, ",%s%s%s,%u",
2903                                           s_at, s_al, s_op, ix);
2904                     } else {
2905                         mop = get_memop(oi);
2906                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2907                     }
2908                     i = 1;
2909                 }
2910                 break;
2911             case INDEX_op_bswap16_i32:
2912             case INDEX_op_bswap16_i64:
2913             case INDEX_op_bswap32_i32:
2914             case INDEX_op_bswap32_i64:
2915             case INDEX_op_bswap64_i64:
2916                 {
2917                     TCGArg flags = op->args[k];
2918                     const char *name = NULL;
2919 
2920                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2921                         name = bswap_flag_name[flags];
2922                     }
2923                     if (name) {
2924                         col += ne_fprintf(f, ",%s", name);
2925                     } else {
2926                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2927                     }
2928                     i = k = 1;
2929                 }
2930                 break;
2931 #ifdef CONFIG_PLUGIN
2932             case INDEX_op_plugin_cb:
2933                 {
2934                     TCGArg from = op->args[k++];
2935                     const char *name = NULL;
2936 
2937                     if (from < ARRAY_SIZE(plugin_from_name)) {
2938                         name = plugin_from_name[from];
2939                     }
2940                     if (name) {
2941                         col += ne_fprintf(f, "%s", name);
2942                     } else {
2943                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2944                     }
2945                     i = 1;
2946                 }
2947                 break;
2948 #endif
2949             default:
2950                 i = 0;
2951                 break;
2952             }
2953             switch (c) {
2954             case INDEX_op_set_label:
2955             case INDEX_op_br:
2956             case INDEX_op_brcond_i32:
2957             case INDEX_op_brcond_i64:
2958             case INDEX_op_brcond2_i32:
2959                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2960                                   arg_label(op->args[k])->id);
2961                 i++, k++;
2962                 break;
2963             case INDEX_op_mb:
2964                 {
2965                     TCGBar membar = op->args[k];
2966                     const char *b_op, *m_op;
2967 
2968                     switch (membar & TCG_BAR_SC) {
2969                     case 0:
2970                         b_op = "none";
2971                         break;
2972                     case TCG_BAR_LDAQ:
2973                         b_op = "acq";
2974                         break;
2975                     case TCG_BAR_STRL:
2976                         b_op = "rel";
2977                         break;
2978                     case TCG_BAR_SC:
2979                         b_op = "seq";
2980                         break;
2981                     default:
2982                         g_assert_not_reached();
2983                     }
2984 
2985                     switch (membar & TCG_MO_ALL) {
2986                     case 0:
2987                         m_op = "none";
2988                         break;
2989                     case TCG_MO_LD_LD:
2990                         m_op = "rr";
2991                         break;
2992                     case TCG_MO_LD_ST:
2993                         m_op = "rw";
2994                         break;
2995                     case TCG_MO_ST_LD:
2996                         m_op = "wr";
2997                         break;
2998                     case TCG_MO_ST_ST:
2999                         m_op = "ww";
3000                         break;
3001                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3002                         m_op = "rr+rw";
3003                         break;
3004                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3005                         m_op = "rr+wr";
3006                         break;
3007                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3008                         m_op = "rr+ww";
3009                         break;
3010                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3011                         m_op = "rw+wr";
3012                         break;
3013                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3014                         m_op = "rw+ww";
3015                         break;
3016                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3017                         m_op = "wr+ww";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3020                         m_op = "rr+rw+wr";
3021                         break;
3022                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3023                         m_op = "rr+rw+ww";
3024                         break;
3025                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3026                         m_op = "rr+wr+ww";
3027                         break;
3028                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3029                         m_op = "rw+wr+ww";
3030                         break;
3031                     case TCG_MO_ALL:
3032                         m_op = "all";
3033                         break;
3034                     default:
3035                         g_assert_not_reached();
3036                     }
3037 
3038                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3039                     i++, k++;
3040                 }
3041                 break;
3042             default:
3043                 break;
3044             }
3045             for (; i < nb_cargs; i++, k++) {
3046                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3047                                   op->args[k]);
3048             }
3049         }
3050 
3051         if (have_prefs || op->life) {
3052             for (; col < 40; ++col) {
3053                 putc(' ', f);
3054             }
3055         }
3056 
3057         if (op->life) {
3058             unsigned life = op->life;
3059 
3060             if (life & (SYNC_ARG * 3)) {
3061                 ne_fprintf(f, "  sync:");
3062                 for (i = 0; i < 2; ++i) {
3063                     if (life & (SYNC_ARG << i)) {
3064                         ne_fprintf(f, " %d", i);
3065                     }
3066                 }
3067             }
3068             life /= DEAD_ARG;
3069             if (life) {
3070                 ne_fprintf(f, "  dead:");
3071                 for (i = 0; life; ++i, life >>= 1) {
3072                     if (life & 1) {
3073                         ne_fprintf(f, " %d", i);
3074                     }
3075                 }
3076             }
3077         }
3078 
3079         if (have_prefs) {
3080             for (i = 0; i < nb_oargs; ++i) {
3081                 TCGRegSet set = output_pref(op, i);
3082 
3083                 if (i == 0) {
3084                     ne_fprintf(f, "  pref=");
3085                 } else {
3086                     ne_fprintf(f, ",");
3087                 }
3088                 if (set == 0) {
3089                     ne_fprintf(f, "none");
3090                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3091                     ne_fprintf(f, "all");
3092 #ifdef CONFIG_DEBUG_TCG
3093                 } else if (tcg_regset_single(set)) {
3094                     TCGReg reg = tcg_regset_first(set);
3095                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3096 #endif
3097                 } else if (TCG_TARGET_NB_REGS <= 32) {
3098                     ne_fprintf(f, "0x%x", (uint32_t)set);
3099                 } else {
3100                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3101                 }
3102             }
3103         }
3104 
3105         putc('\n', f);
3106     }
3107 }
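
/*
 * Illustrative output (shape only; opcodes, temps and columns vary):
 *
 *      ---- 000000000040123c 0000000000000000
 *     mov_i32 tmp0,eax                        dead: 1  pref=all
 *     add_i32 tmp0,tmp0,$0x1                  pref=0xffff
 *     brcond_i32 tmp0,$0x0,eq,$L1
 */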
3108 
3109 /* We give more priority to constraints with fewer registers. */
3110 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3111 {
3112     int n;
3113 
3114     arg_ct += k;
3115     n = ctpop64(arg_ct->regs);
3116 
3117     /*
3118      * Sort constraints of a single register first, which includes output
3119      * aliases (which must exactly match the input already allocated).
3120      */
3121     if (n == 1 || arg_ct->oalias) {
3122         return INT_MAX;
3123     }
3124 
3125     /*
3126      * Sort register pairs next: the first member, then its second immediately after.
3127      * Arbitrarily sort multiple pairs by the index of the first reg;
3128      * there shouldn't be many pairs.
3129      */
3130     switch (arg_ct->pair) {
3131     case 1:
3132     case 3:
3133         return (k + 1) * 2;
3134     case 2:
3135         return (arg_ct->pair_index + 1) * 2 - 1;
3136     }
3137 
3138     /* Finally, sort by decreasing register count. */
3139     assert(n > 1);
3140     return -n;
3141 }
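
/*
 * Net ordering, highest priority first: single-register constraints and
 * output aliases (INT_MAX); then pair members, each first member sorting
 * just ahead of its second ((k + 1) * 2 vs (k + 1) * 2 - 1); then the
 * rest by decreasing size of their register set (-n).
 */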
3142 
3143 /* Sort from highest priority to lowest. */
3144 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3145 {
3146     int i, j;
3147 
3148     for (i = 0; i < n; i++) {
3149         a[start + i].sort_index = start + i;
3150     }
3151     if (n <= 1) {
3152         return;
3153     }
3154     for (i = 0; i < n - 1; i++) {
3155         for (j = i + 1; j < n; j++) {
3156             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3157             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3158             if (p1 < p2) {
3159                 int tmp = a[start + i].sort_index;
3160                 a[start + i].sort_index = a[start + j].sort_index;
3161                 a[start + j].sort_index = tmp;
3162             }
3163         }
3164     }
3165 }
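
/*
 * n is bounded by TCG_MAX_OP_ARGS, so the O(n^2) exchange sort above is
 * negligible; it runs once per constraint set, from
 * process_constraint_sets() at context-initialization time.
 */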
3166 
3167 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3168 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3169 
3170 static void process_constraint_sets(void)
3171 {
3172     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3173         const TCGConstraintSet *tdefs = &constraint_sets[c];
3174         TCGArgConstraint *args_ct = all_cts[c];
3175         int nb_oargs = tdefs->nb_oargs;
3176         int nb_iargs = tdefs->nb_iargs;
3177         int nb_args = nb_oargs + nb_iargs;
3178         bool saw_alias_pair = false;
3179 
3180         for (int i = 0; i < nb_args; i++) {
3181             const char *ct_str = tdefs->args_ct_str[i];
3182             bool input_p = i >= nb_oargs;
3183             int o;
3184 
3185             switch (*ct_str) {
3186             case '0' ... '9':
3187                 o = *ct_str - '0';
3188                 tcg_debug_assert(input_p);
3189                 tcg_debug_assert(o < nb_oargs);
3190                 tcg_debug_assert(args_ct[o].regs != 0);
3191                 tcg_debug_assert(!args_ct[o].oalias);
3192                 args_ct[i] = args_ct[o];
3193                 /* The output sets oalias.  */
3194                 args_ct[o].oalias = 1;
3195                 args_ct[o].alias_index = i;
3196                 /* The input sets ialias. */
3197                 args_ct[i].ialias = 1;
3198                 args_ct[i].alias_index = o;
3199                 if (args_ct[i].pair) {
3200                     saw_alias_pair = true;
3201                 }
3202                 tcg_debug_assert(ct_str[1] == '\0');
3203                 continue;
3204 
3205             case '&':
3206                 tcg_debug_assert(!input_p);
3207                 args_ct[i].newreg = true;
3208                 ct_str++;
3209                 break;
3210 
3211             case 'p': /* plus */
3212                 /* Allocate to the register after the previous. */
3213                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3214                 o = i - 1;
3215                 tcg_debug_assert(!args_ct[o].pair);
3216                 tcg_debug_assert(!args_ct[o].ct);
3217                 args_ct[i] = (TCGArgConstraint){
3218                     .pair = 2,
3219                     .pair_index = o,
3220                     .regs = args_ct[o].regs << 1,
3221                     .newreg = args_ct[o].newreg,
3222                 };
3223                 args_ct[o].pair = 1;
3224                 args_ct[o].pair_index = i;
3225                 tcg_debug_assert(ct_str[1] == '\0');
3226                 continue;
3227 
3228             case 'm': /* minus */
3229                 /* Allocate to the register before the previous. */
3230                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3231                 o = i - 1;
3232                 tcg_debug_assert(!args_ct[o].pair);
3233                 tcg_debug_assert(!args_ct[o].ct);
3234                 args_ct[i] = (TCGArgConstraint){
3235                     .pair = 1,
3236                     .pair_index = o,
3237                     .regs = args_ct[o].regs >> 1,
3238                     .newreg = args_ct[o].newreg,
3239                 };
3240                 args_ct[o].pair = 2;
3241                 args_ct[o].pair_index = i;
3242                 tcg_debug_assert(ct_str[1] == '\0');
3243                 continue;
3244             }
3245 
3246             do {
3247                 switch (*ct_str) {
3248                 case 'i':
3249                     args_ct[i].ct |= TCG_CT_CONST;
3250                     break;
3251 #ifdef TCG_REG_ZERO
3252                 case 'z':
3253                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3254                     break;
3255 #endif
3256 
3257                 /* Include all of the target-specific constraints. */
3258 
3259 #undef CONST
3260 #define CONST(CASE, MASK) \
3261     case CASE: args_ct[i].ct |= MASK; break;
3262 #define REGS(CASE, MASK) \
3263     case CASE: args_ct[i].regs |= MASK; break;
3264 
3265 #include "tcg-target-con-str.h"
3266 
3267 #undef REGS
3268 #undef CONST
3269                 default:
3270                 case '0' ... '9':
3271                 case '&':
3272                 case 'p':
3273                 case 'm':
3274                     /* Typo in TCGConstraintSet constraint. */
3275                     g_assert_not_reached();
3276                 }
3277             } while (*++ct_str != '\0');
3278         }
3279 
3280         /*
3281          * Fix up output pairs that are aliased with inputs.
3282          * When we created the alias, we copied pair from the output.
3283          * There are three cases:
3284          *    (1a) Pairs of inputs alias pairs of outputs.
3285          *    (1b) One input aliases the first of a pair of outputs.
3286          *    (2)  One input aliases the second of a pair of outputs.
3287          *
3288          * Case 1a is handled by making sure that the pair_index'es are
3289          * properly updated so that they appear the same as a pair of inputs.
3290          *
3291          * Case 1b is handled by setting the pair_index of the input to
3292          * itself, simply so it doesn't point to an unrelated argument.
3293          * Since we don't encounter the "second" during the input allocation
3294          * phase, nothing happens with the second half of the input pair.
3295          *
3296          * Case 2 is handled by setting the second input to pair=3, the
3297          * first output to pair=3, and the pair_index'es to match.
3298          */
3299         if (saw_alias_pair) {
3300             for (int i = nb_oargs; i < nb_args; i++) {
3301                 int o, o2, i2;
3302 
3303                 /*
3304                  * Since [0-9pm] must be alone in the constraint string,
3305                  * the only way they can both be set is if the pair comes
3306                  * from the output alias.
3307                  */
3308                 if (!args_ct[i].ialias) {
3309                     continue;
3310                 }
3311                 switch (args_ct[i].pair) {
3312                 case 0:
3313                     break;
3314                 case 1:
3315                     o = args_ct[i].alias_index;
3316                     o2 = args_ct[o].pair_index;
3317                     tcg_debug_assert(args_ct[o].pair == 1);
3318                     tcg_debug_assert(args_ct[o2].pair == 2);
3319                     if (args_ct[o2].oalias) {
3320                         /* Case 1a */
3321                         i2 = args_ct[o2].alias_index;
3322                         tcg_debug_assert(args_ct[i2].pair == 2);
3323                         args_ct[i2].pair_index = i;
3324                         args_ct[i].pair_index = i2;
3325                     } else {
3326                         /* Case 1b */
3327                         args_ct[i].pair_index = i;
3328                     }
3329                     break;
3330                 case 2:
3331                     o = args_ct[i].alias_index;
3332                     o2 = args_ct[o].pair_index;
3333                     tcg_debug_assert(args_ct[o].pair == 2);
3334                     tcg_debug_assert(args_ct[o2].pair == 1);
3335                     if (args_ct[o2].oalias) {
3336                         /* Case 1a */
3337                         i2 = args_ct[o2].alias_index;
3338                         tcg_debug_assert(args_ct[i2].pair == 1);
3339                         args_ct[i2].pair_index = i;
3340                         args_ct[i].pair_index = i2;
3341                     } else {
3342                         /* Case 2 */
3343                         args_ct[i].pair = 3;
3344                         args_ct[o2].pair = 3;
3345                         args_ct[i].pair_index = o2;
3346                         args_ct[o2].pair_index = i;
3347                     }
3348                     break;
3349                 default:
3350                     g_assert_not_reached();
3351                 }
3352             }
3353         }
3354 
3355         /* sort the constraints (XXX: this is just a heuristic) */
3356         sort_constraints(args_ct, 0, nb_oargs);
3357         sort_constraints(args_ct, nb_oargs, nb_iargs);
3358     }
3359 }
3360 
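/*
 * Return the processed constraint array for OP: the static constraint
 * set of its TCGOutOp, a dynamic set resolved by type and flags, or,
 * for opcodes without a TCGOutOp, the set from tcg_target_op_def.
 */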
3361 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3362 {
3363     TCGOpcode opc = op->opc;
3364     TCGType type = TCGOP_TYPE(op);
3365     unsigned flags = TCGOP_FLAGS(op);
3366     const TCGOpDef *def = &tcg_op_defs[opc];
3367     const TCGOutOp *outop = all_outop[opc];
3368     TCGConstraintSetIndex con_set;
3369 
3370     if (def->flags & TCG_OPF_NOT_PRESENT) {
3371         return empty_cts;
3372     }
3373 
3374     if (outop) {
3375         con_set = outop->static_constraint;
3376         if (con_set == C_Dynamic) {
3377             con_set = outop->dynamic_constraint(type, flags);
3378         }
3379     } else {
3380         con_set = tcg_target_op_def(opc, type, flags);
3381     }
3382     tcg_debug_assert(con_set >= 0);
3383     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3384 
3385     /* The constraint arguments must match TCGOpcode arguments. */
3386     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3387     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3388 
3389     return all_cts[con_set];
3390 }
3391 
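/* Drop the record of OP from the branch list of its label argument. */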
3392 static void remove_label_use(TCGOp *op, int idx)
3393 {
3394     TCGLabel *label = arg_label(op->args[idx]);
3395     TCGLabelUse *use;
3396 
3397     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3398         if (use->op == op) {
3399             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3400             return;
3401         }
3402     }
3403     g_assert_not_reached();
3404 }
3405 
3406 void tcg_op_remove(TCGContext *s, TCGOp *op)
3407 {
3408     switch (op->opc) {
3409     case INDEX_op_br:
3410         remove_label_use(op, 0);
3411         break;
3412     case INDEX_op_brcond_i32:
3413     case INDEX_op_brcond_i64:
3414         remove_label_use(op, 3);
3415         break;
3416     case INDEX_op_brcond2_i32:
3417         remove_label_use(op, 5);
3418         break;
3419     default:
3420         break;
3421     }
3422 
3423     QTAILQ_REMOVE(&s->ops, op, link);
3424     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3425     s->nb_ops--;
3426 }
3427 
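/* Remove and free all ops emitted after OP. */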
3428 void tcg_remove_ops_after(TCGOp *op)
3429 {
3430     TCGContext *s = tcg_ctx;
3431 
3432     while (true) {
3433         TCGOp *last = tcg_last_op();
3434         if (last == op) {
3435             return;
3436         }
3437         tcg_op_remove(s, last);
3438     }
3439 }
3440 
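/*
 * Allocate a TCGOp with room for NARGS arguments, preferring an entry
 * from the free list that has enough argument slots.
 */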
3441 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3442 {
3443     TCGContext *s = tcg_ctx;
3444     TCGOp *op = NULL;
3445 
3446     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3447         QTAILQ_FOREACH(op, &s->free_ops, link) {
3448             if (nargs <= op->nargs) {
3449                 QTAILQ_REMOVE(&s->free_ops, op, link);
3450                 nargs = op->nargs;
3451                 goto found;
3452             }
3453         }
3454     }
3455 
3456     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3457     nargs = MAX(4, nargs);
3458     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3459 
3460  found:
3461     memset(op, 0, offsetof(TCGOp, link));
3462     op->opc = opc;
3463     op->nargs = nargs;
3464 
3465     /* Check for bitfield overflow. */
3466     tcg_debug_assert(op->nargs == nargs);
3467 
3468     s->nb_ops++;
3469     return op;
3470 }
3471 
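/* Allocate a new op and link it into the stream at the emission point. */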
3472 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3473 {
3474     TCGOp *op = tcg_op_alloc(opc, nargs);
3475 
3476     if (tcg_ctx->emit_before_op) {
3477         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3478     } else {
3479         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3480     }
3481     return op;
3482 }
3483 
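/* Allocate a new op of the given type and insert it before OLD_OP. */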
3484 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3485                             TCGOpcode opc, TCGType type, unsigned nargs)
3486 {
3487     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3488 
3489     TCGOP_TYPE(new_op) = type;
3490     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3491     return new_op;
3492 }
3493 
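/* Allocate a new op of the given type and insert it after OLD_OP. */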
3494 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3495                            TCGOpcode opc, TCGType type, unsigned nargs)
3496 {
3497     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3498 
3499     TCGOP_TYPE(new_op) = type;
3500     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3501     return new_op;
3502 }
3503 
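/* Redirect all branches referencing FROM to TO, then merge the use lists. */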
3504 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3505 {
3506     TCGLabelUse *u;
3507 
3508     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3509         TCGOp *op = u->op;
3510         switch (op->opc) {
3511         case INDEX_op_br:
3512             op->args[0] = label_arg(to);
3513             break;
3514         case INDEX_op_brcond_i32:
3515         case INDEX_op_brcond_i64:
3516             op->args[3] = label_arg(to);
3517             break;
3518         case INDEX_op_brcond2_i32:
3519             op->args[5] = label_arg(to);
3520             break;
3521         default:
3522             g_assert_not_reached();
3523         }
3524     }
3525 
3526     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3527 }
3528 
3529 /* Reachability analysis: remove unreachable code.  */
3530 static void __attribute__((noinline))
3531 reachable_code_pass(TCGContext *s)
3532 {
3533     TCGOp *op, *op_next, *op_prev;
3534     bool dead = false;
3535 
3536     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3537         bool remove = dead;
3538         TCGLabel *label;
3539 
3540         switch (op->opc) {
3541         case INDEX_op_set_label:
3542             label = arg_label(op->args[0]);
3543 
3544             /*
3545              * Note that the first op in the TB is always a load,
3546              * so there is always something before a label.
3547              */
3548             op_prev = QTAILQ_PREV(op, link);
3549 
3550             /*
3551              * If we find two sequential labels, move all branches to
3552              * reference the second label and remove the first label.
3553              * Do this before branch to next optimization, so that the
3554              * middle label is out of the way.
3555              */
3556             if (op_prev->opc == INDEX_op_set_label) {
3557                 move_label_uses(label, arg_label(op_prev->args[0]));
3558                 tcg_op_remove(s, op_prev);
3559                 op_prev = QTAILQ_PREV(op, link);
3560             }
3561 
3562             /*
3563              * Optimization can fold conditional branches to unconditional.
3564              * If we find a label which is preceded by an unconditional
3565              * branch to next, remove the branch.  We couldn't do this when
3566              * processing the branch because any dead code between the branch
3567              * and label had not yet been removed.
3568              */
3569             if (op_prev->opc == INDEX_op_br &&
3570                 label == arg_label(op_prev->args[0])) {
3571                 tcg_op_remove(s, op_prev);
3572                 /* Fall through means insns become live again.  */
3573                 dead = false;
3574             }
3575 
3576             if (QSIMPLEQ_EMPTY(&label->branches)) {
3577                 /*
3578                  * While there is an occasional backward branch, virtually
3579                  * all branches generated by the translators are forward.
3580                  * This means that, in general, we will already have
3581                  * removed all the references the label will ever have,
3582                  * and there is little to be gained by iterating.
3583                  */
3584                 remove = true;
3585             } else {
3586                 /* Once we see a label, insns become live again.  */
3587                 dead = false;
3588                 remove = false;
3589             }
3590             break;
3591 
3592         case INDEX_op_br:
3593         case INDEX_op_exit_tb:
3594         case INDEX_op_goto_ptr:
3595             /* Unconditional branches; everything following is dead.  */
3596             dead = true;
3597             break;
3598 
3599         case INDEX_op_call:
3600             /* Notice noreturn helper calls, such as those raising exceptions.  */
3601             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3602                 dead = true;
3603             }
3604             break;
3605 
3606         case INDEX_op_insn_start:
3607             /* Never remove -- we need to keep these for unwind.  */
3608             remove = false;
3609             break;
3610 
3611         default:
3612             break;
3613         }
3614 
3615         if (remove) {
3616             tcg_op_remove(s, op);
3617         }
3618     }
3619 }
3620 
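/*
 * Liveness state bits: TS_DEAD means the value is not needed after
 * this point; TS_MEM means the canonical memory slot holds (or must
 * be updated to hold) the current value.
 */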
3621 #define TS_DEAD  1
3622 #define TS_MEM   2
3623 
3624 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3625 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3626 
3627 /* For liveness_pass_1, the register preferences for a given temp.  */
3628 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3629 {
3630     return ts->state_ptr;
3631 }
3632 
3633 /* For liveness_pass_1, reset the preferences for a given temp to the
3634  * maximal regset for its type.
3635  */
3636 static inline void la_reset_pref(TCGTemp *ts)
3637 {
3638     *la_temp_pref(ts)
3639         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3640 }
3641 
3642 /* liveness analysis: end of function: all temps are dead, and globals
3643    should be in memory. */
3644 static void la_func_end(TCGContext *s, int ng, int nt)
3645 {
3646     int i;
3647 
3648     for (i = 0; i < ng; ++i) {
3649         s->temps[i].state = TS_DEAD | TS_MEM;
3650         la_reset_pref(&s->temps[i]);
3651     }
3652     for (i = ng; i < nt; ++i) {
3653         s->temps[i].state = TS_DEAD;
3654         la_reset_pref(&s->temps[i]);
3655     }
3656 }
3657 
3658 /* liveness analysis: end of basic block: all temps are dead, globals
3659    and local temps should be in memory. */
3660 static void la_bb_end(TCGContext *s, int ng, int nt)
3661 {
3662     int i;
3663 
3664     for (i = 0; i < nt; ++i) {
3665         TCGTemp *ts = &s->temps[i];
3666         int state;
3667 
3668         switch (ts->kind) {
3669         case TEMP_FIXED:
3670         case TEMP_GLOBAL:
3671         case TEMP_TB:
3672             state = TS_DEAD | TS_MEM;
3673             break;
3674         case TEMP_EBB:
3675         case TEMP_CONST:
3676             state = TS_DEAD;
3677             break;
3678         default:
3679             g_assert_not_reached();
3680         }
3681         ts->state = state;
3682         la_reset_pref(ts);
3683     }
3684 }
3685 
3686 /* liveness analysis: sync globals back to memory.  */
3687 static void la_global_sync(TCGContext *s, int ng)
3688 {
3689     int i;
3690 
3691     for (i = 0; i < ng; ++i) {
3692         int state = s->temps[i].state;
3693         s->temps[i].state = state | TS_MEM;
3694         if (state == TS_DEAD) {
3695             /* If the global was previously dead, reset prefs.  */
3696             la_reset_pref(&s->temps[i]);
3697         }
3698     }
3699 }
3700 
3701 /*
3702  * liveness analysis: conditional branch: all temps are dead unless
3703  * explicitly live-across-conditional-branch, globals and local temps
3704  * should be synced.
3705  */
3706 static void la_bb_sync(TCGContext *s, int ng, int nt)
3707 {
3708     la_global_sync(s, ng);
3709 
3710     for (int i = ng; i < nt; ++i) {
3711         TCGTemp *ts = &s->temps[i];
3712         int state;
3713 
3714         switch (ts->kind) {
3715         case TEMP_TB:
3716             state = ts->state;
3717             ts->state = state | TS_MEM;
3718             if (state != TS_DEAD) {
3719                 continue;
3720             }
3721             break;
3722         case TEMP_EBB:
3723         case TEMP_CONST:
3724             continue;
3725         default:
3726             g_assert_not_reached();
3727         }
3728         la_reset_pref(&s->temps[i]);
3729     }
3730 }
3731 
3732 /* liveness analysis: sync globals back to memory and kill.  */
3733 static void la_global_kill(TCGContext *s, int ng)
3734 {
3735     int i;
3736 
3737     for (i = 0; i < ng; i++) {
3738         s->temps[i].state = TS_DEAD | TS_MEM;
3739         la_reset_pref(&s->temps[i]);
3740     }
3741 }
3742 
3743 /* liveness analysis: prefer call-saved regs for temps live across a call. */
3744 static void la_cross_call(TCGContext *s, int nt)
3745 {
3746     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3747     int i;
3748 
3749     for (i = 0; i < nt; i++) {
3750         TCGTemp *ts = &s->temps[i];
3751         if (!(ts->state & TS_DEAD)) {
3752             TCGRegSet *pset = la_temp_pref(ts);
3753             TCGRegSet set = *pset;
3754 
3755             set &= mask;
3756             /* If the combination is not possible, restart.  */
3757             if (set == 0) {
3758                 set = tcg_target_available_regs[ts->type] & mask;
3759             }
3760             *pset = set;
3761         }
3762     }
3763 }
3764 
3765 /*
3766  * Liveness analysis: verify the lifetime of each TEMP_TB, and
3767  * reduce it to TEMP_EBB where possible.
3768  */
3769 static void __attribute__((noinline))
3770 liveness_pass_0(TCGContext *s)
3771 {
3772     void * const multiple_ebb = (void *)(uintptr_t)-1;
3773     int nb_temps = s->nb_temps;
3774     TCGOp *op, *ebb;
3775 
3776     for (int i = s->nb_globals; i < nb_temps; ++i) {
3777         s->temps[i].state_ptr = NULL;
3778     }
3779 
3780     /*
3781      * Represent each EBB by the op at which it begins.  In the case of
3782      * the first EBB, this is the first op, otherwise it is a label.
3783      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3784      * within a single EBB, else MULTIPLE_EBB.
3785      */
3786     ebb = QTAILQ_FIRST(&s->ops);
3787     QTAILQ_FOREACH(op, &s->ops, link) {
3788         const TCGOpDef *def;
3789         int nb_oargs, nb_iargs;
3790 
3791         switch (op->opc) {
3792         case INDEX_op_set_label:
3793             ebb = op;
3794             continue;
3795         case INDEX_op_discard:
3796             continue;
3797         case INDEX_op_call:
3798             nb_oargs = TCGOP_CALLO(op);
3799             nb_iargs = TCGOP_CALLI(op);
3800             break;
3801         default:
3802             def = &tcg_op_defs[op->opc];
3803             nb_oargs = def->nb_oargs;
3804             nb_iargs = def->nb_iargs;
3805             break;
3806         }
3807 
3808         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3809             TCGTemp *ts = arg_temp(op->args[i]);
3810 
3811             if (ts->kind != TEMP_TB) {
3812                 continue;
3813             }
3814             if (ts->state_ptr == NULL) {
3815                 ts->state_ptr = ebb;
3816             } else if (ts->state_ptr != ebb) {
3817                 ts->state_ptr = multiple_ebb;
3818             }
3819         }
3820     }
3821 
3822     /*
3823      * For TEMP_TB that turned out not to be used beyond one EBB,
3824      * reduce the liveness to TEMP_EBB.
3825      */
3826     for (int i = s->nb_globals; i < nb_temps; ++i) {
3827         TCGTemp *ts = &s->temps[i];
3828         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3829             ts->kind = TEMP_EBB;
3830         }
3831     }
3832 }
3833 
3834 /* Liveness analysis: update the opc_arg_life array to tell if a
3835    given input argument is dead. Instructions updating dead
3836    temporaries are removed. */
3837 static void __attribute__((noinline))
3838 liveness_pass_1(TCGContext *s)
3839 {
3840     int nb_globals = s->nb_globals;
3841     int nb_temps = s->nb_temps;
3842     TCGOp *op, *op_prev;
3843     TCGRegSet *prefs;
3844     int i;
3845 
3846     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3847     for (i = 0; i < nb_temps; ++i) {
3848         s->temps[i].state_ptr = prefs + i;
3849     }
3850 
3851     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3852     la_func_end(s, nb_globals, nb_temps);
3853 
3854     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3855         int nb_iargs, nb_oargs;
3856         TCGOpcode opc_new, opc_new2;
3857         TCGLifeData arg_life = 0;
3858         TCGTemp *ts;
3859         TCGOpcode opc = op->opc;
3860         const TCGOpDef *def = &tcg_op_defs[opc];
3861         const TCGArgConstraint *args_ct;
3862 
3863         switch (opc) {
3864         case INDEX_op_call:
3865             {
3866                 const TCGHelperInfo *info = tcg_call_info(op);
3867                 int call_flags = tcg_call_flags(op);
3868 
3869                 nb_oargs = TCGOP_CALLO(op);
3870                 nb_iargs = TCGOP_CALLI(op);
3871 
3872                 /* pure functions can be removed if their result is unused */
3873                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3874                     for (i = 0; i < nb_oargs; i++) {
3875                         ts = arg_temp(op->args[i]);
3876                         if (ts->state != TS_DEAD) {
3877                             goto do_not_remove_call;
3878                         }
3879                     }
3880                     goto do_remove;
3881                 }
3882             do_not_remove_call:
3883 
3884                 /* Output args are dead.  */
3885                 for (i = 0; i < nb_oargs; i++) {
3886                     ts = arg_temp(op->args[i]);
3887                     if (ts->state & TS_DEAD) {
3888                         arg_life |= DEAD_ARG << i;
3889                     }
3890                     if (ts->state & TS_MEM) {
3891                         arg_life |= SYNC_ARG << i;
3892                     }
3893                     ts->state = TS_DEAD;
3894                     la_reset_pref(ts);
3895                 }
3896 
3897                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3898                 memset(op->output_pref, 0, sizeof(op->output_pref));
3899 
3900                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3901                                     TCG_CALL_NO_READ_GLOBALS))) {
3902                     la_global_kill(s, nb_globals);
3903                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3904                     la_global_sync(s, nb_globals);
3905                 }
3906 
3907                 /* Record arguments that die in this helper.  */
3908                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3909                     ts = arg_temp(op->args[i]);
3910                     if (ts->state & TS_DEAD) {
3911                         arg_life |= DEAD_ARG << i;
3912                     }
3913                 }
3914 
3915                 /* For all live registers, remove call-clobbered prefs.  */
3916                 la_cross_call(s, nb_temps);
3917 
3918                 /*
3919                  * Input arguments are live for preceding opcodes.
3920                  *
3921                  * For those arguments that die, and will be allocated in
3922                  * registers, clear the register set for that arg, to be
3923                  * filled in below.  For args that will be on the stack,
3924                  * reset to any available reg.  Process arguments in reverse
3925                  * order so that if a temp is used more than once, the stack
3926                  * reset to max happens before the register reset to 0.
3927                  */
3928                 for (i = nb_iargs - 1; i >= 0; i--) {
3929                     const TCGCallArgumentLoc *loc = &info->in[i];
3930                     ts = arg_temp(op->args[nb_oargs + i]);
3931 
3932                     if (ts->state & TS_DEAD) {
3933                         switch (loc->kind) {
3934                         case TCG_CALL_ARG_NORMAL:
3935                         case TCG_CALL_ARG_EXTEND_U:
3936                         case TCG_CALL_ARG_EXTEND_S:
3937                             if (arg_slot_reg_p(loc->arg_slot)) {
3938                                 *la_temp_pref(ts) = 0;
3939                                 break;
3940                             }
3941                             /* fall through */
3942                         default:
3943                             *la_temp_pref(ts) =
3944                                 tcg_target_available_regs[ts->type];
3945                             break;
3946                         }
3947                         ts->state &= ~TS_DEAD;
3948                     }
3949                 }
3950 
3951                 /*
3952                  * For each input argument, add its input register to prefs.
3953                  * If a temp is used once, this produces a single set bit;
3954                  * if a temp is used multiple times, this produces a set.
3955                  */
3956                 for (i = 0; i < nb_iargs; i++) {
3957                     const TCGCallArgumentLoc *loc = &info->in[i];
3958                     ts = arg_temp(op->args[nb_oargs + i]);
3959 
3960                     switch (loc->kind) {
3961                     case TCG_CALL_ARG_NORMAL:
3962                     case TCG_CALL_ARG_EXTEND_U:
3963                     case TCG_CALL_ARG_EXTEND_S:
3964                         if (arg_slot_reg_p(loc->arg_slot)) {
3965                             tcg_regset_set_reg(*la_temp_pref(ts),
3966                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3967                         }
3968                         break;
3969                     default:
3970                         break;
3971                     }
3972                 }
3973             }
3974             break;
3975         case INDEX_op_insn_start:
3976             break;
3977         case INDEX_op_discard:
3978             /* mark the temporary as dead */
3979             ts = arg_temp(op->args[0]);
3980             ts->state = TS_DEAD;
3981             la_reset_pref(ts);
3982             break;
3983 
3984         case INDEX_op_add2_i32:
3985         case INDEX_op_add2_i64:
3986             opc_new = INDEX_op_add;
3987             goto do_addsub2;
3988         case INDEX_op_sub2_i32:
3989         case INDEX_op_sub2_i64:
3990             opc_new = INDEX_op_sub;
3991         do_addsub2:
3992             nb_iargs = 4;
3993             nb_oargs = 2;
3994             /* Test if the high part of the operation is dead, but not
3995                the low part.  The result can be optimized to a simple
3996                add or sub.  This happens often for an x86_64 guest when
3997                the cpu mode is set to 32 bit.  */
3998             if (arg_temp(op->args[1])->state == TS_DEAD) {
3999                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4000                     goto do_remove;
4001                 }
4002                 /* Replace the opcode and adjust the args in place,
4003                    leaving 3 unused args at the end.  */
4004                 op->opc = opc = opc_new;
4005                 op->args[1] = op->args[2];
4006                 op->args[2] = op->args[4];
4007                 /* Fall through and mark the single-word operation live.  */
4008                 nb_iargs = 2;
4009                 nb_oargs = 1;
4010             }
4011             goto do_not_remove;
4012 
4013         case INDEX_op_muls2_i32:
4014         case INDEX_op_muls2_i64:
4015             opc_new = INDEX_op_mul;
4016             opc_new2 = INDEX_op_mulsh;
4017             goto do_mul2;
4018         case INDEX_op_mulu2_i32:
4019         case INDEX_op_mulu2_i64:
4020             opc_new = INDEX_op_mul;
4021             opc_new2 = INDEX_op_muluh;
4022         do_mul2:
4023             nb_iargs = 2;
4024             nb_oargs = 2;
4025             if (arg_temp(op->args[1])->state == TS_DEAD) {
4026                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4027                     /* Both parts of the operation are dead.  */
4028                     goto do_remove;
4029                 }
4030                 /* The high part of the operation is dead; generate the low. */
4031                 op->opc = opc = opc_new;
4032                 op->args[1] = op->args[2];
4033                 op->args[2] = op->args[3];
4034             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4035                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4036                 /* The low part of the operation is dead; generate the high. */
4037                 op->opc = opc = opc_new2;
4038                 op->args[0] = op->args[1];
4039                 op->args[1] = op->args[2];
4040                 op->args[2] = op->args[3];
4041             } else {
4042                 goto do_not_remove;
4043             }
4044             /* Mark the single-word operation live.  */
4045             nb_oargs = 1;
4046             goto do_not_remove;
4047 
4048         default:
4049             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4050             nb_iargs = def->nb_iargs;
4051             nb_oargs = def->nb_oargs;
4052 
4053             /* Test if the operation can be removed because all
4054                its outputs are dead. We assume that nb_oargs == 0
4055                implies side effects */
4056             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4057                 for (i = 0; i < nb_oargs; i++) {
4058                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4059                         goto do_not_remove;
4060                     }
4061                 }
4062                 goto do_remove;
4063             }
4064             goto do_not_remove;
4065 
4066         do_remove:
4067             tcg_op_remove(s, op);
4068             break;
4069 
4070         do_not_remove:
4071             for (i = 0; i < nb_oargs; i++) {
4072                 ts = arg_temp(op->args[i]);
4073 
4074                 /* Remember the preference of the uses that followed.  */
4075                 if (i < ARRAY_SIZE(op->output_pref)) {
4076                     op->output_pref[i] = *la_temp_pref(ts);
4077                 }
4078 
4079                 /* Output args are dead.  */
4080                 if (ts->state & TS_DEAD) {
4081                     arg_life |= DEAD_ARG << i;
4082                 }
4083                 if (ts->state & TS_MEM) {
4084                     arg_life |= SYNC_ARG << i;
4085                 }
4086                 ts->state = TS_DEAD;
4087                 la_reset_pref(ts);
4088             }
4089 
4090             /* If end of basic block, update.  */
4091             if (def->flags & TCG_OPF_BB_EXIT) {
4092                 la_func_end(s, nb_globals, nb_temps);
4093             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4094                 la_bb_sync(s, nb_globals, nb_temps);
4095             } else if (def->flags & TCG_OPF_BB_END) {
4096                 la_bb_end(s, nb_globals, nb_temps);
4097             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4098                 la_global_sync(s, nb_globals);
4099                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4100                     la_cross_call(s, nb_temps);
4101                 }
4102             }
4103 
4104             /* Record arguments that die in this opcode.  */
4105             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4106                 ts = arg_temp(op->args[i]);
4107                 if (ts->state & TS_DEAD) {
4108                     arg_life |= DEAD_ARG << i;
4109                 }
4110             }
4111 
4112             /* Input arguments are live for preceding opcodes.  */
4113             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4114                 ts = arg_temp(op->args[i]);
4115                 if (ts->state & TS_DEAD) {
4116                     /* For operands that were dead, initially allow
4117                        all regs for the type.  */
4118                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4119                     ts->state &= ~TS_DEAD;
4120                 }
4121             }
4122 
4123             /* Incorporate constraints for this operand.  */
4124             switch (opc) {
4125             case INDEX_op_mov:
4126                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4127                    have proper constraints.  That said, special case
4128                    moves to propagate preferences backward.  */
4129                 if (IS_DEAD_ARG(1)) {
4130                     *la_temp_pref(arg_temp(op->args[0]))
4131                         = *la_temp_pref(arg_temp(op->args[1]));
4132                 }
4133                 break;
4134 
4135             default:
4136                 args_ct = opcode_args_ct(op);
4137                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4138                     const TCGArgConstraint *ct = &args_ct[i];
4139                     TCGRegSet set, *pset;
4140 
4141                     ts = arg_temp(op->args[i]);
4142                     pset = la_temp_pref(ts);
4143                     set = *pset;
4144 
4145                     set &= ct->regs;
4146                     if (ct->ialias) {
4147                         set &= output_pref(op, ct->alias_index);
4148                     }
4149                     /* If the combination is not possible, restart.  */
4150                     if (set == 0) {
4151                         set = ct->regs;
4152                     }
4153                     *pset = set;
4154                 }
4155                 break;
4156             }
4157             break;
4158         }
4159         op->life = arg_life;
4160     }
4161 }
4162 
4163 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4164 static bool __attribute__((noinline))
4165 liveness_pass_2(TCGContext *s)
4166 {
4167     int nb_globals = s->nb_globals;
4168     int nb_temps, i;
4169     bool changes = false;
4170     TCGOp *op, *op_next;
4171 
4172     /* Create a temporary for each indirect global.  */
4173     for (i = 0; i < nb_globals; ++i) {
4174         TCGTemp *its = &s->temps[i];
4175         if (its->indirect_reg) {
4176             TCGTemp *dts = tcg_temp_alloc(s);
4177             dts->type = its->type;
4178             dts->base_type = its->base_type;
4179             dts->temp_subindex = its->temp_subindex;
4180             dts->kind = TEMP_EBB;
4181             its->state_ptr = dts;
4182         } else {
4183             its->state_ptr = NULL;
4184         }
4185         /* All globals begin dead.  */
4186         its->state = TS_DEAD;
4187     }
4188     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4189         TCGTemp *its = &s->temps[i];
4190         its->state_ptr = NULL;
4191         its->state = TS_DEAD;
4192     }
4193 
4194     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4195         TCGOpcode opc = op->opc;
4196         const TCGOpDef *def = &tcg_op_defs[opc];
4197         TCGLifeData arg_life = op->life;
4198         int nb_iargs, nb_oargs, call_flags;
4199         TCGTemp *arg_ts, *dir_ts;
4200 
4201         if (opc == INDEX_op_call) {
4202             nb_oargs = TCGOP_CALLO(op);
4203             nb_iargs = TCGOP_CALLI(op);
4204             call_flags = tcg_call_flags(op);
4205         } else {
4206             nb_iargs = def->nb_iargs;
4207             nb_oargs = def->nb_oargs;
4208 
4209             /* Set flags similar to those that calls require.  */
4210             if (def->flags & TCG_OPF_COND_BRANCH) {
4211                 /* Like reading globals: sync_globals */
4212                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4213             } else if (def->flags & TCG_OPF_BB_END) {
4214                 /* Like writing globals: save_globals */
4215                 call_flags = 0;
4216             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4217                 /* Like reading globals: sync_globals */
4218                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4219             } else {
4220                 /* No effect on globals.  */
4221                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4222                               TCG_CALL_NO_WRITE_GLOBALS);
4223             }
4224         }
4225 
4226         /* Make sure that input arguments are available.  */
4227         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4228             arg_ts = arg_temp(op->args[i]);
4229             dir_ts = arg_ts->state_ptr;
4230             if (dir_ts && arg_ts->state == TS_DEAD) {
4231                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4232                                   ? INDEX_op_ld_i32
4233                                   : INDEX_op_ld_i64);
4234                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4235                                                   arg_ts->type, 3);
4236 
4237                 lop->args[0] = temp_arg(dir_ts);
4238                 lop->args[1] = temp_arg(arg_ts->mem_base);
4239                 lop->args[2] = arg_ts->mem_offset;
4240 
4241                 /* Loaded, but synced with memory.  */
4242                 arg_ts->state = TS_MEM;
4243             }
4244         }
4245 
4246         /* Perform input replacement, and mark inputs that became dead.
4247            No action is required except keeping temp_state up to date
4248            so that we reload when needed.  */
4249         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4250             arg_ts = arg_temp(op->args[i]);
4251             dir_ts = arg_ts->state_ptr;
4252             if (dir_ts) {
4253                 op->args[i] = temp_arg(dir_ts);
4254                 changes = true;
4255                 if (IS_DEAD_ARG(i)) {
4256                     arg_ts->state = TS_DEAD;
4257                 }
4258             }
4259         }
4260 
4261         /* Liveness analysis should ensure that the following are
4262            all correct, for call sites and basic block end points.  */
4263         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4264             /* Nothing to do */
4265         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4266             for (i = 0; i < nb_globals; ++i) {
4267                 /* Liveness should see that globals are synced back,
4268                    that is, either TS_DEAD or TS_MEM.  */
4269                 arg_ts = &s->temps[i];
4270                 tcg_debug_assert(arg_ts->state_ptr == 0
4271                                  || arg_ts->state != 0);
4272             }
4273         } else {
4274             for (i = 0; i < nb_globals; ++i) {
4275                 /* Liveness should see that globals are saved back,
4276                    that is, TS_DEAD, waiting to be reloaded.  */
4277                 arg_ts = &s->temps[i];
4278                 tcg_debug_assert(arg_ts->state_ptr == 0
4279                                  || arg_ts->state == TS_DEAD);
4280             }
4281         }
4282 
4283         /* Outputs become available.  */
4284         if (opc == INDEX_op_mov) {
4285             arg_ts = arg_temp(op->args[0]);
4286             dir_ts = arg_ts->state_ptr;
4287             if (dir_ts) {
4288                 op->args[0] = temp_arg(dir_ts);
4289                 changes = true;
4290 
4291                 /* The output is now live and modified.  */
4292                 arg_ts->state = 0;
4293 
4294                 if (NEED_SYNC_ARG(0)) {
4295                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4296                                       ? INDEX_op_st_i32
4297                                       : INDEX_op_st_i64);
4298                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4299                                                      arg_ts->type, 3);
4300                     TCGTemp *out_ts = dir_ts;
4301 
4302                     if (IS_DEAD_ARG(0)) {
4303                         out_ts = arg_temp(op->args[1]);
4304                         arg_ts->state = TS_DEAD;
4305                         tcg_op_remove(s, op);
4306                     } else {
4307                         arg_ts->state = TS_MEM;
4308                     }
4309 
4310                     sop->args[0] = temp_arg(out_ts);
4311                     sop->args[1] = temp_arg(arg_ts->mem_base);
4312                     sop->args[2] = arg_ts->mem_offset;
4313                 } else {
4314                     tcg_debug_assert(!IS_DEAD_ARG(0));
4315                 }
4316             }
4317         } else {
4318             for (i = 0; i < nb_oargs; i++) {
4319                 arg_ts = arg_temp(op->args[i]);
4320                 dir_ts = arg_ts->state_ptr;
4321                 if (!dir_ts) {
4322                     continue;
4323                 }
4324                 op->args[i] = temp_arg(dir_ts);
4325                 changes = true;
4326 
4327                 /* The output is now live and modified.  */
4328                 arg_ts->state = 0;
4329 
4330                 /* Sync outputs upon their last write.  */
4331                 if (NEED_SYNC_ARG(i)) {
4332                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4333                                       ? INDEX_op_st_i32
4334                                       : INDEX_op_st_i64);
4335                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4336                                                      arg_ts->type, 3);
4337 
4338                     sop->args[0] = temp_arg(dir_ts);
4339                     sop->args[1] = temp_arg(arg_ts->mem_base);
4340                     sop->args[2] = arg_ts->mem_offset;
4341 
4342                     arg_ts->state = TS_MEM;
4343                 }
4344                 /* Drop outputs that are dead.  */
4345                 if (IS_DEAD_ARG(i)) {
4346                     arg_ts->state = TS_DEAD;
4347                 }
4348             }
4349         }
4350     }
4351 
4352     return changes;
4353 }
4354 
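/* Allocate a stack-frame slot for TS, aligned for its base type. */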
4355 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4356 {
4357     intptr_t off;
4358     int size, align;
4359 
4360     /* When allocating an object, look at the full type. */
4361     size = tcg_type_size(ts->base_type);
4362     switch (ts->base_type) {
4363     case TCG_TYPE_I32:
4364         align = 4;
4365         break;
4366     case TCG_TYPE_I64:
4367     case TCG_TYPE_V64:
4368         align = 8;
4369         break;
4370     case TCG_TYPE_I128:
4371     case TCG_TYPE_V128:
4372     case TCG_TYPE_V256:
4373         /*
4374          * Note that we do not require aligned storage for V256,
4375          * and that we provide alignment for I128 to match V128,
4376          * even if that's above what the host ABI requires.
4377          */
4378         align = 16;
4379         break;
4380     default:
4381         g_assert_not_reached();
4382     }
4383 
4384     /*
4385      * Assume the stack is sufficiently aligned.
4386      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4387      * and do not require 16 byte vector alignment.  This seems slightly
4388      * easier than fully parameterizing the above switch statement.
4389      */
4390     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4391     off = ROUND_UP(s->current_frame_offset, align);
4392 
4393     /* If we've exhausted the stack frame, restart with a smaller TB. */
4394     if (off + size > s->frame_end) {
4395         tcg_raise_tb_overflow(s);
4396     }
4397     s->current_frame_offset = off + size;
4398 #if defined(__sparc__)
4399     off += TCG_TARGET_STACK_BIAS;
4400 #endif
4401 
4402     /* If the object was subdivided, assign memory to all the parts. */
4403     if (ts->base_type != ts->type) {
4404         int part_size = tcg_type_size(ts->type);
4405         int part_count = size / part_size;
4406 
4407         /*
4408          * Each part is allocated sequentially in tcg_temp_new_internal.
4409          * Jump back to the first part by subtracting the current index.
4410          */
4411         ts -= ts->temp_subindex;
4412         for (int i = 0; i < part_count; ++i) {
4413             ts[i].mem_offset = off + i * part_size;
4414             ts[i].mem_base = s->frame_temp;
4415             ts[i].mem_allocated = 1;
4416         }
4417     } else {
4418         ts->mem_offset = off;
4419         ts->mem_base = s->frame_temp;
4420         ts->mem_allocated = 1;
4421     }
4422 }
4423 
4424 /* Assign @reg to @ts, and update reg_to_temp[]. */
4425 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4426 {
4427     if (ts->val_type == TEMP_VAL_REG) {
4428         TCGReg old = ts->reg;
4429         tcg_debug_assert(s->reg_to_temp[old] == ts);
4430         if (old == reg) {
4431             return;
4432         }
4433         s->reg_to_temp[old] = NULL;
4434     }
4435     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4436     s->reg_to_temp[reg] = ts;
4437     ts->val_type = TEMP_VAL_REG;
4438     ts->reg = reg;
4439 }
4440 
4441 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4442 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4443 {
4444     tcg_debug_assert(type != TEMP_VAL_REG);
4445     if (ts->val_type == TEMP_VAL_REG) {
4446         TCGReg reg = ts->reg;
4447         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4448         s->reg_to_temp[reg] = NULL;
4449     }
4450     ts->val_type = type;
4451 }
4452 
4453 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4454 
4455 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4456    mark it free; otherwise mark it dead.  */
4457 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4458 {
4459     TCGTempVal new_type;
4460 
4461     switch (ts->kind) {
4462     case TEMP_FIXED:
4463         return;
4464     case TEMP_GLOBAL:
4465     case TEMP_TB:
4466         new_type = TEMP_VAL_MEM;
4467         break;
4468     case TEMP_EBB:
4469         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4470         break;
4471     case TEMP_CONST:
4472         new_type = TEMP_VAL_CONST;
4473         break;
4474     default:
4475         g_assert_not_reached();
4476     }
4477     set_temp_val_nonreg(s, ts, new_type);
4478 }
4479 
4480 /* Mark a temporary as dead.  */
4481 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4482 {
4483     temp_free_or_dead(s, ts, 1);
4484 }
4485 
4486 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4487    register needs to be allocated to store a constant.  If 'free_or_dead'
4488    is non-zero, subsequently release the temporary; if it is positive, the
4489    temp is dead; if it is negative, the temp is free.  */
4490 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4491                       TCGRegSet preferred_regs, int free_or_dead)
4492 {
4493     if (!temp_readonly(ts) && !ts->mem_coherent) {
4494         if (!ts->mem_allocated) {
4495             temp_allocate_frame(s, ts);
4496         }
4497         switch (ts->val_type) {
4498         case TEMP_VAL_CONST:
4499             /* If we're going to free the temp immediately, then we won't
4500                require it later in a register, so attempt to store the
4501                constant to memory directly.  */
4502             if (free_or_dead
4503                 && tcg_out_sti(s, ts->type, ts->val,
4504                                ts->mem_base->reg, ts->mem_offset)) {
4505                 break;
4506             }
4507             temp_load(s, ts, tcg_target_available_regs[ts->type],
4508                       allocated_regs, preferred_regs);
4509             /* fallthrough */
4510 
4511         case TEMP_VAL_REG:
4512             tcg_out_st(s, ts->type, ts->reg,
4513                        ts->mem_base->reg, ts->mem_offset);
4514             break;
4515 
4516         case TEMP_VAL_MEM:
4517             break;
4518 
4519         case TEMP_VAL_DEAD:
4520         default:
4521             g_assert_not_reached();
4522         }
4523         ts->mem_coherent = 1;
4524     }
4525     if (free_or_dead) {
4526         temp_free_or_dead(s, ts, free_or_dead);
4527     }
4528 }
4529 
4530 /* free register 'reg' by spilling the corresponding temporary if necessary */
4531 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4532 {
4533     TCGTemp *ts = s->reg_to_temp[reg];
4534     if (ts != NULL) {
4535         temp_sync(s, ts, allocated_regs, 0, -1);
4536     }
4537 }
4538 
4539 /**
4540  * tcg_reg_alloc:
4541  * @required_regs: Set of registers in which we must allocate.
4542  * @allocated_regs: Set of registers which must be avoided.
4543  * @preferred_regs: Set of registers we should prefer.
4544  * @rev: True if we search the registers in "indirect" order.
4545  *
4546  * The allocated register must be in @required_regs & ~@allocated_regs,
4547  * but if we can put it in @preferred_regs we may save a move later.
4548  */
4549 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4550                             TCGRegSet allocated_regs,
4551                             TCGRegSet preferred_regs, bool rev)
4552 {
4553     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4554     TCGRegSet reg_ct[2];
4555     const int *order;
4556 
4557     reg_ct[1] = required_regs & ~allocated_regs;
4558     tcg_debug_assert(reg_ct[1] != 0);
4559     reg_ct[0] = reg_ct[1] & preferred_regs;
4560 
4561     /* Skip the preferred_regs option if it cannot be satisfied,
4562        or if the preference made no difference.  */
4563     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4564 
4565     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4566 
4567     /* Try free registers, preferences first.  */
4568     for (j = f; j < 2; j++) {
4569         TCGRegSet set = reg_ct[j];
4570 
4571         if (tcg_regset_single(set)) {
4572             /* One register in the set.  */
4573             TCGReg reg = tcg_regset_first(set);
4574             if (s->reg_to_temp[reg] == NULL) {
4575                 return reg;
4576             }
4577         } else {
4578             for (i = 0; i < n; i++) {
4579                 TCGReg reg = order[i];
4580                 if (s->reg_to_temp[reg] == NULL &&
4581                     tcg_regset_test_reg(set, reg)) {
4582                     return reg;
4583                 }
4584             }
4585         }
4586     }
4587 
4588     /* We must spill something.  */
4589     for (j = f; j < 2; j++) {
4590         TCGRegSet set = reg_ct[j];
4591 
4592         if (tcg_regset_single(set)) {
4593             /* One register in the set.  */
4594             TCGReg reg = tcg_regset_first(set);
4595             tcg_reg_free(s, reg, allocated_regs);
4596             return reg;
4597         } else {
4598             for (i = 0; i < n; i++) {
4599                 TCGReg reg = order[i];
4600                 if (tcg_regset_test_reg(set, reg)) {
4601                     tcg_reg_free(s, reg, allocated_regs);
4602                     return reg;
4603                 }
4604             }
4605         }
4606     }
4607 
4608     g_assert_not_reached();
4609 }
4610 
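/*
 * As tcg_reg_alloc, but allocate an adjacent pair of registers
 * (reg, reg + 1), preferring the pair that requires the fewest spills.
 */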
4611 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4612                                  TCGRegSet allocated_regs,
4613                                  TCGRegSet preferred_regs, bool rev)
4614 {
4615     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4616     TCGRegSet reg_ct[2];
4617     const int *order;
4618 
4619     /* Keep reg I in the set only if neither I nor I+1 is in allocated_regs. */
4620     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4621     tcg_debug_assert(reg_ct[1] != 0);
4622     reg_ct[0] = reg_ct[1] & preferred_regs;
4623 
4624     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4625 
4626     /*
4627      * Skip the preferred_regs option if it cannot be satisfied,
4628      * or if the preference made no difference.
4629      */
4630     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4631 
4632     /*
4633      * Minimize the number of flushes by looking for 2 free registers first,
4634      * then a single flush, then two flushes.
4635      */
4636     for (fmin = 2; fmin >= 0; fmin--) {
4637         for (j = k; j < 2; j++) {
4638             TCGRegSet set = reg_ct[j];
4639 
4640             for (i = 0; i < n; i++) {
4641                 TCGReg reg = order[i];
4642 
4643                 if (tcg_regset_test_reg(set, reg)) {
4644                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4645                     if (f >= fmin) {
4646                         tcg_reg_free(s, reg, allocated_regs);
4647                         tcg_reg_free(s, reg + 1, allocated_regs);
4648                         return reg;
4649                     }
4650                 }
4651             }
4652         }
4653     }
4654     g_assert_not_reached();
4655 }
4656 
4657 /* Make sure the temporary is in a register.  If needed, allocate the register
4658    from DESIRED while avoiding ALLOCATED.  */
4659 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4660                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4661 {
4662     TCGReg reg;
4663 
4664     switch (ts->val_type) {
4665     case TEMP_VAL_REG:
4666         return;
4667     case TEMP_VAL_CONST:
4668         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4669                             preferred_regs, ts->indirect_base);
4670         if (ts->type <= TCG_TYPE_I64) {
4671             tcg_out_movi(s, ts->type, reg, ts->val);
4672         } else {
4673             uint64_t val = ts->val;
4674             MemOp vece = MO_64;
4675 
4676             /*
4677              * Find the minimal vector element that matches the constant.
4678              * The targets would, in general, have to do this search anyway;
4679              * do it once here, generically.
4680              */
4681             if (val == dup_const(MO_8, val)) {
4682                 vece = MO_8;
4683             } else if (val == dup_const(MO_16, val)) {
4684                 vece = MO_16;
4685             } else if (val == dup_const(MO_32, val)) {
4686                 vece = MO_32;
4687             }
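                 /*
                  * E.g. 0x7f7f7f7f7f7f7f7f matches at MO_8, while
                  * 0x00ff00ff00ff00ff first matches at MO_16.
                  */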
4688 
4689             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4690         }
4691         ts->mem_coherent = 0;
4692         break;
4693     case TEMP_VAL_MEM:
4694         if (!ts->mem_allocated) {
4695             temp_allocate_frame(s, ts);
4696         }
4697         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4698                             preferred_regs, ts->indirect_base);
4699         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4700         ts->mem_coherent = 1;
4701         break;
4702     case TEMP_VAL_DEAD:
4703     default:
4704         g_assert_not_reached();
4705     }
4706     set_temp_val_reg(s, ts, reg);
4707 }
4708 
4709 /* Save a temporary to memory. 'allocated_regs' is used in case a
4710    temporary register needs to be allocated to store a constant.  */
4711 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4712 {
4713     /* The liveness analysis already ensures that globals are back
4714        in memory. Keep a tcg_debug_assert for safety. */
4715     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4716 }
4717 
4718 /* save globals to their canonical location and assume they can be
4719    modified by the following code. 'allocated_regs' is used in case a
4720    temporary register needs to be allocated to store a constant. */
4721 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4722 {
4723     int i, n;
4724 
4725     for (i = 0, n = s->nb_globals; i < n; i++) {
4726         temp_save(s, &s->temps[i], allocated_regs);
4727     }
4728 }
4729 
4730 /* sync globals to their canonical location and assume they can be
4731    read by the following code. 'allocated_regs' is used in case a
4732    temporary register needs to be allocated to store a constant. */
4733 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4734 {
4735     int i, n;
4736 
4737     for (i = 0, n = s->nb_globals; i < n; i++) {
4738         TCGTemp *ts = &s->temps[i];
4739         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4740                          || ts->kind == TEMP_FIXED
4741                          || ts->mem_coherent);
4742     }
4743 }
4744 
4745 /* at the end of a basic block, we assume all temporaries are dead and
4746    all globals are stored at their canonical location. */
4747 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4748 {
4749     int i;
4750 
4751     for (i = s->nb_globals; i < s->nb_temps; i++) {
4752         TCGTemp *ts = &s->temps[i];
4753 
4754         switch (ts->kind) {
4755         case TEMP_TB:
4756             temp_save(s, ts, allocated_regs);
4757             break;
4758         case TEMP_EBB:
4759             /* The liveness analysis already ensures that temps are dead.
4760                Keep a tcg_debug_assert for safety. */
4761             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4762             break;
4763         case TEMP_CONST:
4764             /* Similarly, we should have freed any allocated register. */
4765             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4766             break;
4767         default:
4768             g_assert_not_reached();
4769         }
4770     }
4771 
4772     save_globals(s, allocated_regs);
4773 }
4774 
4775 /*
4776  * At a conditional branch, we assume all temporaries are dead unless
4777  * explicitly live-across-conditional-branch; all globals and local
4778  * temps are synced to their location.
4779  */
4780 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4781 {
4782     sync_globals(s, allocated_regs);
4783 
4784     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4785         TCGTemp *ts = &s->temps[i];
4786         /*
4787          * The liveness analysis already ensures that temps are dead.
4788          * Keep tcg_debug_asserts for safety.
4789          */
4790         switch (ts->kind) {
4791         case TEMP_TB:
4792             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4793             break;
4794         case TEMP_EBB:
4795         case TEMP_CONST:
4796             break;
4797         default:
4798             g_assert_not_reached();
4799         }
4800     }
4801 }
4802 
4803 /*
4804  * Specialized code generation for INDEX_op_mov_* with a constant.
4805  */
4806 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4807                                   tcg_target_ulong val, TCGLifeData arg_life,
4808                                   TCGRegSet preferred_regs)
4809 {
4810     /* ENV should not be modified.  */
4811     tcg_debug_assert(!temp_readonly(ots));
4812 
4813     /* The movi is not explicitly generated here.  */
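         /* The constant is materialized only when needed, either by the
            temp_sync below or by temp_load at a later use. */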
4814     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4815     ots->val = val;
4816     ots->mem_coherent = 0;
4817     if (NEED_SYNC_ARG(0)) {
4818         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4819     } else if (IS_DEAD_ARG(0)) {
4820         temp_dead(s, ots);
4821     }
4822 }
4823 
4824 /*
4825  * Specialized code generation for INDEX_op_mov_*.
4826  */
4827 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4828 {
4829     const TCGLifeData arg_life = op->life;
4830     TCGRegSet allocated_regs, preferred_regs;
4831     TCGTemp *ts, *ots;
4832     TCGType otype, itype;
4833     TCGReg oreg, ireg;
4834 
4835     allocated_regs = s->reserved_regs;
4836     preferred_regs = output_pref(op, 0);
4837     ots = arg_temp(op->args[0]);
4838     ts = arg_temp(op->args[1]);
4839 
4840     /* ENV should not be modified.  */
4841     tcg_debug_assert(!temp_readonly(ots));
4842 
4843     /* Note that otype != itype for no-op truncation.  */
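         /* E.g. an i64 source copied into an i32 destination temp. */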
4844     otype = ots->type;
4845     itype = ts->type;
4846 
4847     if (ts->val_type == TEMP_VAL_CONST) {
4848         /* propagate constant or generate sti */
4849         tcg_target_ulong val = ts->val;
4850         if (IS_DEAD_ARG(1)) {
4851             temp_dead(s, ts);
4852         }
4853         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4854         return;
4855     }
4856 
4857     /* If the source value is in memory we're going to be forced
4858        to have it in a register in order to perform the copy.  Copy
4859        the SOURCE value into its own register first, so that we
4860        don't have to reload SOURCE the next time it is used. */
4861     if (ts->val_type == TEMP_VAL_MEM) {
4862         temp_load(s, ts, tcg_target_available_regs[itype],
4863                   allocated_regs, preferred_regs);
4864     }
4865     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4866     ireg = ts->reg;
4867 
4868     if (IS_DEAD_ARG(0)) {
4869         /* mov to a non-saved dead register makes no sense (even with
4870            liveness analysis disabled). */
4871         tcg_debug_assert(NEED_SYNC_ARG(0));
4872         if (!ots->mem_allocated) {
4873             temp_allocate_frame(s, ots);
4874         }
4875         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4876         if (IS_DEAD_ARG(1)) {
4877             temp_dead(s, ts);
4878         }
4879         temp_dead(s, ots);
4880         return;
4881     }
4882 
4883     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4884         /*
4885          * The mov can be suppressed.  Kill input first, so that it
4886          * is unlinked from reg_to_temp, then set the output to the
4887          * reg that we saved from the input.
4888          */
4889         temp_dead(s, ts);
4890         oreg = ireg;
4891     } else {
4892         if (ots->val_type == TEMP_VAL_REG) {
4893             oreg = ots->reg;
4894         } else {
4895             /* Make sure to not spill the input register during allocation. */
4896             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4897                                  allocated_regs | ((TCGRegSet)1 << ireg),
4898                                  preferred_regs, ots->indirect_base);
4899         }
4900         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4901             /*
4902              * Cross register class move not supported.
4903              * Store the source register into the destination slot
4904              * and leave the destination temp as TEMP_VAL_MEM.
4905              */
4906             assert(!temp_readonly(ots));
4907             if (!ots->mem_allocated) {
4908                 temp_allocate_frame(s, ots);
4909             }
4910             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4911             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4912             ots->mem_coherent = 1;
4913             return;
4914         }
4915     }
4916     set_temp_val_reg(s, ots, oreg);
4917     ots->mem_coherent = 0;
4918 
4919     if (NEED_SYNC_ARG(0)) {
4920         temp_sync(s, ots, allocated_regs, 0, 0);
4921     }
4922 }
4923 
4924 /*
4925  * Specialized code generation for INDEX_op_dup_vec.
4926  */
4927 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4928 {
4929     const TCGLifeData arg_life = op->life;
4930     TCGRegSet dup_out_regs, dup_in_regs;
4931     const TCGArgConstraint *dup_args_ct;
4932     TCGTemp *its, *ots;
4933     TCGType itype, vtype;
4934     unsigned vece;
4935     int lowpart_ofs;
4936     bool ok;
4937 
4938     ots = arg_temp(op->args[0]);
4939     its = arg_temp(op->args[1]);
4940 
4941     /* ENV should not be modified.  */
4942     tcg_debug_assert(!temp_readonly(ots));
4943 
4944     itype = its->type;
4945     vece = TCGOP_VECE(op);
4946     vtype = TCGOP_TYPE(op);
4947 
4948     if (its->val_type == TEMP_VAL_CONST) {
4949         /* Propagate constant via movi -> dupi.  */
4950         tcg_target_ulong val = its->val;
4951         if (IS_DEAD_ARG(1)) {
4952             temp_dead(s, its);
4953         }
4954         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4955         return;
4956     }
4957 
4958     dup_args_ct = opcode_args_ct(op);
4959     dup_out_regs = dup_args_ct[0].regs;
4960     dup_in_regs = dup_args_ct[1].regs;
4961 
4962     /* Allocate the output register now.  */
4963     if (ots->val_type != TEMP_VAL_REG) {
4964         TCGRegSet allocated_regs = s->reserved_regs;
4965         TCGReg oreg;
4966 
4967         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4968             /* Make sure to not spill the input register. */
4969             tcg_regset_set_reg(allocated_regs, its->reg);
4970         }
4971         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4972                              output_pref(op, 0), ots->indirect_base);
4973         set_temp_val_reg(s, ots, oreg);
4974     }
4975 
4976     switch (its->val_type) {
4977     case TEMP_VAL_REG:
4978         /*
4979          * The dup constraints must be broad, covering all possible VECE.
4980          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4981          * to fail, indicating that extra moves are required for that case.
4982          */
4983         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4984             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4985                 goto done;
4986             }
4987             /* Try again from memory or a vector input register.  */
4988         }
4989         if (!its->mem_coherent) {
4990             /*
4991              * The input register is not synced, and so an extra store
4992              * would be required to use memory.  Attempt an integer-vector
4993              * register move first.  We do not have a TCGRegSet for this.
4994              */
4995             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4996                 break;
4997             }
4998             /* Sync the temp back to its slot and load from there.  */
4999             temp_sync(s, its, s->reserved_regs, 0, 0);
5000         }
5001         /* fall through */
5002 
5003     case TEMP_VAL_MEM:
5004         lowpart_ofs = 0;
5005         if (HOST_BIG_ENDIAN) {
5006             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5007         }
5008         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5009                              its->mem_offset + lowpart_ofs)) {
5010             goto done;
5011         }
5012         /* Load the input into the destination vector register. */
5013         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5014         break;
5015 
5016     default:
5017         g_assert_not_reached();
5018     }
5019 
5020     /* We now have a vector input register, so dup must succeed. */
5021     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5022     tcg_debug_assert(ok);
5023 
5024  done:
5025     ots->mem_coherent = 0;
5026     if (IS_DEAD_ARG(1)) {
5027         temp_dead(s, its);
5028     }
5029     if (NEED_SYNC_ARG(0)) {
5030         temp_sync(s, ots, s->reserved_regs, 0, 0);
5031     }
5032     if (IS_DEAD_ARG(0)) {
5033         temp_dead(s, ots);
5034     }
5035 }
5036 
5037 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5038 {
5039     const TCGLifeData arg_life = op->life;
5040     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5041     TCGRegSet i_allocated_regs;
5042     TCGRegSet o_allocated_regs;
5043     int i, k, nb_iargs, nb_oargs;
5044     TCGReg reg;
5045     TCGArg arg;
5046     const TCGArgConstraint *args_ct;
5047     const TCGArgConstraint *arg_ct;
5048     TCGTemp *ts;
5049     TCGArg new_args[TCG_MAX_OP_ARGS];
5050     int const_args[TCG_MAX_OP_ARGS];
5051     TCGCond op_cond;
5052 
5053     nb_oargs = def->nb_oargs;
5054     nb_iargs = def->nb_iargs;
5055 
5056     /* copy constants */
5057     memcpy(new_args + nb_oargs + nb_iargs,
5058            op->args + nb_oargs + nb_iargs,
5059            sizeof(TCGArg) * def->nb_cargs);
5060 
5061     i_allocated_regs = s->reserved_regs;
5062     o_allocated_regs = s->reserved_regs;
5063 
5064     switch (op->opc) {
5065     case INDEX_op_brcond_i32:
5066     case INDEX_op_brcond_i64:
5067         op_cond = op->args[2];
5068         break;
5069     case INDEX_op_setcond_i32:
5070     case INDEX_op_setcond_i64:
5071     case INDEX_op_negsetcond_i32:
5072     case INDEX_op_negsetcond_i64:
5073     case INDEX_op_cmp_vec:
5074         op_cond = op->args[3];
5075         break;
5076     case INDEX_op_brcond2_i32:
5077         op_cond = op->args[4];
5078         break;
5079     case INDEX_op_movcond_i32:
5080     case INDEX_op_movcond_i64:
5081     case INDEX_op_setcond2_i32:
5082     case INDEX_op_cmpsel_vec:
5083         op_cond = op->args[5];
5084         break;
5085     default:
5086         /* No condition within opcode. */
5087         op_cond = TCG_COND_ALWAYS;
5088         break;
5089     }
5090 
5091     args_ct = opcode_args_ct(op);
5092 
5093     /* satisfy input constraints */
5094     for (k = 0; k < nb_iargs; k++) {
5095         TCGRegSet i_preferred_regs, i_required_regs;
5096         bool allocate_new_reg, copyto_new_reg;
5097         TCGTemp *ts2;
5098         int i1, i2;
5099 
5100         i = args_ct[nb_oargs + k].sort_index;
5101         arg = op->args[i];
5102         arg_ct = &args_ct[i];
5103         ts = arg_temp(arg);
5104 
5105         if (ts->val_type == TEMP_VAL_CONST) {
5106 #ifdef TCG_REG_ZERO
5107             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5108                 /* Hardware zero register: indicate register via non-const. */
5109                 const_args[i] = 0;
5110                 new_args[i] = TCG_REG_ZERO;
5111                 continue;
5112             }
5113 #endif
5114 
5115             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5116                                        op_cond, TCGOP_VECE(op))) {
5117                 /* constant is OK for instruction */
5118                 const_args[i] = 1;
5119                 new_args[i] = ts->val;
5120                 continue;
5121             }
5122         }
5123 
5124         reg = ts->reg;
5125         i_preferred_regs = 0;
5126         i_required_regs = arg_ct->regs;
5127         allocate_new_reg = false;
5128         copyto_new_reg = false;
5129 
5130         switch (arg_ct->pair) {
5131         case 0: /* not paired */
5132             if (arg_ct->ialias) {
5133                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5134 
5135                 /*
5136                  * If the input is readonly, then it cannot also be an
5137                  * output and aliased to itself.  If the input is not
5138                  * dead after the instruction, we must allocate a new
5139                  * register and move it.
5140                  */
5141                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5142                     || args_ct[arg_ct->alias_index].newreg) {
5143                     allocate_new_reg = true;
5144                 } else if (ts->val_type == TEMP_VAL_REG) {
5145                     /*
5146                      * Check if the current register has already been
5147                      * allocated for another input.
5148                      */
5149                     allocate_new_reg =
5150                         tcg_regset_test_reg(i_allocated_regs, reg);
5151                 }
5152             }
5153             if (!allocate_new_reg) {
5154                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5155                           i_preferred_regs);
5156                 reg = ts->reg;
5157                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5158             }
5159             if (allocate_new_reg) {
5160                 /*
5161                  * Allocate a new register matching the constraint
5162                  * and move the temporary register into it.
5163                  */
5164                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5165                           i_allocated_regs, 0);
5166                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5167                                     i_preferred_regs, ts->indirect_base);
5168                 copyto_new_reg = true;
5169             }
5170             break;
5171 
5172         case 1:
5173             /* First of an input pair; if i1 == i2, the second is an output. */
5174             i1 = i;
5175             i2 = arg_ct->pair_index;
5176             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5177 
5178             /*
5179              * It is easier to default to allocating a new pair
5180              * and to identify a few cases where it's not required.
5181              */
5182             if (arg_ct->ialias) {
5183                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
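                     /*
                      * The current allocation can be used in place when both
                      * inputs die here, ts already sits in an acceptable
                      * register, and reg+1 either holds ts2 or is free.
                      */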
5184                 if (IS_DEAD_ARG(i1) &&
5185                     IS_DEAD_ARG(i2) &&
5186                     !temp_readonly(ts) &&
5187                     ts->val_type == TEMP_VAL_REG &&
5188                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5189                     tcg_regset_test_reg(i_required_regs, reg) &&
5190                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5191                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5192                     (ts2
5193                      ? ts2->val_type == TEMP_VAL_REG &&
5194                        ts2->reg == reg + 1 &&
5195                        !temp_readonly(ts2)
5196                      : s->reg_to_temp[reg + 1] == NULL)) {
5197                     break;
5198                 }
5199             } else {
5200                 /* Without aliasing, the pair must also be an input. */
5201                 tcg_debug_assert(ts2);
5202                 if (ts->val_type == TEMP_VAL_REG &&
5203                     ts2->val_type == TEMP_VAL_REG &&
5204                     ts2->reg == reg + 1 &&
5205                     tcg_regset_test_reg(i_required_regs, reg)) {
5206                     break;
5207                 }
5208             }
5209             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5210                                      0, ts->indirect_base);
5211             goto do_pair;
5212 
5213         case 2: /* pair second */
5214             reg = new_args[arg_ct->pair_index] + 1;
5215             goto do_pair;
5216 
5217         case 3: /* ialias with second output, no first input */
5218             tcg_debug_assert(arg_ct->ialias);
5219             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5220 
5221             if (IS_DEAD_ARG(i) &&
5222                 !temp_readonly(ts) &&
5223                 ts->val_type == TEMP_VAL_REG &&
5224                 reg > 0 &&
5225                 s->reg_to_temp[reg - 1] == NULL &&
5226                 tcg_regset_test_reg(i_required_regs, reg) &&
5227                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5228                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5229                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5230                 break;
5231             }
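                 /*
                  * The constraint applies to the second register of the pair.
                  * Shifting it right by one re-expresses it as a constraint
                  * on the first register; reserve that low half and continue
                  * with the high half as this input's register.
                  */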
5232             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5233                                      i_allocated_regs, 0,
5234                                      ts->indirect_base);
5235             tcg_regset_set_reg(i_allocated_regs, reg);
5236             reg += 1;
5237             goto do_pair;
5238 
5239         do_pair:
5240             /*
5241              * If an aliased input is not dead after the instruction,
5242              * we must allocate a new register and move it.
5243              */
5244             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5245                 TCGRegSet t_allocated_regs = i_allocated_regs;
5246 
5247                 /*
5248                  * Because of the alias, and the continued life, make sure
5249                  * that the temp is somewhere *other* than the reg pair,
5250                  * and we get a copy in reg.
5251                  */
5252                 tcg_regset_set_reg(t_allocated_regs, reg);
5253                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5254                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5255                     /* If ts was already in reg, copy it somewhere else. */
5256                     TCGReg nr;
5257                     bool ok;
5258 
5259                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5260                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5261                                        t_allocated_regs, 0, ts->indirect_base);
5262                     ok = tcg_out_mov(s, ts->type, nr, reg);
5263                     tcg_debug_assert(ok);
5264 
5265                     set_temp_val_reg(s, ts, nr);
5266                 } else {
5267                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5268                               t_allocated_regs, 0);
5269                     copyto_new_reg = true;
5270                 }
5271             } else {
5272                 /* Preferably allocate to reg, otherwise copy. */
5273                 i_required_regs = (TCGRegSet)1 << reg;
5274                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5275                           i_preferred_regs);
5276                 copyto_new_reg = ts->reg != reg;
5277             }
5278             break;
5279 
5280         default:
5281             g_assert_not_reached();
5282         }
5283 
5284         if (copyto_new_reg) {
5285             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5286                 /*
5287                  * Cross register class move not supported.  Sync the
5288                  * temp back to its slot and load from there.
5289                  */
5290                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5291                 tcg_out_ld(s, ts->type, reg,
5292                            ts->mem_base->reg, ts->mem_offset);
5293             }
5294         }
5295         new_args[i] = reg;
5296         const_args[i] = 0;
5297         tcg_regset_set_reg(i_allocated_regs, reg);
5298     }
5299 
5300     /* mark dead temporaries and free the associated registers */
5301     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5302         if (IS_DEAD_ARG(i)) {
5303             temp_dead(s, arg_temp(op->args[i]));
5304         }
5305     }
5306 
5307     if (def->flags & TCG_OPF_COND_BRANCH) {
5308         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5309     } else if (def->flags & TCG_OPF_BB_END) {
5310         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5311     } else {
5312         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5313             /* XXX: permit generic clobber register list? */
5314             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5315                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5316                     tcg_reg_free(s, i, i_allocated_regs);
5317                 }
5318             }
5319         }
5320         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5321             /* sync globals if the op has side effects and might trigger
5322                an exception. */
5323             sync_globals(s, i_allocated_regs);
5324         }
5325 
5326         /* satisfy the output constraints */
5327         for (k = 0; k < nb_oargs; k++) {
5328             i = args_ct[k].sort_index;
5329             arg = op->args[i];
5330             arg_ct = &args_ct[i];
5331             ts = arg_temp(arg);
5332 
5333             /* ENV should not be modified.  */
5334             tcg_debug_assert(!temp_readonly(ts));
5335 
5336             switch (arg_ct->pair) {
5337             case 0: /* not paired */
5338                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5339                     reg = new_args[arg_ct->alias_index];
5340                 } else if (arg_ct->newreg) {
5341                     reg = tcg_reg_alloc(s, arg_ct->regs,
5342                                         i_allocated_regs | o_allocated_regs,
5343                                         output_pref(op, k), ts->indirect_base);
5344                 } else {
5345                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5346                                         output_pref(op, k), ts->indirect_base);
5347                 }
5348                 break;
5349 
5350             case 1: /* first of pair */
5351                 if (arg_ct->oalias) {
5352                     reg = new_args[arg_ct->alias_index];
5353                 } else if (arg_ct->newreg) {
5354                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5355                                              i_allocated_regs | o_allocated_regs,
5356                                              output_pref(op, k),
5357                                              ts->indirect_base);
5358                 } else {
5359                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5360                                              output_pref(op, k),
5361                                              ts->indirect_base);
5362                 }
5363                 break;
5364 
5365             case 2: /* second of pair */
5366                 if (arg_ct->oalias) {
5367                     reg = new_args[arg_ct->alias_index];
5368                 } else {
5369                     reg = new_args[arg_ct->pair_index] + 1;
5370                 }
5371                 break;
5372 
5373             case 3: /* first of pair, aliasing with a second input */
5374                 tcg_debug_assert(!arg_ct->newreg);
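                     /* The aliased input claimed the pair's second register;
                        the output is the register just below it. */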
5375                 reg = new_args[arg_ct->pair_index] - 1;
5376                 break;
5377 
5378             default:
5379                 g_assert_not_reached();
5380             }
5381             tcg_regset_set_reg(o_allocated_regs, reg);
5382             set_temp_val_reg(s, ts, reg);
5383             ts->mem_coherent = 0;
5384             new_args[i] = reg;
5385         }
5386     }
5387 
5388     /* emit instruction */
5389     TCGType type = TCGOP_TYPE(op);
5390     switch (op->opc) {
5391     case INDEX_op_ext_i32_i64:
5392         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5393         break;
5394     case INDEX_op_extu_i32_i64:
5395         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5396         break;
5397     case INDEX_op_extrl_i64_i32:
5398         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5399         break;
5400 
5401     case INDEX_op_add:
5402     case INDEX_op_and:
5403     case INDEX_op_andc:
5404     case INDEX_op_clz:
5405     case INDEX_op_divs:
5406     case INDEX_op_divu:
5407     case INDEX_op_eqv:
5408     case INDEX_op_mul:
5409     case INDEX_op_mulsh:
5410     case INDEX_op_muluh:
5411     case INDEX_op_nand:
5412     case INDEX_op_nor:
5413     case INDEX_op_or:
5414     case INDEX_op_orc:
5415     case INDEX_op_rems:
5416     case INDEX_op_remu:
5417     case INDEX_op_rotl:
5418     case INDEX_op_rotr:
5419     case INDEX_op_sar:
5420     case INDEX_op_shl:
5421     case INDEX_op_shr:
5422     case INDEX_op_xor:
5423         {
5424             const TCGOutOpBinary *out =
5425                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5426 
5427             /* Constants should never appear in the first source operand. */
5428             tcg_debug_assert(!const_args[1]);
5429             if (const_args[2]) {
5430                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5431             } else {
5432                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5433             }
5434         }
5435         break;
5436 
5437     case INDEX_op_sub:
5438         {
5439             const TCGOutOpSubtract *out = &outop_sub;
5440 
5441             /*
5442              * Constants should never appear in the second source operand.
5443              * Such cases are folded to an add of the negated constant.
5444              */
5445             tcg_debug_assert(!const_args[2]);
5446             if (const_args[1]) {
5447                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5448             } else {
5449                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5450             }
5451         }
5452         break;
5453 
5454     case INDEX_op_neg:
5455     case INDEX_op_not:
5456         {
5457             const TCGOutOpUnary *out =
5458                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5459 
5460             /* Constants should have been folded. */
5461             tcg_debug_assert(!const_args[1]);
5462             out->out_rr(s, type, new_args[0], new_args[1]);
5463         }
5464         break;
5465 
5466     case INDEX_op_divs2:
5467     case INDEX_op_divu2:
5468         {
5469             const TCGOutOpDivRem *out =
5470                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5471 
5472             /* Only used by x86 and s390x, which use matching constraints. */
5473             tcg_debug_assert(new_args[0] == new_args[2]);
5474             tcg_debug_assert(new_args[1] == new_args[3]);
5475             tcg_debug_assert(!const_args[4]);
5476             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5477         }
5478         break;
5479 
5480     default:
5481         if (def->flags & TCG_OPF_VECTOR) {
5482             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5483                            TCGOP_VECE(op), new_args, const_args);
5484         } else {
5485             tcg_out_op(s, op->opc, type, new_args, const_args);
5486         }
5487         break;
5488     }
5489 
5490     /* move the outputs into the correct registers if needed */
5491     for (i = 0; i < nb_oargs; i++) {
5492         ts = arg_temp(op->args[i]);
5493 
5494         /* ENV should not be modified.  */
5495         tcg_debug_assert(!temp_readonly(ts));
5496 
5497         if (NEED_SYNC_ARG(i)) {
5498             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5499         } else if (IS_DEAD_ARG(i)) {
5500             temp_dead(s, ts);
5501         }
5502     }
5503 }
5504 
5505 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5506 {
5507     const TCGLifeData arg_life = op->life;
5508     TCGTemp *ots, *itsl, *itsh;
5509     TCGType vtype = TCGOP_TYPE(op);
5510 
5511     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5512     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5513     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5514 
5515     ots = arg_temp(op->args[0]);
5516     itsl = arg_temp(op->args[1]);
5517     itsh = arg_temp(op->args[2]);
5518 
5519     /* ENV should not be modified.  */
5520     tcg_debug_assert(!temp_readonly(ots));
5521 
5522     /* Allocate the output register now.  */
5523     if (ots->val_type != TEMP_VAL_REG) {
5524         TCGRegSet allocated_regs = s->reserved_regs;
5525         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5526         TCGReg oreg;
5527 
5528         /* Make sure to not spill the input registers. */
5529         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5530             tcg_regset_set_reg(allocated_regs, itsl->reg);
5531         }
5532         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5533             tcg_regset_set_reg(allocated_regs, itsh->reg);
5534         }
5535 
5536         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5537                              output_pref(op, 0), ots->indirect_base);
5538         set_temp_val_reg(s, ots, oreg);
5539     }
5540 
5541     /* Promote dup2 of immediates to dupi_vec. */
5542     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5543         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5544         MemOp vece = MO_64;
5545 
5546         if (val == dup_const(MO_8, val)) {
5547             vece = MO_8;
5548         } else if (val == dup_const(MO_16, val)) {
5549             vece = MO_16;
5550         } else if (val == dup_const(MO_32, val)) {
5551             vece = MO_32;
5552         }
5553 
5554         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5555         goto done;
5556     }
5557 
5558     /* If the two inputs form one 64-bit value, try dupm_vec. */
5559     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5560         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5561         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5562         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
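             /* "its" is the subindex-0 half; sync both halves so that the
                full 64-bit value is in memory before attempting dupm. */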
5563 
5564         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5565         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5566 
5567         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5568                              its->mem_base->reg, its->mem_offset)) {
5569             goto done;
5570         }
5571     }
5572 
5573     /* Fall back to generic expansion. */
5574     return false;
5575 
5576  done:
5577     ots->mem_coherent = 0;
5578     if (IS_DEAD_ARG(1)) {
5579         temp_dead(s, itsl);
5580     }
5581     if (IS_DEAD_ARG(2)) {
5582         temp_dead(s, itsh);
5583     }
5584     if (NEED_SYNC_ARG(0)) {
5585         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5586     } else if (IS_DEAD_ARG(0)) {
5587         temp_dead(s, ots);
5588     }
5589     return true;
5590 }
5591 
5592 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5593                          TCGRegSet allocated_regs)
5594 {
5595     if (ts->val_type == TEMP_VAL_REG) {
5596         if (ts->reg != reg) {
5597             tcg_reg_free(s, reg, allocated_regs);
5598             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5599                 /*
5600                  * Cross register class move not supported.  Sync the
5601                  * temp back to its slot and load from there.
5602                  */
5603                 temp_sync(s, ts, allocated_regs, 0, 0);
5604                 tcg_out_ld(s, ts->type, reg,
5605                            ts->mem_base->reg, ts->mem_offset);
5606             }
5607         }
5608     } else {
5609         TCGRegSet arg_set = 0;
5610 
5611         tcg_reg_free(s, reg, allocated_regs);
5612         tcg_regset_set_reg(arg_set, reg);
5613         temp_load(s, ts, arg_set, allocated_regs, 0);
5614     }
5615 }
5616 
5617 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5618                          TCGRegSet allocated_regs)
5619 {
5620     /*
5621      * When the destination is on the stack, load up the temp and store.
5622      * If there are many call-saved registers, the temp might live to
5623      * see another use; otherwise it'll be discarded.
5624      */
5625     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5626     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5627                arg_slot_stk_ofs(arg_slot));
5628 }
5629 
5630 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5631                             TCGTemp *ts, TCGRegSet *allocated_regs)
5632 {
5633     if (arg_slot_reg_p(l->arg_slot)) {
5634         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5635         load_arg_reg(s, reg, ts, *allocated_regs);
5636         tcg_regset_set_reg(*allocated_regs, reg);
5637     } else {
5638         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5639     }
5640 }
5641 
5642 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5643                          intptr_t ref_off, TCGRegSet *allocated_regs)
5644 {
5645     TCGReg reg;
5646 
5647     if (arg_slot_reg_p(arg_slot)) {
5648         reg = tcg_target_call_iarg_regs[arg_slot];
5649         tcg_reg_free(s, reg, *allocated_regs);
5650         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5651         tcg_regset_set_reg(*allocated_regs, reg);
5652     } else {
5653         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5654                             *allocated_regs, 0, false);
5655         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5656         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5657                    arg_slot_stk_ofs(arg_slot));
5658     }
5659 }
5660 
5661 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5662 {
5663     const int nb_oargs = TCGOP_CALLO(op);
5664     const int nb_iargs = TCGOP_CALLI(op);
5665     const TCGLifeData arg_life = op->life;
5666     const TCGHelperInfo *info = tcg_call_info(op);
5667     TCGRegSet allocated_regs = s->reserved_regs;
5668     int i;
5669 
5670     /*
5671      * Move inputs into place in reverse order,
5672      * so that we place stacked arguments first.
5673      */
5674     for (i = nb_iargs - 1; i >= 0; --i) {
5675         const TCGCallArgumentLoc *loc = &info->in[i];
5676         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5677 
5678         switch (loc->kind) {
5679         case TCG_CALL_ARG_NORMAL:
5680         case TCG_CALL_ARG_EXTEND_U:
5681         case TCG_CALL_ARG_EXTEND_S:
5682             load_arg_normal(s, loc, ts, &allocated_regs);
5683             break;
5684         case TCG_CALL_ARG_BY_REF:
5685             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5686             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5687                          arg_slot_stk_ofs(loc->ref_slot),
5688                          &allocated_regs);
5689             break;
5690         case TCG_CALL_ARG_BY_REF_N:
5691             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5692             break;
5693         default:
5694             g_assert_not_reached();
5695         }
5696     }
5697 
5698     /* Mark dead temporaries and free the associated registers.  */
5699     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5700         if (IS_DEAD_ARG(i)) {
5701             temp_dead(s, arg_temp(op->args[i]));
5702         }
5703     }
5704 
5705     /* Clobber call registers.  */
5706     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5707         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5708             tcg_reg_free(s, i, allocated_regs);
5709         }
5710     }
5711 
5712     /*
5713      * Save globals if they might be written by the helper,
5714      * sync them if they might be read.
5715      */
5716     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5717         /* Nothing to do */
5718     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5719         sync_globals(s, allocated_regs);
5720     } else {
5721         save_globals(s, allocated_regs);
5722     }
5723 
5724     /*
5725      * If the ABI passes a pointer to the returned struct as the first
5726      * argument, load that now.  Pass a pointer to the output home slot.
5727      */
5728     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5729         TCGTemp *ts = arg_temp(op->args[0]);
5730 
5731         if (!ts->mem_allocated) {
5732             temp_allocate_frame(s, ts);
5733         }
5734         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5735     }
5736 
5737     tcg_out_call(s, tcg_call_func(op), info);
5738 
5739     /* Assign output registers and emit moves if needed.  */
5740     switch (info->out_kind) {
5741     case TCG_CALL_RET_NORMAL:
5742         for (i = 0; i < nb_oargs; i++) {
5743             TCGTemp *ts = arg_temp(op->args[i]);
5744             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5745 
5746             /* ENV should not be modified.  */
5747             tcg_debug_assert(!temp_readonly(ts));
5748 
5749             set_temp_val_reg(s, ts, reg);
5750             ts->mem_coherent = 0;
5751         }
5752         break;
5753 
5754     case TCG_CALL_RET_BY_VEC:
5755         {
5756             TCGTemp *ts = arg_temp(op->args[0]);
5757 
5758             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5759             tcg_debug_assert(ts->temp_subindex == 0);
5760             if (!ts->mem_allocated) {
5761                 temp_allocate_frame(s, ts);
5762             }
5763             tcg_out_st(s, TCG_TYPE_V128,
5764                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5765                        ts->mem_base->reg, ts->mem_offset);
5766         }
5767         /* fall through to mark all parts in memory */
5768 
5769     case TCG_CALL_RET_BY_REF:
5770         /* The callee has performed a write through the reference. */
5771         for (i = 0; i < nb_oargs; i++) {
5772             TCGTemp *ts = arg_temp(op->args[i]);
5773             ts->val_type = TEMP_VAL_MEM;
5774         }
5775         break;
5776 
5777     default:
5778         g_assert_not_reached();
5779     }
5780 
5781     /* Flush or discard output registers as needed. */
5782     for (i = 0; i < nb_oargs; i++) {
5783         TCGTemp *ts = arg_temp(op->args[i]);
5784         if (NEED_SYNC_ARG(i)) {
5785             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5786         } else if (IS_DEAD_ARG(i)) {
5787             temp_dead(s, ts);
5788         }
5789     }
5790 }
5791 
5792 /**
5793  * atom_and_align_for_opc:
5794  * @s: tcg context
5795  * @opc: memory operation code
5796  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5797  * @allow_two_ops: true if we are prepared to issue two operations
5798  *
5799  * Return the alignment and atomicity to use for the inline fast path
5800  * for the given memory operation.  The alignment may be larger than
5801  * that specified in @opc, and the correct alignment will be diagnosed
5802  * by the slow path helper.
5803  *
5804  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5805  * and issue two loads or stores for subalignment.
5806  */
5807 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5808                                            MemOp host_atom, bool allow_two_ops)
5809 {
5810     MemOp align = memop_alignment_bits(opc);
5811     MemOp size = opc & MO_SIZE;
5812     MemOp half = size ? size - 1 : 0;
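         /* E.g. for MO_64, "half" is MO_32: one power of two smaller. */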
5813     MemOp atom = opc & MO_ATOM_MASK;
5814     MemOp atmax;
5815 
5816     switch (atom) {
5817     case MO_ATOM_NONE:
5818         /* The operation requires no specific atomicity. */
5819         atmax = MO_8;
5820         break;
5821 
5822     case MO_ATOM_IFALIGN:
5823         atmax = size;
5824         break;
5825 
5826     case MO_ATOM_IFALIGN_PAIR:
5827         atmax = half;
5828         break;
5829 
5830     case MO_ATOM_WITHIN16:
5831         atmax = size;
5832         if (size == MO_128) {
5833             /* Misalignment implies !within16, and therefore no atomicity. */
5834         } else if (host_atom != MO_ATOM_WITHIN16) {
5835             /* The host does not implement within16, so require alignment. */
5836             align = MAX(align, size);
5837         }
5838         break;
5839 
5840     case MO_ATOM_WITHIN16_PAIR:
5841         atmax = size;
5842         /*
5843          * Misalignment implies !within16, and therefore half atomicity.
5844          * Any host prepared for two operations can implement this with
5845          * half alignment.
5846          */
5847         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5848             align = MAX(align, half);
5849         }
5850         break;
5851 
5852     case MO_ATOM_SUBALIGN:
5853         atmax = size;
5854         if (host_atom != MO_ATOM_SUBALIGN) {
5855             /* If unaligned but not odd, there are subobjects up to half. */
5856             if (allow_two_ops) {
5857                 align = MAX(align, half);
5858             } else {
5859                 align = MAX(align, size);
5860             }
5861         }
5862         break;
5863 
5864     default:
5865         g_assert_not_reached();
5866     }
5867 
5868     return (TCGAtomAlign){ .atom = atmax, .align = align };
5869 }
5870 
5871 /*
5872  * Similarly for qemu_ld/st slow path helpers.
5873  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5874  * using only the provided backend tcg_out_* functions.
5875  */
5876 
5877 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5878 {
5879     int ofs = arg_slot_stk_ofs(slot);
5880 
5881     /*
5882      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5883      * require extension to uint64_t, adjust the address for uint32_t.
5884      */
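         /* E.g. on a 64-bit big-endian host, an i32 argument occupies the
            high-addressed half of its 8-byte stack slot. */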
5885     if (HOST_BIG_ENDIAN &&
5886         TCG_TARGET_REG_BITS == 64 &&
5887         type == TCG_TYPE_I32) {
5888         ofs += 4;
5889     }
5890     return ofs;
5891 }
5892 
5893 static void tcg_out_helper_load_slots(TCGContext *s,
5894                                       unsigned nmov, TCGMovExtend *mov,
5895                                       const TCGLdstHelperParam *parm)
5896 {
5897     unsigned i;
5898     TCGReg dst3;
5899 
5900     /*
5901      * Start from the end, storing to the stack first.
5902      * This frees those registers, so we need not consider overlap.
5903      */
5904     for (i = nmov; i-- > 0; ) {
5905         unsigned slot = mov[i].dst;
5906 
5907         if (arg_slot_reg_p(slot)) {
5908             goto found_reg;
5909         }
5910 
5911         TCGReg src = mov[i].src;
5912         TCGType dst_type = mov[i].dst_type;
5913         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5914 
5915         /* The argument is going onto the stack; extend into scratch. */
5916         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5917             tcg_debug_assert(parm->ntmp != 0);
5918             mov[i].dst = src = parm->tmp[0];
5919             tcg_out_movext1(s, &mov[i]);
5920         }
5921 
5922         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5923                    tcg_out_helper_stk_ofs(dst_type, slot));
5924     }
5925     return;
5926 
5927  found_reg:
5928     /*
5929      * The remaining arguments are in registers.
5930      * Convert slot numbers to argument registers.
5931      */
5932     nmov = i + 1;
5933     for (i = 0; i < nmov; ++i) {
5934         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5935     }
5936 
5937     switch (nmov) {
5938     case 4:
5939         /* The backend must have provided enough temps for the worst case. */
5940         tcg_debug_assert(parm->ntmp >= 2);
5941 
5942         dst3 = mov[3].dst;
5943         for (unsigned j = 0; j < 3; ++j) {
5944             if (dst3 == mov[j].src) {
5945                 /*
5946                  * Conflict. Copy the source to a temporary, perform the
5947                  * remaining moves, then the extension from our scratch
5948                  * on the way out.
5949                  */
5950                 TCGReg scratch = parm->tmp[1];
5951 
5952                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5953                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5954                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5955                 return;
5956             }
5957         }
5958 
5959         /* No conflicts: perform this move and continue. */
5960         tcg_out_movext1(s, &mov[3]);
5961         /* fall through */
5962 
5963     case 3:
5964         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5965                         parm->ntmp ? parm->tmp[0] : -1);
5966         break;
5967     case 2:
5968         tcg_out_movext2(s, mov, mov + 1,
5969                         parm->ntmp ? parm->tmp[0] : -1);
5970         break;
5971     case 1:
5972         tcg_out_movext1(s, mov);
5973         break;
5974     default:
5975         g_assert_not_reached();
5976     }
5977 }
5978 
5979 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5980                                     TCGType type, tcg_target_long imm,
5981                                     const TCGLdstHelperParam *parm)
5982 {
5983     if (arg_slot_reg_p(slot)) {
5984         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5985     } else {
5986         int ofs = tcg_out_helper_stk_ofs(type, slot);
5987         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5988             tcg_debug_assert(parm->ntmp != 0);
5989             tcg_out_movi(s, type, parm->tmp[0], imm);
5990             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5991         }
5992     }
5993 }
5994 
5995 static void tcg_out_helper_load_common_args(TCGContext *s,
5996                                             const TCGLabelQemuLdst *ldst,
5997                                             const TCGLdstHelperParam *parm,
5998                                             const TCGHelperInfo *info,
5999                                             unsigned next_arg)
6000 {
6001     TCGMovExtend ptr_mov = {
6002         .dst_type = TCG_TYPE_PTR,
6003         .src_type = TCG_TYPE_PTR,
6004         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6005     };
6006     const TCGCallArgumentLoc *loc = &info->in[0];
6007     TCGType type;
6008     unsigned slot;
6009     tcg_target_ulong imm;
6010 
6011     /*
6012      * Handle env, which is always first.
6013      */
6014     ptr_mov.dst = loc->arg_slot;
6015     ptr_mov.src = TCG_AREG0;
6016     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6017 
6018     /*
6019      * Handle oi.
6020      */
6021     imm = ldst->oi;
6022     loc = &info->in[next_arg];
6023     type = TCG_TYPE_I32;
6024     switch (loc->kind) {
6025     case TCG_CALL_ARG_NORMAL:
6026         break;
6027     case TCG_CALL_ARG_EXTEND_U:
6028     case TCG_CALL_ARG_EXTEND_S:
6029         /* No extension required for MemOpIdx. */
6030         tcg_debug_assert(imm <= INT32_MAX);
6031         type = TCG_TYPE_REG;
6032         break;
6033     default:
6034         g_assert_not_reached();
6035     }
6036     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6037     next_arg++;
6038 
6039     /*
6040      * Handle ra.
6041      */
6042     loc = &info->in[next_arg];
6043     slot = loc->arg_slot;
6044     if (parm->ra_gen) {
6045         int arg_reg = -1;
6046         TCGReg ra_reg;
6047 
6048         if (arg_slot_reg_p(slot)) {
6049             arg_reg = tcg_target_call_iarg_regs[slot];
6050         }
6051         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6052 
6053         ptr_mov.dst = slot;
6054         ptr_mov.src = ra_reg;
6055         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6056     } else {
6057         imm = (uintptr_t)ldst->raddr;
6058         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6059     }
6060 }
6061 
6062 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6063                                        const TCGCallArgumentLoc *loc,
6064                                        TCGType dst_type, TCGType src_type,
6065                                        TCGReg lo, TCGReg hi)
6066 {
6067     MemOp reg_mo;
6068 
6069     if (dst_type <= TCG_TYPE_REG) {
6070         MemOp src_ext;
6071 
6072         switch (loc->kind) {
6073         case TCG_CALL_ARG_NORMAL:
6074             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6075             break;
6076         case TCG_CALL_ARG_EXTEND_U:
6077             dst_type = TCG_TYPE_REG;
6078             src_ext = MO_UL;
6079             break;
6080         case TCG_CALL_ARG_EXTEND_S:
6081             dst_type = TCG_TYPE_REG;
6082             src_ext = MO_SL;
6083             break;
6084         default:
6085             g_assert_not_reached();
6086         }
6087 
6088         mov[0].dst = loc->arg_slot;
6089         mov[0].dst_type = dst_type;
6090         mov[0].src = lo;
6091         mov[0].src_type = src_type;
6092         mov[0].src_ext = src_ext;
6093         return 1;
6094     }
6095 
6096     if (TCG_TARGET_REG_BITS == 32) {
6097         assert(dst_type == TCG_TYPE_I64);
6098         reg_mo = MO_32;
6099     } else {
6100         assert(dst_type == TCG_TYPE_I128);
6101         reg_mo = MO_64;
6102     }
6103 
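         /* Two-part value: the low half goes into the first argument slot
            on a little-endian host, into the second slot on big-endian. */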
6104     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6105     mov[0].src = lo;
6106     mov[0].dst_type = TCG_TYPE_REG;
6107     mov[0].src_type = TCG_TYPE_REG;
6108     mov[0].src_ext = reg_mo;
6109 
6110     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6111     mov[1].src = hi;
6112     mov[1].dst_type = TCG_TYPE_REG;
6113     mov[1].src_type = TCG_TYPE_REG;
6114     mov[1].src_ext = reg_mo;
6115 
6116     return 2;
6117 }
6118 
6119 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6120                                    const TCGLdstHelperParam *parm)
6121 {
6122     const TCGHelperInfo *info;
6123     const TCGCallArgumentLoc *loc;
6124     TCGMovExtend mov[2];
6125     unsigned next_arg, nmov;
6126     MemOp mop = get_memop(ldst->oi);
6127 
6128     switch (mop & MO_SIZE) {
6129     case MO_8:
6130     case MO_16:
6131     case MO_32:
6132         info = &info_helper_ld32_mmu;
6133         break;
6134     case MO_64:
6135         info = &info_helper_ld64_mmu;
6136         break;
6137     case MO_128:
6138         info = &info_helper_ld128_mmu;
6139         break;
6140     default:
6141         g_assert_not_reached();
6142     }
6143 
6144     /* Defer env argument. */
6145     next_arg = 1;
6146 
6147     loc = &info->in[next_arg];
6148     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6149         /*
6150          * 32-bit host with 32-bit guest: zero-extend the guest address
6151          * to 64 bits for the helper by storing the low part, then
6152          * load a zero for the high part.
6153          */
6154         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6155                                TCG_TYPE_I32, TCG_TYPE_I32,
6156                                ldst->addr_reg, -1);
6157         tcg_out_helper_load_slots(s, 1, mov, parm);
6158 
6159         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6160                                 TCG_TYPE_I32, 0, parm);
6161         next_arg += 2;
6162     } else {
6163         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6164                                       ldst->addr_reg, -1);
6165         tcg_out_helper_load_slots(s, nmov, mov, parm);
6166         next_arg += nmov;
6167     }
6168 
6169     switch (info->out_kind) {
6170     case TCG_CALL_RET_NORMAL:
6171     case TCG_CALL_RET_BY_VEC:
6172         break;
6173     case TCG_CALL_RET_BY_REF:
6174         /*
6175          * The return reference is in the first argument slot.
6176          * We need memory in which to return: re-use the top of stack.
6177          */
6178         {
6179             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6180 
6181             if (arg_slot_reg_p(0)) {
6182                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6183                                  TCG_REG_CALL_STACK, ofs_slot0);
6184             } else {
6185                 tcg_debug_assert(parm->ntmp != 0);
6186                 tcg_out_addi_ptr(s, parm->tmp[0],
6187                                  TCG_REG_CALL_STACK, ofs_slot0);
6188                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6189                            TCG_REG_CALL_STACK, ofs_slot0);
6190             }
6191         }
6192         break;
6193     default:
6194         g_assert_not_reached();
6195     }
6196 
6197     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6198 }
6199 
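/*
 * Emit code to move the return value of a qemu_ld slow-path helper
 * into the data register(s) of the ldst label: a plain or extending
 * move from the ABI return register(s), or, for a 128-bit value
 * returned in a vector register or by reference, loads from the
 * stack slot at TCG_TARGET_CALL_STACK_OFFSET.
 */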
6200 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6201                                   bool load_sign,
6202                                   const TCGLdstHelperParam *parm)
6203 {
6204     MemOp mop = get_memop(ldst->oi);
6205     TCGMovExtend mov[2];
6206     int ofs_slot0;
6207 
6208     switch (ldst->type) {
6209     case TCG_TYPE_I64:
6210         if (TCG_TARGET_REG_BITS == 32) {
6211             break;
6212         }
6213         /* fall through */
6214 
6215     case TCG_TYPE_I32:
6216         mov[0].dst = ldst->datalo_reg;
6217         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6218         mov[0].dst_type = ldst->type;
6219         mov[0].src_type = TCG_TYPE_REG;
6220 
6221         /*
6222          * If load_sign, then we allowed the helper to perform the
6223          * appropriate sign extension to tcg_target_ulong, and all
6224          * we need now is a plain move.
6225          *
6226          * If they do not, then we expect the relevant extension
6227          * If not, then we expect the relevant extension
6228          * instruction to be no more expensive than a move, and
6229          * we thus save icache space by using only one of the
6230          * two helper functions.
6231         if (load_sign || !(mop & MO_SIGN)) {
6232             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6233                 mov[0].src_ext = MO_32;
6234             } else {
6235                 mov[0].src_ext = MO_64;
6236             }
6237         } else {
6238             mov[0].src_ext = mop & MO_SSIZE;
6239         }
6240         tcg_out_movext1(s, mov);
6241         return;
6242 
6243     case TCG_TYPE_I128:
6244         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6245         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6246         switch (TCG_TARGET_CALL_RET_I128) {
6247         case TCG_CALL_RET_NORMAL:
6248             break;
6249         case TCG_CALL_RET_BY_VEC:
6250             tcg_out_st(s, TCG_TYPE_V128,
6251                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6252                        TCG_REG_CALL_STACK, ofs_slot0);
6253             /* fall through */
6254         case TCG_CALL_RET_BY_REF:
6255             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6256                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6257             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6258                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6259             return;
6260         default:
6261             g_assert_not_reached();
6262         }
6263         break;
6264 
6265     default:
6266         g_assert_not_reached();
6267     }
6268 
6269     mov[0].dst = ldst->datalo_reg;
6270     mov[0].src =
6271         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6272     mov[0].dst_type = TCG_TYPE_REG;
6273     mov[0].src_type = TCG_TYPE_REG;
6274     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6275 
6276     mov[1].dst = ldst->datahi_reg;
6277     mov[1].src =
6278         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6279     mov[1].dst_type = TCG_TYPE_REG;
6280     mov[1].src_type = TCG_TYPE_REG;
6281     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6282 
6283     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6284 }
6285 
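/*
 * Emit code to load the arguments for a qemu_st slow-path helper call:
 * select the helper by access size, then marshal the guest address and
 * the data value into argument slots, spilling a 128-bit value to the
 * stack when the ABI passes it by reference.  The remaining arguments
 * (env, oi, retaddr) are loaded by tcg_out_helper_load_common_args.
 */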
6286 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6287                                    const TCGLdstHelperParam *parm)
6288 {
6289     const TCGHelperInfo *info;
6290     const TCGCallArgumentLoc *loc;
6291     TCGMovExtend mov[4];
6292     TCGType data_type;
6293     unsigned next_arg, nmov, n;
6294     MemOp mop = get_memop(ldst->oi);
6295 
6296     switch (mop & MO_SIZE) {
6297     case MO_8:
6298     case MO_16:
6299     case MO_32:
6300         info = &info_helper_st32_mmu;
6301         data_type = TCG_TYPE_I32;
6302         break;
6303     case MO_64:
6304         info = &info_helper_st64_mmu;
6305         data_type = TCG_TYPE_I64;
6306         break;
6307     case MO_128:
6308         info = &info_helper_st128_mmu;
6309         data_type = TCG_TYPE_I128;
6310         break;
6311     default:
6312         g_assert_not_reached();
6313     }
6314 
6315     /* Defer env argument. */
6316     next_arg = 1;
6317     nmov = 0;
6318 
6319     /* Handle addr argument. */
6320     loc = &info->in[next_arg];
6321     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6322     if (TCG_TARGET_REG_BITS == 32) {
6323         /*
6324          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6325          * to 64 bits for the helper by storing the low part.  Later,
6326          * after we have processed the register inputs, we will load a
6327          * zero for the high part.
6328          */
6329         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6330                                TCG_TYPE_I32, TCG_TYPE_I32,
6331                                ldst->addr_reg, -1);
6332         next_arg += 2;
6333         nmov += 1;
6334     } else {
6335         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6336                                    ldst->addr_reg, -1);
6337         next_arg += n;
6338         nmov += n;
6339     }
6340 
6341     /* Handle data argument. */
6342     loc = &info->in[next_arg];
6343     switch (loc->kind) {
6344     case TCG_CALL_ARG_NORMAL:
6345     case TCG_CALL_ARG_EXTEND_U:
6346     case TCG_CALL_ARG_EXTEND_S:
6347         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6348                                    ldst->datalo_reg, ldst->datahi_reg);
6349         next_arg += n;
6350         nmov += n;
6351         tcg_out_helper_load_slots(s, nmov, mov, parm);
6352         break;
6353 
6354     case TCG_CALL_ARG_BY_REF:
6355         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6356         tcg_debug_assert(data_type == TCG_TYPE_I128);
6357         tcg_out_st(s, TCG_TYPE_I64,
6358                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6359                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6360         tcg_out_st(s, TCG_TYPE_I64,
6361                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6362                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6363 
6364         tcg_out_helper_load_slots(s, nmov, mov, parm);
6365 
6366         if (arg_slot_reg_p(loc->arg_slot)) {
6367             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6368                              TCG_REG_CALL_STACK,
6369                              arg_slot_stk_ofs(loc->ref_slot));
6370         } else {
6371             tcg_debug_assert(parm->ntmp != 0);
6372             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6373                              arg_slot_stk_ofs(loc->ref_slot));
6374             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6375                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6376         }
6377         next_arg += 2;
6378         break;
6379 
6380     default:
6381         g_assert_not_reached();
6382     }
6383 
6384     if (TCG_TARGET_REG_BITS == 32) {
6385         /* Zero extend the address by loading a zero for the high part. */
6386         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6387         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6388     }
6389 
6390     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6391 }
6392 
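/*
 * Generate host code for one TranslationBlock: run the optimizer and
 * the liveness passes over the op stream, then walk the ops performing
 * register allocation and emission.  Returns the size of the generated
 * code, or a negative value if the code buffer or the 16-bit insn
 * offsets overflowed and generation must be restarted.
 */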
6393 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6394 {
6395     int i, start_words, num_insns;
6396     TCGOp *op;
6397 
6398     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6399                  && qemu_log_in_addr_range(pc_start))) {
6400         FILE *logfile = qemu_log_trylock();
6401         if (logfile) {
6402             fprintf(logfile, "OP:\n");
6403             tcg_dump_ops(s, logfile, false);
6404             fprintf(logfile, "\n");
6405             qemu_log_unlock(logfile);
6406         }
6407     }
6408 
6409 #ifdef CONFIG_DEBUG_TCG
6410     /* Ensure all labels referenced have been emitted.  */
6411     {
6412         TCGLabel *l;
6413         bool error = false;
6414 
6415         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6416             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6417                 qemu_log_mask(CPU_LOG_TB_OP,
6418                               "$L%d referenced but not present.\n", l->id);
6419                 error = true;
6420             }
6421         }
6422         assert(!error);
6423     }
6424 #endif
6425 
6426     /* Do not reuse any EBB temp that may have been allocated within the TB. */
6427     tcg_temp_ebb_reset_freed(s);
6428 
6429     tcg_optimize(s);
6430 
6431     reachable_code_pass(s);
6432     liveness_pass_0(s);
6433     liveness_pass_1(s);
6434 
6435     if (s->nb_indirects > 0) {
6436         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6437                      && qemu_log_in_addr_range(pc_start))) {
6438             FILE *logfile = qemu_log_trylock();
6439             if (logfile) {
6440                 fprintf(logfile, "OP before indirect lowering:\n");
6441                 tcg_dump_ops(s, logfile, false);
6442                 fprintf(logfile, "\n");
6443                 qemu_log_unlock(logfile);
6444             }
6445         }
6446 
6447         /* Replace indirect temps with direct temps.  */
6448         if (liveness_pass_2(s)) {
6449             /* If changes were made, re-run liveness.  */
6450             liveness_pass_1(s);
6451         }
6452     }
6453 
6454     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6455                  && qemu_log_in_addr_range(pc_start))) {
6456         FILE *logfile = qemu_log_trylock();
6457         if (logfile) {
6458             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6459             tcg_dump_ops(s, logfile, true);
6460             fprintf(logfile, "\n");
6461             qemu_log_unlock(logfile);
6462         }
6463     }
6464 
6465     /* Initialize goto_tb jump offsets. */
6466     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6467     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6468     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6469     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6470 
6471     tcg_reg_alloc_start(s);
6472 
6473     /*
6474      * Reset the buffer pointers when restarting after overflow.
6475      * TODO: Move this into translate-all.c with the rest of the
6476      * buffer management.  Having only this done here is confusing.
6477      */
6478     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6479     s->code_ptr = s->code_buf;
6480     s->data_gen_ptr = NULL;
6481 
6482     QSIMPLEQ_INIT(&s->ldst_labels);
6483     s->pool_labels = NULL;
6484 
6485     start_words = s->insn_start_words;
6486     s->gen_insn_data =
6487         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6488 
6489     tcg_out_tb_start(s);
6490 
6491     num_insns = -1;
6492     QTAILQ_FOREACH(op, &s->ops, link) {
6493         TCGOpcode opc = op->opc;
6494 
6495         switch (opc) {
6496         case INDEX_op_mov:
6497         case INDEX_op_mov_vec:
6498             tcg_reg_alloc_mov(s, op);
6499             break;
6500         case INDEX_op_dup_vec:
6501             tcg_reg_alloc_dup(s, op);
6502             break;
6503         case INDEX_op_insn_start:
6504             if (num_insns >= 0) {
6505                 size_t off = tcg_current_code_size(s);
6506                 s->gen_insn_end_off[num_insns] = off;
6507                 /* Assert that we do not overflow our stored offset.  */
6508                 assert(s->gen_insn_end_off[num_insns] == off);
6509             }
6510             num_insns++;
6511             for (i = 0; i < start_words; ++i) {
6512                 s->gen_insn_data[num_insns * start_words + i] =
6513                     tcg_get_insn_start_param(op, i);
6514             }
6515             break;
6516         case INDEX_op_discard:
6517             temp_dead(s, arg_temp(op->args[0]));
6518             break;
6519         case INDEX_op_set_label:
6520             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6521             tcg_out_label(s, arg_label(op->args[0]));
6522             break;
6523         case INDEX_op_call:
6524             tcg_reg_alloc_call(s, op);
6525             break;
6526         case INDEX_op_exit_tb:
6527             tcg_out_exit_tb(s, op->args[0]);
6528             break;
6529         case INDEX_op_goto_tb:
6530             tcg_out_goto_tb(s, op->args[0]);
6531             break;
6532         case INDEX_op_dup2_vec:
6533             if (tcg_reg_alloc_dup2(s, op)) {
6534                 break;
6535             }
6536             /* fall through */
6537         default:
6538             /* Sanity check that we've not introduced any unhandled opcodes. */
6539             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6540                                               TCGOP_FLAGS(op)));
6541             /* Note: code generation could be sped up further with
6542                specialized register allocator functions for some
6543                common argument patterns. */
6544             tcg_reg_alloc_op(s, op);
6545             break;
6546         }
6547         /* Test for (pending) buffer overflow.  The assumption is that any
6548            one operation beginning below the high water mark cannot overrun
6549            the buffer completely.  Thus we can test for overflow after
6550            generating code without having to check during generation.  */
6551         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6552             return -1;
6553         }
6554         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6555         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6556             return -2;
6557         }
6558     }
6559     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6560     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6561 
6562     /* Generate TB finalization at the end of block */
6563     i = tcg_out_ldst_finalize(s);
6564     if (i < 0) {
6565         return i;
6566     }
6567     i = tcg_out_pool_finalize(s);
6568     if (i < 0) {
6569         return i;
6570     }
6571     if (!tcg_resolve_relocs(s)) {
6572         return -2;
6573     }
6574 
6575 #ifndef CONFIG_TCG_INTERPRETER
6576     /* flush instruction cache */
6577     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6578                         (uintptr_t)s->code_buf,
6579                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6580 #endif
6581 
6582     return tcg_current_code_size(s);
6583 }
6584 
6585 #ifdef ELF_HOST_MACHINE
6586 /* In order to use this feature, the backend needs to do three things:
6587 
6588    (1) Define ELF_HOST_MACHINE to indicate both what value to
6589        put into the ELF image and to indicate support for the feature.
6590 
6591    (2) Define tcg_register_jit.  This should create a buffer containing
6592        the contents of a .debug_frame section that describes the post-
6593        prologue unwind info for the tcg machine.
6594 
6595    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6596 */
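
/* For example (a sketch only; each host's tcg-target.c.inc provides the
   real version, and the DebugFrame type shown here stands in for the
   backend's own packed struct):

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           static const DebugFrame debug_frame = {
               ... CIE and FDE contents describing the prologue's frame ...
           };
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }
*/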
6597 
6598 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6599 typedef enum {
6600     JIT_NOACTION = 0,
6601     JIT_REGISTER_FN,
6602     JIT_UNREGISTER_FN
6603 } jit_actions_t;
6604 
6605 struct jit_code_entry {
6606     struct jit_code_entry *next_entry;
6607     struct jit_code_entry *prev_entry;
6608     const void *symfile_addr;
6609     uint64_t symfile_size;
6610 };
6611 
6612 struct jit_descriptor {
6613     uint32_t version;
6614     uint32_t action_flag;
6615     struct jit_code_entry *relevant_entry;
6616     struct jit_code_entry *first_entry;
6617 };
6618 
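/* GDB sets a breakpoint on this function.  The noinline attribute and
   the empty asm statement keep it from being inlined or elided, so the
   breakpoint is reliably hit each time the descriptor is updated.  */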
6619 void __jit_debug_register_code(void) __attribute__((noinline));
6620 void __jit_debug_register_code(void)
6621 {
6622     asm("");
6623 }
6624 
6625 /* Must statically initialize the version, because GDB may check
6626    the version before we can set it.  */
6627 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6628 
6629 /* End GDB interface.  */
6630 
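/* Return the offset of STR within STRTAB.  Offset 0 holds the initial
   empty string, so the search starts at offset 1; the caller must
   guarantee that STR is present, as there is no terminating check.  */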
6631 static int find_string(const char *strtab, const char *str)
6632 {
6633     const char *p = strtab + 1;
6634 
6635     while (1) {
6636         if (strcmp(p, str) == 0) {
6637             return p - strtab;
6638         }
6639         p += strlen(p) + 1;
6640     }
6641 }
6642 
6643 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6644                                  const void *debug_frame,
6645                                  size_t debug_frame_size)
6646 {
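    /*
     * A minimal DWARF .debug_info section: one compile-unit DIE and one
     * subprogram DIE covering code_gen_buffer.  The layout must stay in
     * sync with the abbreviation table in the .da initializer below.
     */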
6647     struct __attribute__((packed)) DebugInfo {
6648         uint32_t  len;
6649         uint16_t  version;
6650         uint32_t  abbrev;
6651         uint8_t   ptr_size;
6652         uint8_t   cu_die;
6653         uint16_t  cu_lang;
6654         uintptr_t cu_low_pc;
6655         uintptr_t cu_high_pc;
6656         uint8_t   fn_die;
6657         char      fn_name[16];
6658         uintptr_t fn_low_pc;
6659         uintptr_t fn_high_pc;
6660         uint8_t   cu_eoc;
6661     };
6662 
6663     struct ElfImage {
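    /*
     * The complete image handed to GDB: ELF and program headers,
     * section headers, a symbol table, debug info, abbreviations, and
     * a string table.  The backend's .debug_frame contents are
     * appended directly after this struct.
     */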
6664         ElfW(Ehdr) ehdr;
6665         ElfW(Phdr) phdr;
6666         ElfW(Shdr) shdr[7];
6667         ElfW(Sym)  sym[2];
6668         struct DebugInfo di;
6669         uint8_t    da[24];
6670         char       str[80];
6671     };
6672 
6673     struct ElfImage *img;
6674 
6675     static const struct ElfImage img_template = {
6676         .ehdr = {
6677             .e_ident[EI_MAG0] = ELFMAG0,
6678             .e_ident[EI_MAG1] = ELFMAG1,
6679             .e_ident[EI_MAG2] = ELFMAG2,
6680             .e_ident[EI_MAG3] = ELFMAG3,
6681             .e_ident[EI_CLASS] = ELF_CLASS,
6682             .e_ident[EI_DATA] = ELF_DATA,
6683             .e_ident[EI_VERSION] = EV_CURRENT,
6684             .e_type = ET_EXEC,
6685             .e_machine = ELF_HOST_MACHINE,
6686             .e_version = EV_CURRENT,
6687             .e_phoff = offsetof(struct ElfImage, phdr),
6688             .e_shoff = offsetof(struct ElfImage, shdr),
6689             .e_ehsize = sizeof(ElfW(Ehdr)),
6690             .e_phentsize = sizeof(ElfW(Phdr)),
6691             .e_phnum = 1,
6692             .e_shentsize = sizeof(ElfW(Shdr)),
6693             .e_shnum = ARRAY_SIZE(img->shdr),
6694             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6695 #ifdef ELF_HOST_FLAGS
6696             .e_flags = ELF_HOST_FLAGS,
6697 #endif
6698 #ifdef ELF_OSABI
6699             .e_ident[EI_OSABI] = ELF_OSABI,
6700 #endif
6701         },
6702         .phdr = {
6703             .p_type = PT_LOAD,
6704             .p_flags = PF_X,
6705         },
6706         .shdr = {
6707             [0] = { .sh_type = SHT_NULL },
6708             /* Trick: The contents of code_gen_buffer are not present in
6709                this fake ELF file; that got allocated elsewhere.  Therefore
6710                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6711                will not look for contents.  We can record any address.  */
6712             [1] = { /* .text */
6713                 .sh_type = SHT_NOBITS,
6714                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6715             },
6716             [2] = { /* .debug_info */
6717                 .sh_type = SHT_PROGBITS,
6718                 .sh_offset = offsetof(struct ElfImage, di),
6719                 .sh_size = sizeof(struct DebugInfo),
6720             },
6721             [3] = { /* .debug_abbrev */
6722                 .sh_type = SHT_PROGBITS,
6723                 .sh_offset = offsetof(struct ElfImage, da),
6724                 .sh_size = sizeof(img->da),
6725             },
6726             [4] = { /* .debug_frame */
6727                 .sh_type = SHT_PROGBITS,
6728                 .sh_offset = sizeof(struct ElfImage),
6729             },
6730             [5] = { /* .symtab */
6731                 .sh_type = SHT_SYMTAB,
6732                 .sh_offset = offsetof(struct ElfImage, sym),
6733                 .sh_size = sizeof(img->sym),
6734                 .sh_info = 1,
6735                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6736                 .sh_entsize = sizeof(ElfW(Sym)),
6737             },
6738             [6] = { /* .strtab */
6739                 .sh_type = SHT_STRTAB,
6740                 .sh_offset = offsetof(struct ElfImage, str),
6741                 .sh_size = sizeof(img->str),
6742             }
6743         },
6744         .sym = {
6745             [1] = { /* code_gen_buffer */
6746                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6747                 .st_shndx = 1,
6748             }
6749         },
6750         .di = {
6751             .len = sizeof(struct DebugInfo) - 4,
6752             .version = 2,
6753             .ptr_size = sizeof(void *),
6754             .cu_die = 1,
6755             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6756             .fn_die = 2,
6757             .fn_name = "code_gen_buffer"
6758         },
6759         .da = {
6760             1,          /* abbrev number (the cu) */
6761             0x11, 1,    /* DW_TAG_compile_unit, has children */
6762             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6763             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6764             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6765             0, 0,       /* end of abbrev */
6766             2,          /* abbrev number (the fn) */
6767             0x2e, 0,    /* DW_TAG_subprogram, no children */
6768             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6769             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6770             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6771             0, 0,       /* end of abbrev */
6772             0           /* no more abbrev */
6773         },
6774         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6775                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6776     };
6777 
6778     /* We only need a single jit entry; statically allocate it.  */
6779     static struct jit_code_entry one_entry;
6780 
6781     uintptr_t buf = (uintptr_t)buf_ptr;
6782     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6783     DebugFrameHeader *dfh;
6784 
6785     img = g_malloc(img_size);
6786     *img = img_template;
6787 
6788     img->phdr.p_vaddr = buf;
6789     img->phdr.p_paddr = buf;
6790     img->phdr.p_memsz = buf_size;
6791 
6792     img->shdr[1].sh_name = find_string(img->str, ".text");
6793     img->shdr[1].sh_addr = buf;
6794     img->shdr[1].sh_size = buf_size;
6795 
6796     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6797     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6798 
6799     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6800     img->shdr[4].sh_size = debug_frame_size;
6801 
6802     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6803     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6804 
6805     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6806     img->sym[1].st_value = buf;
6807     img->sym[1].st_size = buf_size;
6808 
6809     img->di.cu_low_pc = buf;
6810     img->di.cu_high_pc = buf + buf_size;
6811     img->di.fn_low_pc = buf;
6812     img->di.fn_high_pc = buf + buf_size;
6813 
6814     dfh = (DebugFrameHeader *)(img + 1);
6815     memcpy(dfh, debug_frame, debug_frame_size);
6816     dfh->fde.func_start = buf;
6817     dfh->fde.func_len = buf_size;
6818 
6819 #ifdef DEBUG_JIT
6820     /* Define DEBUG_JIT to write the ELF image to a file, so that it
6821        can be inspected with readelf, objdump, or similar utilities.  */
6822     {
6823         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6824         FILE *f = fopen(jit, "w+b");
6825         if (f) {
6826             if (fwrite(img, img_size, 1, f) != 1) {
6827                 /* Nothing to do on failure; this consumes the fwrite result.  */
6828             }
6829             fclose(f);
6830         }
6831     }
6832 #endif
6833 
6834     one_entry.symfile_addr = img;
6835     one_entry.symfile_size = img_size;
6836 
6837     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6838     __jit_debug_descriptor.relevant_entry = &one_entry;
6839     __jit_debug_descriptor.first_entry = &one_entry;
6840     __jit_debug_register_code();
6841 }
6842 #else
6843 /* No support for the feature.  Provide the entry point expected by exec.c,
6844    and implement the internal function we declared earlier.  */
6845 
6846 static void tcg_register_jit_int(const void *buf, size_t size,
6847                                  const void *debug_frame,
6848                                  size_t debug_frame_size)
6849 {
6850 }
6851 
6852 void tcg_register_jit(const void *buf, size_t buf_size)
6853 {
6854 }
6855 #endif /* ELF_HOST_MACHINE */
6856 
6857 #if !TCG_TARGET_MAYBE_vec
6858 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6859 {
6860     g_assert_not_reached();
6861 }
6862 #endif
6863