/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
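
/*
 * Illustrative note (not from the original source): the emit/patch pairs
 * above compile down to a single store-and-advance per backend, because
 * TCG_TARGET_INSN_UNIT_SIZE is a compile-time constant.  For example, a
 * backend with 1-byte insn units emitting one 32-bit word:
 *
 *     tcg_out32(s, insn);   // memcpy path; advances code_ptr by 4 units
 *
 * while a backend with 4-byte insn units takes the direct-store path and
 * advances code_ptr by a single unit.
 */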

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
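
/*
 * Illustrative example (not part of the original source): sign-extending
 * a 16-bit value from a 32-bit temp into a 64-bit destination reduces to
 * one backend call:
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src);
 *     // -> tcg_out_ext16s(s, TCG_TYPE_I64, dst, src)
 *
 * while MO_UL with matching 32-bit types degenerates to a plain move.
 */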

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
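
/*
 * Illustrative overlap case: if @i1 moves R0 -> R1 while @i2 moves
 * R1 -> R0, then i1->dst == i2->src and i2->dst == i1->src, so the code
 * above either issues one xchg (when the backend provides it) or routes
 * i1's source through @scratch before performing the two extensions.
 */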

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
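
/*
 * The resulting list is ordered by descending nlong, then by descending
 * memcmp() of the data, so duplicate constants end up adjacent and
 * tcg_out_pool_finalize() below can emit each distinct value only once.
 * Illustrative example: inserting the 1-long values 5, 9, 5 yields the
 * pool order 9, 5, 5, with both 5s sharing a single pool slot.
 */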

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
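
/*
 * Illustrative expansion: C_PFX3(c_o1_i2_, r, r, ri) pastes into the
 * single token c_o1_i2_r_r_ri.  The C_On_Im and C_Nn_Im macros below
 * are expanded three times with different definitions: once to build
 * an enumeration, once to build constraint-string initializers, and
 * once to name the enumerator returned from tcg_target_op_def().
 */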

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
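
/*
 * Worked example of the second expansion: an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h becomes
 *     { 1, 2, { "r", "r", "ri" } }
 * here, stored at the index that the first (enum) expansion named
 * c_o1_i2_r_r_ri, since both expansions include the header in the same
 * order.
 */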

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
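
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expands to
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 * so a backend that defines outop_add with the wrong TCGOutOp subclass
 * fails to compile instead of misbehaving at runtime.
 */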

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
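
/*
 * Illustrative usage note: per-TB scratch data is carved from this pool
 * via the tcg_malloc() wrapper and recycled en masse by tcg_pool_reset()
 * below, e.g. the label allocation in gen_new_label() above:
 *
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));   // pool, not g_malloc
 *
 * Only requests larger than TCG_POOL_CHUNK_SIZE get a dedicated chunk,
 * chained on pool_first_large and freed by tcg_pool_reset().
 */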

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
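
/*
 * Worked example for the nargs computation above (illustrative):
 * arguments occupy 3-bit typecode fields above the 3-bit return type,
 * so for a helper with four arguments the last non-zero field lies in
 * bits 9..11 of typemask >> 3.  Then 32 - clz32(...) yields a value in
 * the range 10..12, and DIV_ROUND_UP(.., 3) recovers nargs = 4.
 */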

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
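
/*
 * Worked example (register counts vary by backend): with 6 integer
 * argument registers, arg_slot 0..5 are registers and arg_slot 6 is the
 * first stack slot, at byte offset
 *     TCG_TARGET_CALL_STACK_OFFSET + 0 * sizeof(tcg_target_long).
 */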

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
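
/*
 * For example, an arg_slot of 3 becomes 4, so that a 64-bit argument on
 * a 32-bit host requiring even register pairing starts on an aligned
 * slot pair; an already-even slot is left unchanged.
 */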

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
1367      * Allocate space from "ref_slot", which will be adjusted to
1368      * follow the parameters on the stack.
1369      */
1370     loc[0].ref_slot = cum->ref_slot;
1371 
1372     /*
1373      * Subsequent words also go into the reference slot, but
1374      * do not accumulate into the regular arguments.
1375      */
1376     for (int i = 1; i < n; ++i) {
1377         loc[i] = (TCGCallArgumentLoc){
1378             .kind = TCG_CALL_ARG_BY_REF_N,
1379             .arg_idx = cum->arg_idx,
1380             .tmp_subindex = i,
1381             .ref_slot = cum->ref_slot + i,
1382         };
1383     }
1384     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1385     cum->ref_slot += n;
1386 }
1387 
1388 static void init_call_layout(TCGHelperInfo *info)
1389 {
1390     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1391     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1392     unsigned typemask = info->typemask;
1393     unsigned typecode;
1394     TCGCumulativeArgs cum = { };
1395 
1396     /*
1397      * Parse and place any function return value.
1398      */
1399     typecode = typemask & 7;
1400     switch (typecode) {
1401     case dh_typecode_void:
1402         info->nr_out = 0;
1403         break;
1404     case dh_typecode_i32:
1405     case dh_typecode_s32:
1406     case dh_typecode_ptr:
1407         info->nr_out = 1;
1408         info->out_kind = TCG_CALL_RET_NORMAL;
1409         break;
1410     case dh_typecode_i64:
1411     case dh_typecode_s64:
1412         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1413         info->out_kind = TCG_CALL_RET_NORMAL;
1414         /* Query the last register now to trigger any assert early. */
1415         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1416         break;
1417     case dh_typecode_i128:
1418         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1419         info->out_kind = TCG_TARGET_CALL_RET_I128;
1420         switch (TCG_TARGET_CALL_RET_I128) {
1421         case TCG_CALL_RET_NORMAL:
1422             /* Query the last register now to trigger any assert early. */
1423             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1424             break;
1425         case TCG_CALL_RET_BY_VEC:
1426             /* Query the single register now to trigger any assert early. */
1427             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1428             break;
1429         case TCG_CALL_RET_BY_REF:
1430             /*
1431              * Allocate the first argument to the output.
1432              * We don't need to store this anywhere, just make it
1433              * unavailable for use in the input loop below.
1434              */
1435             cum.arg_slot = 1;
1436             break;
1437         default:
1438             qemu_build_not_reached();
1439         }
1440         break;
1441     default:
1442         g_assert_not_reached();
1443     }
1444 
1445     /*
1446      * Parse and place function arguments.
1447      */
1448     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1449         TCGCallArgumentKind kind;
1450         TCGType type;
1451 
1452         typecode = typemask & 7;
1453         switch (typecode) {
1454         case dh_typecode_i32:
1455         case dh_typecode_s32:
1456             type = TCG_TYPE_I32;
1457             break;
1458         case dh_typecode_i64:
1459         case dh_typecode_s64:
1460             type = TCG_TYPE_I64;
1461             break;
1462         case dh_typecode_ptr:
1463             type = TCG_TYPE_PTR;
1464             break;
1465         case dh_typecode_i128:
1466             type = TCG_TYPE_I128;
1467             break;
1468         default:
1469             g_assert_not_reached();
1470         }
1471 
1472         switch (type) {
1473         case TCG_TYPE_I32:
1474             switch (TCG_TARGET_CALL_ARG_I32) {
1475             case TCG_CALL_ARG_EVEN:
1476                 layout_arg_even(&cum);
1477                 /* fall through */
1478             case TCG_CALL_ARG_NORMAL:
1479                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1480                 break;
1481             case TCG_CALL_ARG_EXTEND:
1482                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1483                 layout_arg_1(&cum, info, kind);
1484                 break;
1485             default:
1486                 qemu_build_not_reached();
1487             }
1488             break;
1489 
1490         case TCG_TYPE_I64:
1491             switch (TCG_TARGET_CALL_ARG_I64) {
1492             case TCG_CALL_ARG_EVEN:
1493                 layout_arg_even(&cum);
1494                 /* fall through */
1495             case TCG_CALL_ARG_NORMAL:
1496                 if (TCG_TARGET_REG_BITS == 32) {
1497                     layout_arg_normal_n(&cum, info, 2);
1498                 } else {
1499                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1500                 }
1501                 break;
1502             default:
1503                 qemu_build_not_reached();
1504             }
1505             break;
1506 
1507         case TCG_TYPE_I128:
1508             switch (TCG_TARGET_CALL_ARG_I128) {
1509             case TCG_CALL_ARG_EVEN:
1510                 layout_arg_even(&cum);
1511                 /* fall through */
1512             case TCG_CALL_ARG_NORMAL:
1513                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1514                 break;
1515             case TCG_CALL_ARG_BY_REF:
1516                 layout_arg_by_ref(&cum, info);
1517                 break;
1518             default:
1519                 qemu_build_not_reached();
1520             }
1521             break;
1522 
1523         default:
1524             g_assert_not_reached();
1525         }
1526     }
1527     info->nr_in = cum.info_in_idx;
1528 
1529     /* Validate that we didn't overrun the input array. */
1530     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1531     /* Validate the backend has enough argument space. */
1532     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1533 
1534     /*
1535      * Relocate the "ref_slot" area to the end of the parameters.
1536      * Minimizing this stack offset helps code size for x86,
1537      * which has a signed 8-bit offset encoding.
1538      */
1539     if (cum.ref_slot != 0) {
1540         int ref_base = 0;
1541 
1542         if (cum.arg_slot > max_reg_slots) {
1543             int align = __alignof(Int128) / sizeof(tcg_target_long);
1544 
1545             ref_base = cum.arg_slot - max_reg_slots;
1546             if (align > 1) {
1547                 ref_base = ROUND_UP(ref_base, align);
1548             }
1549         }
1550         assert(ref_base + cum.ref_slot <= max_stk_slots);
1551         ref_base += max_reg_slots;
1552 
1553         if (ref_base != 0) {
1554             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1555                 TCGCallArgumentLoc *loc = &info->in[i];
1556                 switch (loc->kind) {
1557                 case TCG_CALL_ARG_BY_REF:
1558                 case TCG_CALL_ARG_BY_REF_N:
1559                     loc->ref_slot += ref_base;
1560                     break;
1561                 default:
1562                     break;
1563                 }
1564             }
1565         }
1566     }
1567 }
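
/*
 * Illustrative sketch (not part of the build; helper_foo is hypothetical):
 * on a 64-bit host with TCG_CALL_ARG_EXTEND, a helper taking (i32, i64)
 * is laid out by the loop above roughly as
 *
 *     uint64_t helper_foo(uint32_t a, uint64_t b);
 *
 *     info->in[0]: kind = TCG_CALL_ARG_EXTEND_U, one slot (a, widened)
 *     info->in[1]: kind = TCG_CALL_ARG_NORMAL,   one slot (b)
 *
 * while on a 32-bit host the i64 argument is split across two slots by
 * layout_arg_normal_n(&cum, info, 2), aligned first to an even slot if
 * the ABI uses TCG_CALL_ARG_EVEN.
 */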
1568 
1569 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1570 static void process_constraint_sets(void);
1571 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1572                                             TCGReg reg, const char *name);
1573 
1574 static void tcg_context_init(unsigned max_threads)
1575 {
1576     TCGContext *s = &tcg_init_ctx;
1577     int n, i;
1578     TCGTemp *ts;
1579 
1580     memset(s, 0, sizeof(*s));
1581     s->nb_globals = 0;
1582 
1583     init_call_layout(&info_helper_ld32_mmu);
1584     init_call_layout(&info_helper_ld64_mmu);
1585     init_call_layout(&info_helper_ld128_mmu);
1586     init_call_layout(&info_helper_st32_mmu);
1587     init_call_layout(&info_helper_st64_mmu);
1588     init_call_layout(&info_helper_st128_mmu);
1589 
1590     tcg_target_init(s);
1591     process_constraint_sets();
1592 
1593     /* Reverse the order of the saved registers, assuming they're all at
1594        the start of tcg_target_reg_alloc_order.  */
1595     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1596         int r = tcg_target_reg_alloc_order[n];
1597         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1598             break;
1599         }
1600     }
1601     for (i = 0; i < n; ++i) {
1602         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1603     }
1604     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1605         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1606     }
1607 
1608     tcg_ctx = s;
1609     /*
1610      * In user-mode we simply share the init context among threads, since we
1611      * use a single region. See the documentation of tcg_region_init() for the
1612      * reasoning behind this.
1613      * In system-mode we will have at most max_threads TCG threads.
1614      */
1615 #ifdef CONFIG_USER_ONLY
1616     tcg_ctxs = &tcg_ctx;
1617     tcg_cur_ctxs = 1;
1618     tcg_max_ctxs = 1;
1619 #else
1620     tcg_max_ctxs = max_threads;
1621     tcg_ctxs = g_new0(TCGContext *, max_threads);
1622 #endif
1623 
1624     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1625     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1626     tcg_env = temp_tcgv_ptr(ts);
1627 }
1628 
1629 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1630 {
1631     tcg_context_init(max_threads);
1632     tcg_region_init(tb_size, splitwx, max_threads);
1633 }
1634 
1635 /*
1636  * Allocate TBs right before their corresponding translated code, making
1637  * sure that TBs and code are on different cache lines.
1638  */
1639 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1640 {
1641     uintptr_t align = qemu_icache_linesize;
1642     TranslationBlock *tb;
1643     void *next;
1644 
1645  retry:
1646     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1647     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1648 
1649     if (unlikely(next > s->code_gen_highwater)) {
1650         if (tcg_region_alloc(s)) {
1651             return NULL;
1652         }
1653         goto retry;
1654     }
1655     qatomic_set(&s->code_gen_ptr, next);
1656     return tb;
1657 }
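
/*
 * Worked example with hypothetical numbers: for a 64-byte icache line
 * and code_gen_ptr == 0x1010, the TB header is placed at the rounded-up
 * address 0x1040, and "next" rounds (tb + 1) up again, e.g. to 0x1080
 * when sizeof(TranslationBlock) <= 64.  Translated code then starts at
 * "next", so the header and the code never share a cache line.
 */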
1658 
1659 void tcg_prologue_init(void)
1660 {
1661     TCGContext *s = tcg_ctx;
1662     size_t prologue_size;
1663 
1664     s->code_ptr = s->code_gen_ptr;
1665     s->code_buf = s->code_gen_ptr;
1666     s->data_gen_ptr = NULL;
1667 
1668 #ifndef CONFIG_TCG_INTERPRETER
1669     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1670 #endif
1671 
1672     s->pool_labels = NULL;
1673 
1674     qemu_thread_jit_write();
1675     /* Generate the prologue.  */
1676     tcg_target_qemu_prologue(s);
1677 
1678     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1679     {
1680         int result = tcg_out_pool_finalize(s);
1681         tcg_debug_assert(result == 0);
1682     }
1683 
1684     prologue_size = tcg_current_code_size(s);
1685     perf_report_prologue(s->code_gen_ptr, prologue_size);
1686 
1687 #ifndef CONFIG_TCG_INTERPRETER
1688     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1689                         (uintptr_t)s->code_buf, prologue_size);
1690 #endif
1691 
1692     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1693         FILE *logfile = qemu_log_trylock();
1694         if (logfile) {
1695             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1696             if (s->data_gen_ptr) {
1697                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1698                 size_t data_size = prologue_size - code_size;
1699                 size_t i;
1700 
1701                 disas(logfile, s->code_gen_ptr, code_size);
1702 
1703                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1704                     if (sizeof(tcg_target_ulong) == 8) {
1705                         fprintf(logfile,
1706                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1707                                 (uintptr_t)s->data_gen_ptr + i,
1708                                 *(uint64_t *)(s->data_gen_ptr + i));
1709                     } else {
1710                         fprintf(logfile,
1711                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1712                                 (uintptr_t)s->data_gen_ptr + i,
1713                                 *(uint32_t *)(s->data_gen_ptr + i));
1714                     }
1715                 }
1716             } else {
1717                 disas(logfile, s->code_gen_ptr, prologue_size);
1718             }
1719             fprintf(logfile, "\n");
1720             qemu_log_unlock(logfile);
1721         }
1722     }
1723 
1724 #ifndef CONFIG_TCG_INTERPRETER
1725     /*
1726      * Assert that goto_ptr is implemented completely, setting an epilogue.
1727      * For tci, we use NULL as the signal to return from the interpreter,
1728      * so skip this check.
1729      */
1730     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1731 #endif
1732 
1733     tcg_region_prologue_set(s);
1734 }
1735 
1736 void tcg_func_start(TCGContext *s)
1737 {
1738     tcg_pool_reset(s);
1739     s->nb_temps = s->nb_globals;
1740 
1741     /* No temps have been previously allocated for size or locality.  */
1742     tcg_temp_ebb_reset_freed(s);
1743 
1744     /* No constant temps have been previously allocated. */
1745     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1746         if (s->const_table[i]) {
1747             g_hash_table_remove_all(s->const_table[i]);
1748         }
1749     }
1750 
1751     s->nb_ops = 0;
1752     s->nb_labels = 0;
1753     s->current_frame_offset = s->frame_start;
1754 
1755 #ifdef CONFIG_DEBUG_TCG
1756     s->goto_tb_issue_mask = 0;
1757 #endif
1758 
1759     QTAILQ_INIT(&s->ops);
1760     QTAILQ_INIT(&s->free_ops);
1761     s->emit_before_op = NULL;
1762     QSIMPLEQ_INIT(&s->labels);
1763 
1764     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1765     tcg_debug_assert(s->insn_start_words > 0);
1766 }
1767 
1768 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1769 {
1770     int n = s->nb_temps++;
1771 
1772     if (n >= TCG_MAX_TEMPS) {
1773         tcg_raise_tb_overflow(s);
1774     }
1775     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1776 }
1777 
1778 static TCGTemp *tcg_global_alloc(TCGContext *s)
1779 {
1780     TCGTemp *ts;
1781 
1782     tcg_debug_assert(s->nb_globals == s->nb_temps);
1783     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1784     s->nb_globals++;
1785     ts = tcg_temp_alloc(s);
1786     ts->kind = TEMP_GLOBAL;
1787 
1788     return ts;
1789 }
1790 
1791 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1792                                             TCGReg reg, const char *name)
1793 {
1794     TCGTemp *ts;
1795 
1796     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1797 
1798     ts = tcg_global_alloc(s);
1799     ts->base_type = type;
1800     ts->type = type;
1801     ts->kind = TEMP_FIXED;
1802     ts->reg = reg;
1803     ts->name = name;
1804     tcg_regset_set_reg(s->reserved_regs, reg);
1805 
1806     return ts;
1807 }
1808 
1809 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1810 {
1811     s->frame_start = start;
1812     s->frame_end = start + size;
1813     s->frame_temp
1814         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1815 }
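
/*
 * Typical backend usage (sketch; the constants vary per target): the
 * prologue reserves a spill area on the host stack and registers it
 * here, e.g.
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 *
 * after which tcg_func_start() resets current_frame_offset to
 * frame_start and spill slots are carved out of this range.
 */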
1816 
1817 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1818                                             const char *name, TCGType type)
1819 {
1820     TCGContext *s = tcg_ctx;
1821     TCGTemp *base_ts = tcgv_ptr_temp(base);
1822     TCGTemp *ts = tcg_global_alloc(s);
1823     int indirect_reg = 0;
1824 
1825     switch (base_ts->kind) {
1826     case TEMP_FIXED:
1827         break;
1828     case TEMP_GLOBAL:
1829         /* We do not support double-indirect registers.  */
1830         tcg_debug_assert(!base_ts->indirect_reg);
1831         base_ts->indirect_base = 1;
1832         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1833                             ? 2 : 1);
1834         indirect_reg = 1;
1835         break;
1836     default:
1837         g_assert_not_reached();
1838     }
1839 
1840     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1841         TCGTemp *ts2 = tcg_global_alloc(s);
1842         char buf[64];
1843 
1844         ts->base_type = TCG_TYPE_I64;
1845         ts->type = TCG_TYPE_I32;
1846         ts->indirect_reg = indirect_reg;
1847         ts->mem_allocated = 1;
1848         ts->mem_base = base_ts;
1849         ts->mem_offset = offset;
1850         pstrcpy(buf, sizeof(buf), name);
1851         pstrcat(buf, sizeof(buf), "_0");
1852         ts->name = strdup(buf);
1853 
1854         tcg_debug_assert(ts2 == ts + 1);
1855         ts2->base_type = TCG_TYPE_I64;
1856         ts2->type = TCG_TYPE_I32;
1857         ts2->indirect_reg = indirect_reg;
1858         ts2->mem_allocated = 1;
1859         ts2->mem_base = base_ts;
1860         ts2->mem_offset = offset + 4;
1861         ts2->temp_subindex = 1;
1862         pstrcpy(buf, sizeof(buf), name);
1863         pstrcat(buf, sizeof(buf), "_1");
1864         ts2->name = strdup(buf);
1865     } else {
1866         ts->base_type = type;
1867         ts->type = type;
1868         ts->indirect_reg = indirect_reg;
1869         ts->mem_allocated = 1;
1870         ts->mem_base = base_ts;
1871         ts->mem_offset = offset;
1872         ts->name = name;
1873     }
1874     return ts;
1875 }
1876 
1877 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1878 {
1879     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1880     return temp_tcgv_i32(ts);
1881 }
1882 
1883 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1884 {
1885     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1886     return temp_tcgv_i64(ts);
1887 }
1888 
1889 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1890 {
1891     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1892     return temp_tcgv_ptr(ts);
1893 }
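
/*
 * Illustrative front-end usage (CPUMyState and its field are
 * hypothetical): guest registers kept in the CPU state structure are
 * exposed as memory-backed globals addressed off the fixed "env"
 * pointer:
 *
 *     TCGv_i32 cpu_r0 = tcg_global_mem_new_i32(tcg_env,
 *                           offsetof(CPUMyState, regs[0]), "r0");
 *
 * Such globals live for the lifetime of the context and are never
 * freed; the name given here is what appears in -d op dumps.
 */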
1894 
1895 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1896 {
1897     TCGContext *s = tcg_ctx;
1898     TCGTemp *ts;
1899     int n;
1900 
1901     if (kind == TEMP_EBB) {
1902         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1903 
1904         if (idx < TCG_MAX_TEMPS) {
1905             /* There is already an available temp with the right type.  */
1906             clear_bit(idx, s->free_temps[type].l);
1907 
1908             ts = &s->temps[idx];
1909             ts->temp_allocated = 1;
1910             tcg_debug_assert(ts->base_type == type);
1911             tcg_debug_assert(ts->kind == kind);
1912             return ts;
1913         }
1914     } else {
1915         tcg_debug_assert(kind == TEMP_TB);
1916     }
1917 
1918     switch (type) {
1919     case TCG_TYPE_I32:
1920     case TCG_TYPE_V64:
1921     case TCG_TYPE_V128:
1922     case TCG_TYPE_V256:
1923         n = 1;
1924         break;
1925     case TCG_TYPE_I64:
1926         n = 64 / TCG_TARGET_REG_BITS;
1927         break;
1928     case TCG_TYPE_I128:
1929         n = 128 / TCG_TARGET_REG_BITS;
1930         break;
1931     default:
1932         g_assert_not_reached();
1933     }
1934 
1935     ts = tcg_temp_alloc(s);
1936     ts->base_type = type;
1937     ts->temp_allocated = 1;
1938     ts->kind = kind;
1939 
1940     if (n == 1) {
1941         ts->type = type;
1942     } else {
1943         ts->type = TCG_TYPE_REG;
1944 
1945         for (int i = 1; i < n; ++i) {
1946             TCGTemp *ts2 = tcg_temp_alloc(s);
1947 
1948             tcg_debug_assert(ts2 == ts + i);
1949             ts2->base_type = type;
1950             ts2->type = TCG_TYPE_REG;
1951             ts2->temp_allocated = 1;
1952             ts2->temp_subindex = i;
1953             ts2->kind = kind;
1954         }
1955     }
1956     return ts;
1957 }
1958 
1959 TCGv_i32 tcg_temp_new_i32(void)
1960 {
1961     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1962 }
1963 
1964 TCGv_i32 tcg_temp_ebb_new_i32(void)
1965 {
1966     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1967 }
1968 
1969 TCGv_i64 tcg_temp_new_i64(void)
1970 {
1971     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1972 }
1973 
1974 TCGv_i64 tcg_temp_ebb_new_i64(void)
1975 {
1976     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1977 }
1978 
1979 TCGv_ptr tcg_temp_new_ptr(void)
1980 {
1981     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1982 }
1983 
1984 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1985 {
1986     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1987 }
1988 
1989 TCGv_i128 tcg_temp_new_i128(void)
1990 {
1991     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1992 }
1993 
1994 TCGv_i128 tcg_temp_ebb_new_i128(void)
1995 {
1996     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1997 }
1998 
1999 TCGv_vec tcg_temp_new_vec(TCGType type)
2000 {
2001     TCGTemp *t;
2002 
2003 #ifdef CONFIG_DEBUG_TCG
2004     switch (type) {
2005     case TCG_TYPE_V64:
2006         assert(TCG_TARGET_HAS_v64);
2007         break;
2008     case TCG_TYPE_V128:
2009         assert(TCG_TARGET_HAS_v128);
2010         break;
2011     case TCG_TYPE_V256:
2012         assert(TCG_TARGET_HAS_v256);
2013         break;
2014     default:
2015         g_assert_not_reached();
2016     }
2017 #endif
2018 
2019     t = tcg_temp_new_internal(type, TEMP_EBB);
2020     return temp_tcgv_vec(t);
2021 }
2022 
2023 /* Create a new temp of the same type as an existing temp.  */
2024 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2025 {
2026     TCGTemp *t = tcgv_vec_temp(match);
2027 
2028     tcg_debug_assert(t->temp_allocated != 0);
2029 
2030     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2031     return temp_tcgv_vec(t);
2032 }
2033 
2034 void tcg_temp_free_internal(TCGTemp *ts)
2035 {
2036     TCGContext *s = tcg_ctx;
2037 
2038     switch (ts->kind) {
2039     case TEMP_CONST:
2040     case TEMP_TB:
2041         /* Silently ignore free. */
2042         break;
2043     case TEMP_EBB:
2044         tcg_debug_assert(ts->temp_allocated != 0);
2045         ts->temp_allocated = 0;
2046         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2047         break;
2048     default:
2049         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2050         g_assert_not_reached();
2051     }
2052 }
2053 
2054 void tcg_temp_free_i32(TCGv_i32 arg)
2055 {
2056     tcg_temp_free_internal(tcgv_i32_temp(arg));
2057 }
2058 
2059 void tcg_temp_free_i64(TCGv_i64 arg)
2060 {
2061     tcg_temp_free_internal(tcgv_i64_temp(arg));
2062 }
2063 
2064 void tcg_temp_free_i128(TCGv_i128 arg)
2065 {
2066     tcg_temp_free_internal(tcgv_i128_temp(arg));
2067 }
2068 
2069 void tcg_temp_free_ptr(TCGv_ptr arg)
2070 {
2071     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2072 }
2073 
2074 void tcg_temp_free_vec(TCGv_vec arg)
2075 {
2076     tcg_temp_free_internal(tcgv_vec_temp(arg));
2077 }
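
/*
 * Lifecycle sketch (illustrative): TEMP_TB temps live to the end of the
 * translation block and their frees are silently ignored above, while a
 * freed TEMP_EBB temp returns to the per-type free bitmap and may be
 * handed out again by the next allocation of the same base type:
 *
 *     TCGv_i32 t = tcg_temp_ebb_new_i32();
 *     tcg_gen_movi_i32(t, 0x1234);      // use within one extended basic block
 *     tcg_temp_free_i32(t);             // now eligible for reuse
 */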
2078 
2079 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2080 {
2081     TCGContext *s = tcg_ctx;
2082     GHashTable *h = s->const_table[type];
2083     TCGTemp *ts;
2084 
2085     if (h == NULL) {
2086         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2087         s->const_table[type] = h;
2088     }
2089 
2090     ts = g_hash_table_lookup(h, &val);
2091     if (ts == NULL) {
2092         int64_t *val_ptr;
2093 
2094         ts = tcg_temp_alloc(s);
2095 
2096         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2097             TCGTemp *ts2 = tcg_temp_alloc(s);
2098 
2099             tcg_debug_assert(ts2 == ts + 1);
2100 
2101             ts->base_type = TCG_TYPE_I64;
2102             ts->type = TCG_TYPE_I32;
2103             ts->kind = TEMP_CONST;
2104             ts->temp_allocated = 1;
2105 
2106             ts2->base_type = TCG_TYPE_I64;
2107             ts2->type = TCG_TYPE_I32;
2108             ts2->kind = TEMP_CONST;
2109             ts2->temp_allocated = 1;
2110             ts2->temp_subindex = 1;
2111 
2112             /*
2113              * Retain the full value of the 64-bit constant in the low
2114              * part, so that the hash table works.  Actual uses will
2115              * truncate the value to the low part.
2116              */
2117             ts[HOST_BIG_ENDIAN].val = val;
2118             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2119             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2120         } else {
2121             ts->base_type = type;
2122             ts->type = type;
2123             ts->kind = TEMP_CONST;
2124             ts->temp_allocated = 1;
2125             ts->val = val;
2126             val_ptr = &ts->val;
2127         }
2128         g_hash_table_insert(h, val_ptr, ts);
2129     }
2130 
2131     return ts;
2132 }
2133 
2134 TCGv_i32 tcg_constant_i32(int32_t val)
2135 {
2136     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2137 }
2138 
2139 TCGv_i64 tcg_constant_i64(int64_t val)
2140 {
2141     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2142 }
2143 
2144 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2145 {
2146     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2147 }
2148 
2149 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2150 {
2151     val = dup_const(vece, val);
2152     return temp_tcgv_vec(tcg_constant_internal(type, val));
2153 }
2154 
2155 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2156 {
2157     TCGTemp *t = tcgv_vec_temp(match);
2158 
2159     tcg_debug_assert(t->temp_allocated != 0);
2160     return tcg_constant_vec(t->base_type, vece, val);
2161 }
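
/*
 * Usage note (sketch): constants are interned per (type, value) in
 * const_table, so repeated requests return the same temp, and frees of
 * TEMP_CONST are silently ignored by tcg_temp_free_internal():
 *
 *     TCGv_i32 a = tcg_constant_i32(1);
 *     TCGv_i32 b = tcg_constant_i32(1);   // same underlying TCGTemp
 */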
2162 
2163 #ifdef CONFIG_DEBUG_TCG
2164 size_t temp_idx(TCGTemp *ts)
2165 {
2166     ptrdiff_t n = ts - tcg_ctx->temps;
2167     assert(n >= 0 && n < tcg_ctx->nb_temps);
2168     return n;
2169 }
2170 
2171 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2172 {
2173     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2174 
2175     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2176     assert(o % sizeof(TCGTemp) == 0);
2177 
2178     return (void *)tcg_ctx + (uintptr_t)v;
2179 }
2180 #endif /* CONFIG_DEBUG_TCG */
2181 
2182 /*
2183  * Return true if OP may appear in the opcode stream with TYPE.
2184  * Test the runtime variable that controls each opcode.
2185  */
2186 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2187 {
2188     bool has_type;
2189 
2190     switch (type) {
2191     case TCG_TYPE_I32:
2192         has_type = true;
2193         break;
2194     case TCG_TYPE_I64:
2195         has_type = TCG_TARGET_REG_BITS == 64;
2196         break;
2197     case TCG_TYPE_V64:
2198         has_type = TCG_TARGET_HAS_v64;
2199         break;
2200     case TCG_TYPE_V128:
2201         has_type = TCG_TARGET_HAS_v128;
2202         break;
2203     case TCG_TYPE_V256:
2204         has_type = TCG_TARGET_HAS_v256;
2205         break;
2206     default:
2207         has_type = false;
2208         break;
2209     }
2210 
2211     switch (op) {
2212     case INDEX_op_discard:
2213     case INDEX_op_set_label:
2214     case INDEX_op_call:
2215     case INDEX_op_br:
2216     case INDEX_op_mb:
2217     case INDEX_op_insn_start:
2218     case INDEX_op_exit_tb:
2219     case INDEX_op_goto_tb:
2220     case INDEX_op_goto_ptr:
2221     case INDEX_op_qemu_ld_i32:
2222     case INDEX_op_qemu_st_i32:
2223     case INDEX_op_qemu_ld_i64:
2224     case INDEX_op_qemu_st_i64:
2225         return true;
2226 
2227     case INDEX_op_qemu_st8_i32:
2228         return TCG_TARGET_HAS_qemu_st8_i32;
2229 
2230     case INDEX_op_qemu_ld_i128:
2231     case INDEX_op_qemu_st_i128:
2232         return TCG_TARGET_HAS_qemu_ldst_i128;
2233 
2234     case INDEX_op_add:
2235     case INDEX_op_and:
2236     case INDEX_op_mov:
2237     case INDEX_op_or:
2238     case INDEX_op_xor:
2239         return has_type;
2240 
2241     case INDEX_op_setcond_i32:
2242     case INDEX_op_brcond_i32:
2243     case INDEX_op_movcond_i32:
2244     case INDEX_op_ld8u_i32:
2245     case INDEX_op_ld8s_i32:
2246     case INDEX_op_ld16u_i32:
2247     case INDEX_op_ld16s_i32:
2248     case INDEX_op_ld_i32:
2249     case INDEX_op_st8_i32:
2250     case INDEX_op_st16_i32:
2251     case INDEX_op_st_i32:
2252     case INDEX_op_shl_i32:
2253     case INDEX_op_shr_i32:
2254     case INDEX_op_sar_i32:
2255     case INDEX_op_extract_i32:
2256     case INDEX_op_sextract_i32:
2257     case INDEX_op_deposit_i32:
2258         return true;
2259 
2260     case INDEX_op_negsetcond_i32:
2261         return TCG_TARGET_HAS_negsetcond_i32;
2262     case INDEX_op_div_i32:
2263     case INDEX_op_divu_i32:
2264         return TCG_TARGET_HAS_div_i32;
2265     case INDEX_op_rem_i32:
2266     case INDEX_op_remu_i32:
2267         return TCG_TARGET_HAS_rem_i32;
2268     case INDEX_op_div2_i32:
2269     case INDEX_op_divu2_i32:
2270         return TCG_TARGET_HAS_div2_i32;
2271     case INDEX_op_rotl_i32:
2272     case INDEX_op_rotr_i32:
2273         return TCG_TARGET_HAS_rot_i32;
2274     case INDEX_op_extract2_i32:
2275         return TCG_TARGET_HAS_extract2_i32;
2276     case INDEX_op_add2_i32:
2277         return TCG_TARGET_HAS_add2_i32;
2278     case INDEX_op_sub2_i32:
2279         return TCG_TARGET_HAS_sub2_i32;
2280     case INDEX_op_mulu2_i32:
2281         return TCG_TARGET_HAS_mulu2_i32;
2282     case INDEX_op_muls2_i32:
2283         return TCG_TARGET_HAS_muls2_i32;
2284     case INDEX_op_mulsh_i32:
2285         return TCG_TARGET_HAS_mulsh_i32;
2286     case INDEX_op_bswap16_i32:
2287         return TCG_TARGET_HAS_bswap16_i32;
2288     case INDEX_op_bswap32_i32:
2289         return TCG_TARGET_HAS_bswap32_i32;
2290     case INDEX_op_clz_i32:
2291         return TCG_TARGET_HAS_clz_i32;
2292     case INDEX_op_ctz_i32:
2293         return TCG_TARGET_HAS_ctz_i32;
2294     case INDEX_op_ctpop_i32:
2295         return TCG_TARGET_HAS_ctpop_i32;
2296 
2297     case INDEX_op_brcond2_i32:
2298     case INDEX_op_setcond2_i32:
2299         return TCG_TARGET_REG_BITS == 32;
2300 
2301     case INDEX_op_setcond_i64:
2302     case INDEX_op_brcond_i64:
2303     case INDEX_op_movcond_i64:
2304     case INDEX_op_ld8u_i64:
2305     case INDEX_op_ld8s_i64:
2306     case INDEX_op_ld16u_i64:
2307     case INDEX_op_ld16s_i64:
2308     case INDEX_op_ld32u_i64:
2309     case INDEX_op_ld32s_i64:
2310     case INDEX_op_ld_i64:
2311     case INDEX_op_st8_i64:
2312     case INDEX_op_st16_i64:
2313     case INDEX_op_st32_i64:
2314     case INDEX_op_st_i64:
2315     case INDEX_op_shl_i64:
2316     case INDEX_op_shr_i64:
2317     case INDEX_op_sar_i64:
2318     case INDEX_op_ext_i32_i64:
2319     case INDEX_op_extu_i32_i64:
2320     case INDEX_op_extract_i64:
2321     case INDEX_op_sextract_i64:
2322     case INDEX_op_deposit_i64:
2323         return TCG_TARGET_REG_BITS == 64;
2324 
2325     case INDEX_op_negsetcond_i64:
2326         return TCG_TARGET_HAS_negsetcond_i64;
2327     case INDEX_op_div_i64:
2328     case INDEX_op_divu_i64:
2329         return TCG_TARGET_HAS_div_i64;
2330     case INDEX_op_rem_i64:
2331     case INDEX_op_remu_i64:
2332         return TCG_TARGET_HAS_rem_i64;
2333     case INDEX_op_div2_i64:
2334     case INDEX_op_divu2_i64:
2335         return TCG_TARGET_HAS_div2_i64;
2336     case INDEX_op_rotl_i64:
2337     case INDEX_op_rotr_i64:
2338         return TCG_TARGET_HAS_rot_i64;
2339     case INDEX_op_extract2_i64:
2340         return TCG_TARGET_HAS_extract2_i64;
2341     case INDEX_op_extrl_i64_i32:
2342     case INDEX_op_extrh_i64_i32:
2343         return TCG_TARGET_HAS_extr_i64_i32;
2344     case INDEX_op_bswap16_i64:
2345         return TCG_TARGET_HAS_bswap16_i64;
2346     case INDEX_op_bswap32_i64:
2347         return TCG_TARGET_HAS_bswap32_i64;
2348     case INDEX_op_bswap64_i64:
2349         return TCG_TARGET_HAS_bswap64_i64;
2350     case INDEX_op_clz_i64:
2351         return TCG_TARGET_HAS_clz_i64;
2352     case INDEX_op_ctz_i64:
2353         return TCG_TARGET_HAS_ctz_i64;
2354     case INDEX_op_ctpop_i64:
2355         return TCG_TARGET_HAS_ctpop_i64;
2356     case INDEX_op_add2_i64:
2357         return TCG_TARGET_HAS_add2_i64;
2358     case INDEX_op_sub2_i64:
2359         return TCG_TARGET_HAS_sub2_i64;
2360     case INDEX_op_mulu2_i64:
2361         return TCG_TARGET_HAS_mulu2_i64;
2362     case INDEX_op_muls2_i64:
2363         return TCG_TARGET_HAS_muls2_i64;
2364     case INDEX_op_mulsh_i64:
2365         return TCG_TARGET_HAS_mulsh_i64;
2366 
2367     case INDEX_op_mov_vec:
2368     case INDEX_op_dup_vec:
2369     case INDEX_op_dupm_vec:
2370     case INDEX_op_ld_vec:
2371     case INDEX_op_st_vec:
2372     case INDEX_op_add_vec:
2373     case INDEX_op_sub_vec:
2374     case INDEX_op_and_vec:
2375     case INDEX_op_or_vec:
2376     case INDEX_op_xor_vec:
2377     case INDEX_op_cmp_vec:
2378         return has_type;
2379     case INDEX_op_dup2_vec:
2380         return has_type && TCG_TARGET_REG_BITS == 32;
2381     case INDEX_op_not_vec:
2382         return has_type && TCG_TARGET_HAS_not_vec;
2383     case INDEX_op_neg_vec:
2384         return has_type && TCG_TARGET_HAS_neg_vec;
2385     case INDEX_op_abs_vec:
2386         return has_type && TCG_TARGET_HAS_abs_vec;
2387     case INDEX_op_andc_vec:
2388         return has_type && TCG_TARGET_HAS_andc_vec;
2389     case INDEX_op_orc_vec:
2390         return has_type && TCG_TARGET_HAS_orc_vec;
2391     case INDEX_op_nand_vec:
2392         return has_type && TCG_TARGET_HAS_nand_vec;
2393     case INDEX_op_nor_vec:
2394         return has_type && TCG_TARGET_HAS_nor_vec;
2395     case INDEX_op_eqv_vec:
2396         return has_type && TCG_TARGET_HAS_eqv_vec;
2397     case INDEX_op_mul_vec:
2398         return has_type && TCG_TARGET_HAS_mul_vec;
2399     case INDEX_op_shli_vec:
2400     case INDEX_op_shri_vec:
2401     case INDEX_op_sari_vec:
2402         return has_type && TCG_TARGET_HAS_shi_vec;
2403     case INDEX_op_shls_vec:
2404     case INDEX_op_shrs_vec:
2405     case INDEX_op_sars_vec:
2406         return has_type && TCG_TARGET_HAS_shs_vec;
2407     case INDEX_op_shlv_vec:
2408     case INDEX_op_shrv_vec:
2409     case INDEX_op_sarv_vec:
2410         return has_type && TCG_TARGET_HAS_shv_vec;
2411     case INDEX_op_rotli_vec:
2412         return has_type && TCG_TARGET_HAS_roti_vec;
2413     case INDEX_op_rotls_vec:
2414         return has_type && TCG_TARGET_HAS_rots_vec;
2415     case INDEX_op_rotlv_vec:
2416     case INDEX_op_rotrv_vec:
2417         return has_type && TCG_TARGET_HAS_rotv_vec;
2418     case INDEX_op_ssadd_vec:
2419     case INDEX_op_usadd_vec:
2420     case INDEX_op_sssub_vec:
2421     case INDEX_op_ussub_vec:
2422         return has_type && TCG_TARGET_HAS_sat_vec;
2423     case INDEX_op_smin_vec:
2424     case INDEX_op_umin_vec:
2425     case INDEX_op_smax_vec:
2426     case INDEX_op_umax_vec:
2427         return has_type && TCG_TARGET_HAS_minmax_vec;
2428     case INDEX_op_bitsel_vec:
2429         return has_type && TCG_TARGET_HAS_bitsel_vec;
2430     case INDEX_op_cmpsel_vec:
2431         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2432 
2433     default:
2434         if (op < INDEX_op_last_generic) {
2435             const TCGOutOp *outop;
2436             TCGConstraintSetIndex con_set;
2437 
2438             if (!has_type) {
2439                 return false;
2440             }
2441 
2442             outop = all_outop[op];
2443             tcg_debug_assert(outop != NULL);
2444 
2445             con_set = outop->static_constraint;
2446             if (con_set == C_Dynamic) {
2447                 con_set = outop->dynamic_constraint(type, flags);
2448             }
2449             if (con_set >= 0) {
2450                 return true;
2451             }
2452             tcg_debug_assert(con_set == C_NotImplemented);
2453             return false;
2454         }
2455         tcg_debug_assert(op < NB_OPS);
2456         return true;
2457 
2458     case INDEX_op_last_generic:
2459         g_assert_not_reached();
2460     }
2461 }
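
/*
 * Expansion-time sketch (illustrative): generic code queries this
 * predicate before emitting an optional opcode and expands a fallback
 * otherwise, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         // emit ctpop directly
 *     } else {
 *         // expand population count via shifts and masks
 *     }
 */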
2462 
2463 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2464 {
2465     unsigned width;
2466 
2467     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2468     width = (type == TCG_TYPE_I32 ? 32 : 64);
2469 
2470     tcg_debug_assert(ofs < width);
2471     tcg_debug_assert(len > 0);
2472     tcg_debug_assert(len <= width - ofs);
2473 
2474     return TCG_TARGET_deposit_valid(type, ofs, len);
2475 }
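
/*
 * Worked example: for TCG_TYPE_I32 with ofs = 8 and len = 8, a deposit
 * replaces bits [15:8] of the destination with the low 8 bits of the
 * source.  The asserts above require 0 < len <= width - ofs; whether
 * the backend can encode the operation directly is what
 * TCG_TARGET_deposit_valid() reports.
 */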
2476 
2477 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2478 
2479 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2480                           TCGTemp *ret, TCGTemp **args)
2481 {
2482     TCGv_i64 extend_free[MAX_CALL_IARGS];
2483     int n_extend = 0;
2484     TCGOp *op;
2485     int i, n, pi = 0, total_args;
2486 
2487     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2488         init_call_layout(info);
2489         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2490     }
2491 
2492     total_args = info->nr_out + info->nr_in + 2;
2493     op = tcg_op_alloc(INDEX_op_call, total_args);
2494 
2495 #ifdef CONFIG_PLUGIN
2496     /* Flag helpers that may affect guest state */
2497     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2498         tcg_ctx->plugin_insn->calls_helpers = true;
2499     }
2500 #endif
2501 
2502     TCGOP_CALLO(op) = n = info->nr_out;
2503     switch (n) {
2504     case 0:
2505         tcg_debug_assert(ret == NULL);
2506         break;
2507     case 1:
2508         tcg_debug_assert(ret != NULL);
2509         op->args[pi++] = temp_arg(ret);
2510         break;
2511     case 2:
2512     case 4:
2513         tcg_debug_assert(ret != NULL);
2514         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2515         tcg_debug_assert(ret->temp_subindex == 0);
2516         for (i = 0; i < n; ++i) {
2517             op->args[pi++] = temp_arg(ret + i);
2518         }
2519         break;
2520     default:
2521         g_assert_not_reached();
2522     }
2523 
2524     TCGOP_CALLI(op) = n = info->nr_in;
2525     for (i = 0; i < n; i++) {
2526         const TCGCallArgumentLoc *loc = &info->in[i];
2527         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2528 
2529         switch (loc->kind) {
2530         case TCG_CALL_ARG_NORMAL:
2531         case TCG_CALL_ARG_BY_REF:
2532         case TCG_CALL_ARG_BY_REF_N:
2533             op->args[pi++] = temp_arg(ts);
2534             break;
2535 
2536         case TCG_CALL_ARG_EXTEND_U:
2537         case TCG_CALL_ARG_EXTEND_S:
2538             {
2539                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2540                 TCGv_i32 orig = temp_tcgv_i32(ts);
2541 
2542                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2543                     tcg_gen_ext_i32_i64(temp, orig);
2544                 } else {
2545                     tcg_gen_extu_i32_i64(temp, orig);
2546                 }
2547                 op->args[pi++] = tcgv_i64_arg(temp);
2548                 extend_free[n_extend++] = temp;
2549             }
2550             break;
2551 
2552         default:
2553             g_assert_not_reached();
2554         }
2555     }
2556     op->args[pi++] = (uintptr_t)func;
2557     op->args[pi++] = (uintptr_t)info;
2558     tcg_debug_assert(pi == total_args);
2559 
2560     if (tcg_ctx->emit_before_op) {
2561         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2562     } else {
2563         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2564     }
2565 
2566     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2567     for (i = 0; i < n_extend; ++i) {
2568         tcg_temp_free_i64(extend_free[i]);
2569     }
2570 }
2571 
2572 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2573 {
2574     tcg_gen_callN(func, info, ret, NULL);
2575 }
2576 
2577 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2578 {
2579     tcg_gen_callN(func, info, ret, &t1);
2580 }
2581 
2582 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2583                    TCGTemp *t1, TCGTemp *t2)
2584 {
2585     TCGTemp *args[2] = { t1, t2 };
2586     tcg_gen_callN(func, info, ret, args);
2587 }
2588 
2589 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2590                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2591 {
2592     TCGTemp *args[3] = { t1, t2, t3 };
2593     tcg_gen_callN(func, info, ret, args);
2594 }
2595 
2596 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2597                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2598 {
2599     TCGTemp *args[4] = { t1, t2, t3, t4 };
2600     tcg_gen_callN(func, info, ret, args);
2601 }
2602 
2603 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2604                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2605 {
2606     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2607     tcg_gen_callN(func, info, ret, args);
2608 }
2609 
2610 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2611                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2612                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2613 {
2614     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2615     tcg_gen_callN(func, info, ret, args);
2616 }
2617 
2618 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2619                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2620                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2621 {
2622     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2623     tcg_gen_callN(func, info, ret, args);
2624 }
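
/*
 * These fixed-arity wrappers are what the generated helper-call glue
 * invokes.  A hand-written call would look like (sketch; helper_foo and
 * its info are hypothetical):
 *
 *     tcg_gen_call2(helper_foo, &info_helper_foo, tcgv_i32_temp(ret),
 *                   tcgv_i32_temp(arg0), tcgv_i64_temp(arg1));
 */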
2625 
2626 static void tcg_reg_alloc_start(TCGContext *s)
2627 {
2628     int i, n;
2629 
2630     for (i = 0, n = s->nb_temps; i < n; i++) {
2631         TCGTemp *ts = &s->temps[i];
2632         TCGTempVal val = TEMP_VAL_MEM;
2633 
2634         switch (ts->kind) {
2635         case TEMP_CONST:
2636             val = TEMP_VAL_CONST;
2637             break;
2638         case TEMP_FIXED:
2639             val = TEMP_VAL_REG;
2640             break;
2641         case TEMP_GLOBAL:
2642             break;
2643         case TEMP_EBB:
2644             val = TEMP_VAL_DEAD;
2645             /* fall through */
2646         case TEMP_TB:
2647             ts->mem_allocated = 0;
2648             break;
2649         default:
2650             g_assert_not_reached();
2651         }
2652         ts->val_type = val;
2653     }
2654 
2655     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2656 }
2657 
2658 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2659                                  TCGTemp *ts)
2660 {
2661     int idx = temp_idx(ts);
2662 
2663     switch (ts->kind) {
2664     case TEMP_FIXED:
2665     case TEMP_GLOBAL:
2666         pstrcpy(buf, buf_size, ts->name);
2667         break;
2668     case TEMP_TB:
2669         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2670         break;
2671     case TEMP_EBB:
2672         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2673         break;
2674     case TEMP_CONST:
2675         switch (ts->type) {
2676         case TCG_TYPE_I32:
2677             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2678             break;
2679 #if TCG_TARGET_REG_BITS > 32
2680         case TCG_TYPE_I64:
2681             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2682             break;
2683 #endif
2684         case TCG_TYPE_V64:
2685         case TCG_TYPE_V128:
2686         case TCG_TYPE_V256:
2687             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2688                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2689             break;
2690         default:
2691             g_assert_not_reached();
2692         }
2693         break;
2694     }
2695     return buf;
2696 }
2697 
2698 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2699                              int buf_size, TCGArg arg)
2700 {
2701     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2702 }
2703 
2704 static const char * const cond_name[] =
2705 {
2706     [TCG_COND_NEVER] = "never",
2707     [TCG_COND_ALWAYS] = "always",
2708     [TCG_COND_EQ] = "eq",
2709     [TCG_COND_NE] = "ne",
2710     [TCG_COND_LT] = "lt",
2711     [TCG_COND_GE] = "ge",
2712     [TCG_COND_LE] = "le",
2713     [TCG_COND_GT] = "gt",
2714     [TCG_COND_LTU] = "ltu",
2715     [TCG_COND_GEU] = "geu",
2716     [TCG_COND_LEU] = "leu",
2717     [TCG_COND_GTU] = "gtu",
2718     [TCG_COND_TSTEQ] = "tsteq",
2719     [TCG_COND_TSTNE] = "tstne",
2720 };
2721 
2722 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2723 {
2724     [MO_UB]   = "ub",
2725     [MO_SB]   = "sb",
2726     [MO_LEUW] = "leuw",
2727     [MO_LESW] = "lesw",
2728     [MO_LEUL] = "leul",
2729     [MO_LESL] = "lesl",
2730     [MO_LEUQ] = "leq",
2731     [MO_BEUW] = "beuw",
2732     [MO_BESW] = "besw",
2733     [MO_BEUL] = "beul",
2734     [MO_BESL] = "besl",
2735     [MO_BEUQ] = "beq",
2736     [MO_128 + MO_BE] = "beo",
2737     [MO_128 + MO_LE] = "leo",
2738 };
2739 
2740 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2741     [MO_UNALN >> MO_ASHIFT]    = "un+",
2742     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2743     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2744     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2745     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2746     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2747     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2748     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2749 };
2750 
2751 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2752     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2753     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2754     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2755     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2756     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2757     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2758 };
2759 
2760 static const char bswap_flag_name[][6] = {
2761     [TCG_BSWAP_IZ] = "iz",
2762     [TCG_BSWAP_OZ] = "oz",
2763     [TCG_BSWAP_OS] = "os",
2764     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2765     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2766 };
2767 
2768 #ifdef CONFIG_PLUGIN
2769 static const char * const plugin_from_name[] = {
2770     "from-tb",
2771     "from-insn",
2772     "after-insn",
2773     "after-tb",
2774 };
2775 #endif
2776 
2777 static inline bool tcg_regset_single(TCGRegSet d)
2778 {
2779     return (d & (d - 1)) == 0;
2780 }
2781 
2782 static inline TCGReg tcg_regset_first(TCGRegSet d)
2783 {
2784     if (TCG_TARGET_NB_REGS <= 32) {
2785         return ctz32(d);
2786     } else {
2787         return ctz64(d);
2788     }
2789 }
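
/*
 * Bit-trick note: d & (d - 1) clears the lowest set bit, so the result
 * is zero iff the set contains at most one register (the empty set
 * included).  For example, with d = 0x8 tcg_regset_single() is true and
 * tcg_regset_first() returns register 3.
 */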
2790 
2791 /* Return only the number of characters output -- no error return. */
2792 #define ne_fprintf(...) \
2793     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2794 
2795 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2796 {
2797     char buf[128];
2798     TCGOp *op;
2799 
2800     QTAILQ_FOREACH(op, &s->ops, link) {
2801         int i, k, nb_oargs, nb_iargs, nb_cargs;
2802         const TCGOpDef *def;
2803         TCGOpcode c;
2804         int col = 0;
2805 
2806         c = op->opc;
2807         def = &tcg_op_defs[c];
2808 
2809         if (c == INDEX_op_insn_start) {
2810             nb_oargs = 0;
2811             col += ne_fprintf(f, "\n ----");
2812 
2813             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2814                 col += ne_fprintf(f, " %016" PRIx64,
2815                                   tcg_get_insn_start_param(op, i));
2816             }
2817         } else if (c == INDEX_op_call) {
2818             const TCGHelperInfo *info = tcg_call_info(op);
2819             void *func = tcg_call_func(op);
2820 
2821             /* variable number of arguments */
2822             nb_oargs = TCGOP_CALLO(op);
2823             nb_iargs = TCGOP_CALLI(op);
2824             nb_cargs = def->nb_cargs;
2825 
2826             col += ne_fprintf(f, " %s ", def->name);
2827 
2828             /*
2829              * Print the function name from TCGHelperInfo, if available.
2830              * Note that plugins have a template function for the info,
2831              * but the actual function pointer comes from the plugin.
2832              */
2833             if (func == info->func) {
2834                 col += ne_fprintf(f, "%s", info->name);
2835             } else {
2836                 col += ne_fprintf(f, "plugin(%p)", func);
2837             }
2838 
2839             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2840             for (i = 0; i < nb_oargs; i++) {
2841                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2842                                                             op->args[i]));
2843             }
2844             for (i = 0; i < nb_iargs; i++) {
2845                 TCGArg arg = op->args[nb_oargs + i];
2846                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2847                 col += ne_fprintf(f, ",%s", t);
2848             }
2849         } else {
2850             if (def->flags & TCG_OPF_INT) {
2851                 col += ne_fprintf(f, " %s_i%d ",
2852                                   def->name,
2853                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2854             } else if (def->flags & TCG_OPF_VECTOR) {
2855                 col += ne_fprintf(f, "%s v%d,e%d,",
2856                                   def->name,
2857                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2858                                   8 << TCGOP_VECE(op));
2859             } else {
2860                 col += ne_fprintf(f, " %s ", def->name);
2861             }
2862 
2863             nb_oargs = def->nb_oargs;
2864             nb_iargs = def->nb_iargs;
2865             nb_cargs = def->nb_cargs;
2866 
2867             k = 0;
2868             for (i = 0; i < nb_oargs; i++) {
2869                 const char *sep = k ? "," : "";
2870                 col += ne_fprintf(f, "%s%s", sep,
2871                                   tcg_get_arg_str(s, buf, sizeof(buf),
2872                                                   op->args[k++]));
2873             }
2874             for (i = 0; i < nb_iargs; i++) {
2875                 const char *sep = k ? "," : "";
2876                 col += ne_fprintf(f, "%s%s", sep,
2877                                   tcg_get_arg_str(s, buf, sizeof(buf),
2878                                                   op->args[k++]));
2879             }
2880             switch (c) {
2881             case INDEX_op_brcond_i32:
2882             case INDEX_op_setcond_i32:
2883             case INDEX_op_negsetcond_i32:
2884             case INDEX_op_movcond_i32:
2885             case INDEX_op_brcond2_i32:
2886             case INDEX_op_setcond2_i32:
2887             case INDEX_op_brcond_i64:
2888             case INDEX_op_setcond_i64:
2889             case INDEX_op_negsetcond_i64:
2890             case INDEX_op_movcond_i64:
2891             case INDEX_op_cmp_vec:
2892             case INDEX_op_cmpsel_vec:
2893                 if (op->args[k] < ARRAY_SIZE(cond_name)
2894                     && cond_name[op->args[k]]) {
2895                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2896                 } else {
2897                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2898                 }
2899                 i = 1;
2900                 break;
2901             case INDEX_op_qemu_ld_i32:
2902             case INDEX_op_qemu_st_i32:
2903             case INDEX_op_qemu_st8_i32:
2904             case INDEX_op_qemu_ld_i64:
2905             case INDEX_op_qemu_st_i64:
2906             case INDEX_op_qemu_ld_i128:
2907             case INDEX_op_qemu_st_i128:
2908                 {
2909                     const char *s_al, *s_op, *s_at;
2910                     MemOpIdx oi = op->args[k++];
2911                     MemOp mop = get_memop(oi);
2912                     unsigned ix = get_mmuidx(oi);
2913 
2914                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2915                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2916                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2917                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2918 
2919                     /* If all fields are accounted for, print symbolically. */
2920                     if (!mop && s_al && s_op && s_at) {
2921                         col += ne_fprintf(f, ",%s%s%s,%u",
2922                                           s_at, s_al, s_op, ix);
2923                     } else {
2924                         mop = get_memop(oi);
2925                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2926                     }
2927                     i = 1;
2928                 }
2929                 break;
2930             case INDEX_op_bswap16_i32:
2931             case INDEX_op_bswap16_i64:
2932             case INDEX_op_bswap32_i32:
2933             case INDEX_op_bswap32_i64:
2934             case INDEX_op_bswap64_i64:
2935                 {
2936                     TCGArg flags = op->args[k];
2937                     const char *name = NULL;
2938 
2939                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2940                         name = bswap_flag_name[flags];
2941                     }
2942                     if (name) {
2943                         col += ne_fprintf(f, ",%s", name);
2944                     } else {
2945                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2946                     }
2947                     i = k = 1;
2948                 }
2949                 break;
2950 #ifdef CONFIG_PLUGIN
2951             case INDEX_op_plugin_cb:
2952                 {
2953                     TCGArg from = op->args[k++];
2954                     const char *name = NULL;
2955 
2956                     if (from < ARRAY_SIZE(plugin_from_name)) {
2957                         name = plugin_from_name[from];
2958                     }
2959                     if (name) {
2960                         col += ne_fprintf(f, "%s", name);
2961                     } else {
2962                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2963                     }
2964                     i = 1;
2965                 }
2966                 break;
2967 #endif
2968             default:
2969                 i = 0;
2970                 break;
2971             }
2972             switch (c) {
2973             case INDEX_op_set_label:
2974             case INDEX_op_br:
2975             case INDEX_op_brcond_i32:
2976             case INDEX_op_brcond_i64:
2977             case INDEX_op_brcond2_i32:
2978                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2979                                   arg_label(op->args[k])->id);
2980                 i++, k++;
2981                 break;
2982             case INDEX_op_mb:
2983                 {
2984                     TCGBar membar = op->args[k];
2985                     const char *b_op, *m_op;
2986 
2987                     switch (membar & TCG_BAR_SC) {
2988                     case 0:
2989                         b_op = "none";
2990                         break;
2991                     case TCG_BAR_LDAQ:
2992                         b_op = "acq";
2993                         break;
2994                     case TCG_BAR_STRL:
2995                         b_op = "rel";
2996                         break;
2997                     case TCG_BAR_SC:
2998                         b_op = "seq";
2999                         break;
3000                     default:
3001                         g_assert_not_reached();
3002                     }
3003 
3004                     switch (membar & TCG_MO_ALL) {
3005                     case 0:
3006                         m_op = "none";
3007                         break;
3008                     case TCG_MO_LD_LD:
3009                         m_op = "rr";
3010                         break;
3011                     case TCG_MO_LD_ST:
3012                         m_op = "rw";
3013                         break;
3014                     case TCG_MO_ST_LD:
3015                         m_op = "wr";
3016                         break;
3017                     case TCG_MO_ST_ST:
3018                         m_op = "ww";
3019                         break;
3020                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3021                         m_op = "rr+rw";
3022                         break;
3023                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3024                         m_op = "rr+wr";
3025                         break;
3026                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3027                         m_op = "rr+ww";
3028                         break;
3029                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3030                         m_op = "rw+wr";
3031                         break;
3032                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3033                         m_op = "rw+ww";
3034                         break;
3035                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3036                         m_op = "wr+ww";
3037                         break;
3038                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3039                         m_op = "rr+rw+wr";
3040                         break;
3041                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3042                         m_op = "rr+rw+ww";
3043                         break;
3044                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3045                         m_op = "rr+wr+ww";
3046                         break;
3047                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3048                         m_op = "rw+wr+ww";
3049                         break;
3050                     case TCG_MO_ALL:
3051                         m_op = "all";
3052                         break;
3053                     default:
3054                         g_assert_not_reached();
3055                     }
3056 
3057                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3058                     i++, k++;
3059                 }
3060                 break;
3061             default:
3062                 break;
3063             }
3064             for (; i < nb_cargs; i++, k++) {
3065                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3066                                   op->args[k]);
3067             }
3068         }
3069 
3070         if (have_prefs || op->life) {
3071             for (; col < 40; ++col) {
3072                 putc(' ', f);
3073             }
3074         }
3075 
3076         if (op->life) {
3077             unsigned life = op->life;
3078 
3079             if (life & (SYNC_ARG * 3)) {
3080                 ne_fprintf(f, "  sync:");
3081                 for (i = 0; i < 2; ++i) {
3082                     if (life & (SYNC_ARG << i)) {
3083                         ne_fprintf(f, " %d", i);
3084                     }
3085                 }
3086             }
3087             life /= DEAD_ARG;
3088             if (life) {
3089                 ne_fprintf(f, "  dead:");
3090                 for (i = 0; life; ++i, life >>= 1) {
3091                     if (life & 1) {
3092                         ne_fprintf(f, " %d", i);
3093                     }
3094                 }
3095             }
3096         }
3097 
3098         if (have_prefs) {
3099             for (i = 0; i < nb_oargs; ++i) {
3100                 TCGRegSet set = output_pref(op, i);
3101 
3102                 if (i == 0) {
3103                     ne_fprintf(f, "  pref=");
3104                 } else {
3105                     ne_fprintf(f, ",");
3106                 }
3107                 if (set == 0) {
3108                     ne_fprintf(f, "none");
3109                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3110                     ne_fprintf(f, "all");
3111 #ifdef CONFIG_DEBUG_TCG
3112                 } else if (tcg_regset_single(set)) {
3113                     TCGReg reg = tcg_regset_first(set);
3114                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3115 #endif
3116                 } else if (TCG_TARGET_NB_REGS <= 32) {
3117                     ne_fprintf(f, "0x%x", (uint32_t)set);
3118                 } else {
3119                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3120                 }
3121             }
3122         }
3123 
3124         putc('\n', f);
3125     }
3126 }
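
/*
 * Sample of the output produced by the loop above (values illustrative):
 *
 *      ---- 0000000000401000 0000000000000000
 *      mov_i32 tmp0,r0
 *      add_i32 tmp0,tmp0,$0x1
 *      brcond_i32 tmp0,$0x0,eq,$L1
 */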
3127 
3128 /* we give more priority to constraints with fewer registers */
3129 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3130 {
3131     int n;
3132 
3133     arg_ct += k;
3134     n = ctpop64(arg_ct->regs);
3135 
3136     /*
3137      * Sort constraints of a single register first, which includes output
3138      * aliases (which must exactly match the input already allocated).
3139      */
3140     if (n == 1 || arg_ct->oalias) {
3141         return INT_MAX;
3142     }
3143 
3144     /*
3145      * Sort register pairs next, first then second immediately after.
3146      * Arbitrarily sort multiple pairs by the index of the first reg;
3147      * there shouldn't be many pairs.
3148      */
3149     switch (arg_ct->pair) {
3150     case 1:
3151     case 3:
3152         return (k + 1) * 2;
3153     case 2:
3154         return (arg_ct->pair_index + 1) * 2 - 1;
3155     }
3156 
3157     /* Finally, sort by decreasing register count. */
3158     assert(n > 1);
3159     return -n;
3160 }
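
/*
 * Worked example: an output constraint aliased by an input ("0" in the
 * constraint string) sorts first at INT_MAX, as does any constraint
 * admitting exactly one register; paired constraints come next, ordered
 * so the first of a pair immediately precedes the second; a broad class
 * such as "r" sorts last with priority -n for its n allowable registers.
 */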
3161 
3162 /* sort from highest priority to lowest */
3163 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3164 {
3165     int i, j;
3166 
3167     for (i = 0; i < n; i++) {
3168         a[start + i].sort_index = start + i;
3169     }
3170     if (n <= 1) {
3171         return;
3172     }
3173     for (i = 0; i < n - 1; i++) {
3174         for (j = i + 1; j < n; j++) {
3175             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3176             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3177             if (p1 < p2) {
3178                 int tmp = a[start + i].sort_index;
3179                 a[start + i].sort_index = a[start + j].sort_index;
3180                 a[start + j].sort_index = tmp;
3181             }
3182         }
3183     }
3184 }
3185 
3186 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3187 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3188 
3189 static void process_constraint_sets(void)
3190 {
3191     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3192         const TCGConstraintSet *tdefs = &constraint_sets[c];
3193         TCGArgConstraint *args_ct = all_cts[c];
3194         int nb_oargs = tdefs->nb_oargs;
3195         int nb_iargs = tdefs->nb_iargs;
3196         int nb_args = nb_oargs + nb_iargs;
3197         bool saw_alias_pair = false;
3198 
3199         for (int i = 0; i < nb_args; i++) {
3200             const char *ct_str = tdefs->args_ct_str[i];
3201             bool input_p = i >= nb_oargs;
3202             int o;
3203 
3204             switch (*ct_str) {
3205             case '0' ... '9':
3206                 o = *ct_str - '0';
3207                 tcg_debug_assert(input_p);
3208                 tcg_debug_assert(o < nb_oargs);
3209                 tcg_debug_assert(args_ct[o].regs != 0);
3210                 tcg_debug_assert(!args_ct[o].oalias);
3211                 args_ct[i] = args_ct[o];
3212                 /* The output sets oalias.  */
3213                 args_ct[o].oalias = 1;
3214                 args_ct[o].alias_index = i;
3215                 /* The input sets ialias. */
3216                 args_ct[i].ialias = 1;
3217                 args_ct[i].alias_index = o;
3218                 if (args_ct[i].pair) {
3219                     saw_alias_pair = true;
3220                 }
3221                 tcg_debug_assert(ct_str[1] == '\0');
3222                 continue;
3223 
3224             case '&':
3225                 tcg_debug_assert(!input_p);
3226                 args_ct[i].newreg = true;
3227                 ct_str++;
3228                 break;
3229 
3230             case 'p': /* plus */
3231                 /* Allocate to the register after the previous. */
3232                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3233                 o = i - 1;
3234                 tcg_debug_assert(!args_ct[o].pair);
3235                 tcg_debug_assert(!args_ct[o].ct);
3236                 args_ct[i] = (TCGArgConstraint){
3237                     .pair = 2,
3238                     .pair_index = o,
3239                     .regs = args_ct[o].regs << 1,
3240                     .newreg = args_ct[o].newreg,
3241                 };
3242                 args_ct[o].pair = 1;
3243                 args_ct[o].pair_index = i;
3244                 tcg_debug_assert(ct_str[1] == '\0');
3245                 continue;
3246 
3247             case 'm': /* minus */
3248                 /* Allocate to the register before the previous. */
3249                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3250                 o = i - 1;
3251                 tcg_debug_assert(!args_ct[o].pair);
3252                 tcg_debug_assert(!args_ct[o].ct);
3253                 args_ct[i] = (TCGArgConstraint){
3254                     .pair = 1,
3255                     .pair_index = o,
3256                     .regs = args_ct[o].regs >> 1,
3257                     .newreg = args_ct[o].newreg,
3258                 };
3259                 args_ct[o].pair = 2;
3260                 args_ct[o].pair_index = i;
3261                 tcg_debug_assert(ct_str[1] == '\0');
3262                 continue;
3263             }
3264 
3265             do {
3266                 switch (*ct_str) {
3267                 case 'i':
3268                     args_ct[i].ct |= TCG_CT_CONST;
3269                     break;
3270 #ifdef TCG_REG_ZERO
3271                 case 'z':
3272                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3273                     break;
3274 #endif
3275 
3276                 /* Include all of the target-specific constraints. */
3277 
3278 #undef CONST
3279 #define CONST(CASE, MASK) \
3280     case CASE: args_ct[i].ct |= MASK; break;
3281 #define REGS(CASE, MASK) \
3282     case CASE: args_ct[i].regs |= MASK; break;
3283 
3284 #include "tcg-target-con-str.h"
3285 
3286 #undef REGS
3287 #undef CONST
3288                 default:
3289                 case '0' ... '9':
3290                 case '&':
3291                 case 'p':
3292                 case 'm':
3293                     /* Typo in TCGConstraintSet constraint. */
3294                     g_assert_not_reached();
3295                 }
3296             } while (*++ct_str != '\0');
3297         }
3298 
3299         /*
3300          * Fix up output pairs that are aliased with inputs.
3301          * When we created the alias, we copied pair from the output.
3302          * There are three cases:
3303          *    (1a) Pairs of inputs alias pairs of outputs.
3304          *    (1b) One input aliases the first of a pair of outputs.
3305          *    (2)  One input aliases the second of a pair of outputs.
3306          *
3307          * Case 1a is handled by making sure that the pair_index'es are
3308          * properly updated so that they appear the same as a pair of inputs.
3309          *
3310          * Case 1b is handled by setting the pair_index of the input to
3311          * itself, simply so it doesn't point to an unrelated argument.
3312          * Since we don't encounter the "second" during the input allocation
3313          * phase, nothing happens with the second half of the input pair.
3314          *
3315          * Case 2 is handled by setting the second input to pair=3, the
3316          * first output to pair=3, and the pair_index'es to match.
3317          */
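        /*
         * Illustrative example of case 2 (hypothetical constraints, not
         * from the original source): outputs { "r", "p" } form a pair,
         * and a lone input "1" aliases the second output.  The parse
         * above copied pair == 2 into the input; the fix-up below turns
         * this into:
         *
         *     input:         pair = 3, pair_index = first output
         *     first output:  pair = 3, pair_index = input
         *
         * so the allocator can recognize the half-aliased pair.
         */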
3318         if (saw_alias_pair) {
3319             for (int i = nb_oargs; i < nb_args; i++) {
3320                 int o, o2, i2;
3321 
3322                 /*
3323                  * Since [0-9pm] must be alone in the constraint string,
3324                  * the only way they can both be set is if the pair comes
3325                  * from the output alias.
3326                  */
3327                 if (!args_ct[i].ialias) {
3328                     continue;
3329                 }
3330                 switch (args_ct[i].pair) {
3331                 case 0:
3332                     break;
3333                 case 1:
3334                     o = args_ct[i].alias_index;
3335                     o2 = args_ct[o].pair_index;
3336                     tcg_debug_assert(args_ct[o].pair == 1);
3337                     tcg_debug_assert(args_ct[o2].pair == 2);
3338                     if (args_ct[o2].oalias) {
3339                         /* Case 1a */
3340                         i2 = args_ct[o2].alias_index;
3341                         tcg_debug_assert(args_ct[i2].pair == 2);
3342                         args_ct[i2].pair_index = i;
3343                         args_ct[i].pair_index = i2;
3344                     } else {
3345                         /* Case 1b */
3346                         args_ct[i].pair_index = i;
3347                     }
3348                     break;
3349                 case 2:
3350                     o = args_ct[i].alias_index;
3351                     o2 = args_ct[o].pair_index;
3352                     tcg_debug_assert(args_ct[o].pair == 2);
3353                     tcg_debug_assert(args_ct[o2].pair == 1);
3354                     if (args_ct[o2].oalias) {
3355                         /* Case 1a */
3356                         i2 = args_ct[o2].alias_index;
3357                         tcg_debug_assert(args_ct[i2].pair == 1);
3358                         args_ct[i2].pair_index = i;
3359                         args_ct[i].pair_index = i2;
3360                     } else {
3361                         /* Case 2 */
3362                         args_ct[i].pair = 3;
3363                         args_ct[o2].pair = 3;
3364                         args_ct[i].pair_index = o2;
3365                         args_ct[o2].pair_index = i;
3366                     }
3367                     break;
3368                 default:
3369                     g_assert_not_reached();
3370                 }
3371             }
3372         }
3373 
3374         /* sort the constraints (XXX: this is just a heuristic) */
3375         sort_constraints(args_ct, 0, nb_oargs);
3376         sort_constraints(args_ct, nb_oargs, nb_iargs);
3377     }
3378 }
3379 
3380 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3381 {
3382     TCGOpcode opc = op->opc;
3383     TCGType type = TCGOP_TYPE(op);
3384     unsigned flags = TCGOP_FLAGS(op);
3385     const TCGOpDef *def = &tcg_op_defs[opc];
3386     const TCGOutOp *outop = all_outop[opc];
3387     TCGConstraintSetIndex con_set;
3388 
3389     if (def->flags & TCG_OPF_NOT_PRESENT) {
3390         return empty_cts;
3391     }
3392 
3393     if (outop) {
3394         con_set = outop->static_constraint;
3395         if (con_set == C_Dynamic) {
3396             con_set = outop->dynamic_constraint(type, flags);
3397         }
3398     } else {
3399         con_set = tcg_target_op_def(opc, type, flags);
3400     }
3401     tcg_debug_assert(con_set >= 0);
3402     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3403 
3404     /* The constraint arguments must match TCGOpcode arguments. */
3405     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3406     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3407 
3408     return all_cts[con_set];
3409 }
3410 
3411 static void remove_label_use(TCGOp *op, int idx)
3412 {
3413     TCGLabel *label = arg_label(op->args[idx]);
3414     TCGLabelUse *use;
3415 
3416     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3417         if (use->op == op) {
3418             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3419             return;
3420         }
3421     }
3422     g_assert_not_reached();
3423 }
3424 
3425 void tcg_op_remove(TCGContext *s, TCGOp *op)
3426 {
3427     switch (op->opc) {
3428     case INDEX_op_br:
3429         remove_label_use(op, 0);
3430         break;
3431     case INDEX_op_brcond_i32:
3432     case INDEX_op_brcond_i64:
3433         remove_label_use(op, 3);
3434         break;
3435     case INDEX_op_brcond2_i32:
3436         remove_label_use(op, 5);
3437         break;
3438     default:
3439         break;
3440     }
3441 
3442     QTAILQ_REMOVE(&s->ops, op, link);
3443     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3444     s->nb_ops--;
3445 }
3446 
3447 void tcg_remove_ops_after(TCGOp *op)
3448 {
3449     TCGContext *s = tcg_ctx;
3450 
3451     while (true) {
3452         TCGOp *last = tcg_last_op();
3453         if (last == op) {
3454             return;
3455         }
3456         tcg_op_remove(s, last);
3457     }
3458 }
3459 
3460 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3461 {
3462     TCGContext *s = tcg_ctx;
3463     TCGOp *op = NULL;
3464 
3465     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3466         QTAILQ_FOREACH(op, &s->free_ops, link) {
3467             if (nargs <= op->nargs) {
3468                 QTAILQ_REMOVE(&s->free_ops, op, link);
3469                 nargs = op->nargs;
3470                 goto found;
3471             }
3472         }
3473     }
3474 
3475     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3476     nargs = MAX(4, nargs);
3477     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3478 
3479  found:
3480     memset(op, 0, offsetof(TCGOp, link));
3481     op->opc = opc;
3482     op->nargs = nargs;
3483 
3484     /* Check for bitfield overflow. */
3485     tcg_debug_assert(op->nargs == nargs);
3486 
3487     s->nb_ops++;
3488     return op;
3489 }
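/*
 * A sketch of the free-list reuse above (illustrative, assuming these ops
 * appear within a single translation):
 *
 *     op = tcg_emit_op(INDEX_op_call, 8);   // fresh allocation, nargs == 8
 *     tcg_op_remove(s, op);                 // moved onto s->free_ops
 *     op = tcg_emit_op(INDEX_op_add, 3);    // reused; nargs stays 8
 *
 * A recycled op keeps its original, possibly larger, capacity so the same
 * slot can later satisfy another wide op without a new tcg_malloc.
 */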
3490 
3491 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3492 {
3493     TCGOp *op = tcg_op_alloc(opc, nargs);
3494 
3495     if (tcg_ctx->emit_before_op) {
3496         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3497     } else {
3498         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3499     }
3500     return op;
3501 }
3502 
3503 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3504                             TCGOpcode opc, TCGType type, unsigned nargs)
3505 {
3506     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3507 
3508     TCGOP_TYPE(new_op) = type;
3509     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3510     return new_op;
3511 }
3512 
3513 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3514                            TCGOpcode opc, TCGType type, unsigned nargs)
3515 {
3516     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3517 
3518     TCGOP_TYPE(new_op) = type;
3519     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3520     return new_op;
3521 }
3522 
3523 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3524 {
3525     TCGLabelUse *u;
3526 
3527     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3528         TCGOp *op = u->op;
3529         switch (op->opc) {
3530         case INDEX_op_br:
3531             op->args[0] = label_arg(to);
3532             break;
3533         case INDEX_op_brcond_i32:
3534         case INDEX_op_brcond_i64:
3535             op->args[3] = label_arg(to);
3536             break;
3537         case INDEX_op_brcond2_i32:
3538             op->args[5] = label_arg(to);
3539             break;
3540         default:
3541             g_assert_not_reached();
3542         }
3543     }
3544 
3545     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3546 }
3547 
3548 /* Reachability analysis: remove unreachable code.  */
3549 static void __attribute__((noinline))
3550 reachable_code_pass(TCGContext *s)
3551 {
3552     TCGOp *op, *op_next, *op_prev;
3553     bool dead = false;
3554 
3555     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3556         bool remove = dead;
3557         TCGLabel *label;
3558 
3559         switch (op->opc) {
3560         case INDEX_op_set_label:
3561             label = arg_label(op->args[0]);
3562 
3563             /*
3564              * Note that the first op in the TB is always a load,
3565              * so there is always something before a label.
3566              */
3567             op_prev = QTAILQ_PREV(op, link);
3568 
3569             /*
3570              * If we find two sequential labels, move all branches to
3571              * reference the second label and remove the first label.
3572              * Do this before branch to next optimization, so that the
3573              * middle label is out of the way.
3574              */
3575             if (op_prev->opc == INDEX_op_set_label) {
3576                 move_label_uses(label, arg_label(op_prev->args[0]));
3577                 tcg_op_remove(s, op_prev);
3578                 op_prev = QTAILQ_PREV(op, link);
3579             }
3580 
3581             /*
3582              * Optimization can fold conditional branches to unconditional.
3583              * If we find a label which is preceded by an unconditional
3584              * branch to next, remove the branch.  We couldn't do this when
3585              * processing the branch because any dead code between the branch
3586              * and label had not yet been removed.
3587              */
3588             if (op_prev->opc == INDEX_op_br &&
3589                 label == arg_label(op_prev->args[0])) {
3590                 tcg_op_remove(s, op_prev);
3591                 /* Fall through means insns become live again.  */
3592                 dead = false;
3593             }
3594 
3595             if (QSIMPLEQ_EMPTY(&label->branches)) {
3596                 /*
3597                  * While there is an occasional backward branch, virtually
3598                  * all branches generated by the translators are forward.
3599                  * Which means that generally we will have already removed
3600              * all references to the label that there will ever be, and there is
3601                  * little to be gained by iterating.
3602                  */
3603                 remove = true;
3604             } else {
3605                 /* Once we see a label, insns become live again.  */
3606                 dead = false;
3607                 remove = false;
3608             }
3609             break;
3610 
3611         case INDEX_op_br:
3612         case INDEX_op_exit_tb:
3613         case INDEX_op_goto_ptr:
3614             /* Unconditional branches; everything following is dead.  */
3615             dead = true;
3616             break;
3617 
3618         case INDEX_op_call:
3619             /* Notice noreturn helper calls, raising exceptions.  */
3620             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3621                 dead = true;
3622             }
3623             break;
3624 
3625         case INDEX_op_insn_start:
3626             /* Never remove -- we need to keep these for unwind.  */
3627             remove = false;
3628             break;
3629 
3630         default:
3631             break;
3632         }
3633 
3634         if (remove) {
3635             tcg_op_remove(s, op);
3636         }
3637     }
3638 }
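/*
 * Illustrative before/after for this pass (assumed op stream, not from
 * the original source):
 *
 *     brcond_i32 ... $L0        (use redirected to $L1)
 *     ...
 *     br $L1                    (removed: branch to next)
 *     set_label $L0             (removed: merged into $L1)
 *     set_label $L1             set_label $L1
 *     ...                       ...
 *     exit_tb                   exit_tb
 *     mov ...                   (removed: unreachable)
 *
 * The label merge runs first, so by the time the branch-to-next test
 * fires the middle label is already out of the way.
 */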
3639 
3640 #define TS_DEAD  1
3641 #define TS_MEM   2
3642 
3643 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3644 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
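/*
 * Assumed reading of the per-temp state encoding used below (the passes
 * walk the op list backwards, so "dead" means dead *after* the op being
 * examined):
 *
 *     ts->state == 0                   live, not known to be in memory
 *     ts->state == TS_MEM              live, with a coherent copy in memory
 *     ts->state == TS_DEAD             value no longer needed
 *     ts->state == TS_DEAD | TS_MEM    dead, canonical copy in memory
 */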
3645 
3646 /* For liveness_pass_1, the register preferences for a given temp.  */
3647 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3648 {
3649     return ts->state_ptr;
3650 }
3651 
3652 /* For liveness_pass_1, reset the preferences for a given temp to the
3653  * maximal regset for its type.
3654  */
3655 static inline void la_reset_pref(TCGTemp *ts)
3656 {
3657     *la_temp_pref(ts)
3658         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3659 }
3660 
3661 /* liveness analysis: end of function: all temps are dead, and globals
3662    should be in memory. */
3663 static void la_func_end(TCGContext *s, int ng, int nt)
3664 {
3665     int i;
3666 
3667     for (i = 0; i < ng; ++i) {
3668         s->temps[i].state = TS_DEAD | TS_MEM;
3669         la_reset_pref(&s->temps[i]);
3670     }
3671     for (i = ng; i < nt; ++i) {
3672         s->temps[i].state = TS_DEAD;
3673         la_reset_pref(&s->temps[i]);
3674     }
3675 }
3676 
3677 /* liveness analysis: end of basic block: all temps are dead, globals
3678    and local temps should be in memory. */
3679 static void la_bb_end(TCGContext *s, int ng, int nt)
3680 {
3681     int i;
3682 
3683     for (i = 0; i < nt; ++i) {
3684         TCGTemp *ts = &s->temps[i];
3685         int state;
3686 
3687         switch (ts->kind) {
3688         case TEMP_FIXED:
3689         case TEMP_GLOBAL:
3690         case TEMP_TB:
3691             state = TS_DEAD | TS_MEM;
3692             break;
3693         case TEMP_EBB:
3694         case TEMP_CONST:
3695             state = TS_DEAD;
3696             break;
3697         default:
3698             g_assert_not_reached();
3699         }
3700         ts->state = state;
3701         la_reset_pref(ts);
3702     }
3703 }
3704 
3705 /* liveness analysis: sync globals back to memory.  */
3706 static void la_global_sync(TCGContext *s, int ng)
3707 {
3708     int i;
3709 
3710     for (i = 0; i < ng; ++i) {
3711         int state = s->temps[i].state;
3712         s->temps[i].state = state | TS_MEM;
3713         if (state == TS_DEAD) {
3714             /* If the global was previously dead, reset prefs.  */
3715             la_reset_pref(&s->temps[i]);
3716         }
3717     }
3718 }
3719 
3720 /*
3721  * liveness analysis: conditional branch: all temps are dead unless
3722  * explicitly live-across-conditional-branch, globals and local temps
3723  * should be synced.
3724  */
3725 static void la_bb_sync(TCGContext *s, int ng, int nt)
3726 {
3727     la_global_sync(s, ng);
3728 
3729     for (int i = ng; i < nt; ++i) {
3730         TCGTemp *ts = &s->temps[i];
3731         int state;
3732 
3733         switch (ts->kind) {
3734         case TEMP_TB:
3735             state = ts->state;
3736             ts->state = state | TS_MEM;
3737             if (state != TS_DEAD) {
3738                 continue;
3739             }
3740             break;
3741         case TEMP_EBB:
3742         case TEMP_CONST:
3743             continue;
3744         default:
3745             g_assert_not_reached();
3746         }
3747         la_reset_pref(&s->temps[i]);
3748     }
3749 }
3750 
3751 /* liveness analysis: sync globals back to memory and kill.  */
3752 static void la_global_kill(TCGContext *s, int ng)
3753 {
3754     int i;
3755 
3756     for (i = 0; i < ng; i++) {
3757         s->temps[i].state = TS_DEAD | TS_MEM;
3758         la_reset_pref(&s->temps[i]);
3759     }
3760 }
3761 
3762 /* liveness analysis: note live globals crossing calls.  */
3763 static void la_cross_call(TCGContext *s, int nt)
3764 {
3765     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3766     int i;
3767 
3768     for (i = 0; i < nt; i++) {
3769         TCGTemp *ts = &s->temps[i];
3770         if (!(ts->state & TS_DEAD)) {
3771             TCGRegSet *pset = la_temp_pref(ts);
3772             TCGRegSet set = *pset;
3773 
3774             set &= mask;
3775             /* If the combination is not possible, restart.  */
3776             if (set == 0) {
3777                 set = tcg_target_available_regs[ts->type] & mask;
3778             }
3779             *pset = set;
3780         }
3781     }
3782 }
3783 
3784 /*
3785  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3786  * to TEMP_EBB, if possible.
3787  */
3788 static void __attribute__((noinline))
3789 liveness_pass_0(TCGContext *s)
3790 {
3791     void * const multiple_ebb = (void *)(uintptr_t)-1;
3792     int nb_temps = s->nb_temps;
3793     TCGOp *op, *ebb;
3794 
3795     for (int i = s->nb_globals; i < nb_temps; ++i) {
3796         s->temps[i].state_ptr = NULL;
3797     }
3798 
3799     /*
3800      * Represent each EBB by the op at which it begins.  In the case of
3801      * the first EBB, this is the first op, otherwise it is a label.
3802      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3803      * within a single EBB, else MULTIPLE_EBB.
3804      */
3805     ebb = QTAILQ_FIRST(&s->ops);
3806     QTAILQ_FOREACH(op, &s->ops, link) {
3807         const TCGOpDef *def;
3808         int nb_oargs, nb_iargs;
3809 
3810         switch (op->opc) {
3811         case INDEX_op_set_label:
3812             ebb = op;
3813             continue;
3814         case INDEX_op_discard:
3815             continue;
3816         case INDEX_op_call:
3817             nb_oargs = TCGOP_CALLO(op);
3818             nb_iargs = TCGOP_CALLI(op);
3819             break;
3820         default:
3821             def = &tcg_op_defs[op->opc];
3822             nb_oargs = def->nb_oargs;
3823             nb_iargs = def->nb_iargs;
3824             break;
3825         }
3826 
3827         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3828             TCGTemp *ts = arg_temp(op->args[i]);
3829 
3830             if (ts->kind != TEMP_TB) {
3831                 continue;
3832             }
3833             if (ts->state_ptr == NULL) {
3834                 ts->state_ptr = ebb;
3835             } else if (ts->state_ptr != ebb) {
3836                 ts->state_ptr = multiple_ebb;
3837             }
3838         }
3839     }
3840 
3841     /*
3842      * For TEMP_TB that turned out not to be used beyond one EBB,
3843      * reduce the liveness to TEMP_EBB.
3844      */
3845     for (int i = s->nb_globals; i < nb_temps; ++i) {
3846         TCGTemp *ts = &s->temps[i];
3847         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3848             ts->kind = TEMP_EBB;
3849         }
3850     }
3851 }
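/*
 * Worked example for the pass above (hypothetical ops, hedged):
 *
 *     set_label $L0      <- EBB head A
 *     mov  t1, x
 *     add  t2, t1, y      t1 used only within A  -> demoted to TEMP_EBB
 *     set_label $L1      <- EBB head B
 *     st   t2, env, ...   t2 used in A and B     -> stays TEMP_TB
 *
 * t1's state_ptr only ever records the head of A, while t2 sees two
 * distinct heads and is marked MULTIPLE_EBB.
 */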
3852 
3853 /* Liveness analysis: update the opc_arg_life array to tell if a
3854    given input argument is dead. Instructions updating dead
3855    temporaries are removed. */
3856 static void __attribute__((noinline))
3857 liveness_pass_1(TCGContext *s)
3858 {
3859     int nb_globals = s->nb_globals;
3860     int nb_temps = s->nb_temps;
3861     TCGOp *op, *op_prev;
3862     TCGRegSet *prefs;
3863     int i;
3864 
3865     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3866     for (i = 0; i < nb_temps; ++i) {
3867         s->temps[i].state_ptr = prefs + i;
3868     }
3869 
3870     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3871     la_func_end(s, nb_globals, nb_temps);
3872 
3873     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3874         int nb_iargs, nb_oargs;
3875         TCGOpcode opc_new, opc_new2;
3876         TCGLifeData arg_life = 0;
3877         TCGTemp *ts;
3878         TCGOpcode opc = op->opc;
3879         const TCGOpDef *def = &tcg_op_defs[opc];
3880         const TCGArgConstraint *args_ct;
3881 
3882         switch (opc) {
3883         case INDEX_op_call:
3884             {
3885                 const TCGHelperInfo *info = tcg_call_info(op);
3886                 int call_flags = tcg_call_flags(op);
3887 
3888                 nb_oargs = TCGOP_CALLO(op);
3889                 nb_iargs = TCGOP_CALLI(op);
3890 
3891                 /* pure functions can be removed if their result is unused */
3892                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3893                     for (i = 0; i < nb_oargs; i++) {
3894                         ts = arg_temp(op->args[i]);
3895                         if (ts->state != TS_DEAD) {
3896                             goto do_not_remove_call;
3897                         }
3898                     }
3899                     goto do_remove;
3900                 }
3901             do_not_remove_call:
3902 
3903                 /* Output args are dead.  */
3904                 for (i = 0; i < nb_oargs; i++) {
3905                     ts = arg_temp(op->args[i]);
3906                     if (ts->state & TS_DEAD) {
3907                         arg_life |= DEAD_ARG << i;
3908                     }
3909                     if (ts->state & TS_MEM) {
3910                         arg_life |= SYNC_ARG << i;
3911                     }
3912                     ts->state = TS_DEAD;
3913                     la_reset_pref(ts);
3914                 }
3915 
3916                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3917                 memset(op->output_pref, 0, sizeof(op->output_pref));
3918 
3919                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3920                                     TCG_CALL_NO_READ_GLOBALS))) {
3921                     la_global_kill(s, nb_globals);
3922                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3923                     la_global_sync(s, nb_globals);
3924                 }
3925 
3926                 /* Record arguments that die in this helper.  */
3927                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3928                     ts = arg_temp(op->args[i]);
3929                     if (ts->state & TS_DEAD) {
3930                         arg_life |= DEAD_ARG << i;
3931                     }
3932                 }
3933 
3934                 /* For all live registers, remove call-clobbered prefs.  */
3935                 la_cross_call(s, nb_temps);
3936 
3937                 /*
3938                  * Input arguments are live for preceding opcodes.
3939                  *
3940                  * For those arguments that die, and will be allocated in
3941                  * registers, clear the register set for that arg, to be
3942                  * filled in below.  For args that will be on the stack,
3943                  * reset to any available reg.  Process arguments in reverse
3944                  * order so that if a temp is used more than once, the stack
3945                  * reset to max happens before the register reset to 0.
3946                  */
3947                 for (i = nb_iargs - 1; i >= 0; i--) {
3948                     const TCGCallArgumentLoc *loc = &info->in[i];
3949                     ts = arg_temp(op->args[nb_oargs + i]);
3950 
3951                     if (ts->state & TS_DEAD) {
3952                         switch (loc->kind) {
3953                         case TCG_CALL_ARG_NORMAL:
3954                         case TCG_CALL_ARG_EXTEND_U:
3955                         case TCG_CALL_ARG_EXTEND_S:
3956                             if (arg_slot_reg_p(loc->arg_slot)) {
3957                                 *la_temp_pref(ts) = 0;
3958                                 break;
3959                             }
3960                             /* fall through */
3961                         default:
3962                             *la_temp_pref(ts) =
3963                                 tcg_target_available_regs[ts->type];
3964                             break;
3965                         }
3966                         ts->state &= ~TS_DEAD;
3967                     }
3968                 }
3969 
3970                 /*
3971                  * For each input argument, add its input register to prefs.
3972                  * If a temp is used once, this produces a single set bit;
3973                  * if a temp is used multiple times, this produces a set.
3974                  */
3975                 for (i = 0; i < nb_iargs; i++) {
3976                     const TCGCallArgumentLoc *loc = &info->in[i];
3977                     ts = arg_temp(op->args[nb_oargs + i]);
3978 
3979                     switch (loc->kind) {
3980                     case TCG_CALL_ARG_NORMAL:
3981                     case TCG_CALL_ARG_EXTEND_U:
3982                     case TCG_CALL_ARG_EXTEND_S:
3983                         if (arg_slot_reg_p(loc->arg_slot)) {
3984                             tcg_regset_set_reg(*la_temp_pref(ts),
3985                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3986                         }
3987                         break;
3988                     default:
3989                         break;
3990                     }
3991                 }
3992             }
3993             break;
3994         case INDEX_op_insn_start:
3995             break;
3996         case INDEX_op_discard:
3997             /* mark the temporary as dead */
3998             ts = arg_temp(op->args[0]);
3999             ts->state = TS_DEAD;
4000             la_reset_pref(ts);
4001             break;
4002 
4003         case INDEX_op_add2_i32:
4004         case INDEX_op_add2_i64:
4005             opc_new = INDEX_op_add;
4006             goto do_addsub2;
4007         case INDEX_op_sub2_i32:
4008         case INDEX_op_sub2_i64:
4009             opc_new = INDEX_op_sub;
4010         do_addsub2:
4011             nb_iargs = 4;
4012             nb_oargs = 2;
4013             /* Test if the high part of the operation is dead, but not
4014                the low part.  The result can be optimized to a simple
4015                add or sub.  This happens often for an x86_64 guest when
4016                the cpu mode is set to 32 bit.  */
4017             if (arg_temp(op->args[1])->state == TS_DEAD) {
4018                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4019                     goto do_remove;
4020                 }
4021                 /* Replace the opcode and adjust the args in place,
4022                    leaving 3 unused args at the end.  */
4023                 op->opc = opc = opc_new;
4024                 op->args[1] = op->args[2];
4025                 op->args[2] = op->args[4];
4026                 /* Fall through and mark the single-word operation live.  */
4027                 nb_iargs = 2;
4028                 nb_oargs = 1;
4029             }
4030             goto do_not_remove;
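            /*
             * Illustrative rewrite performed above (assuming the standard
             * lo,hi,al,ah,bl,bh argument layout): when only the high half
             * of the result is dead,
             *
             *     add2_i32 lo, hi, al, ah, bl, bh
             * becomes
             *     add lo, al, bl
             *
             * with the trailing args left unused; when both halves are
             * dead the op is removed entirely.
             */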
4031 
4032         case INDEX_op_muls2_i32:
4033             opc_new = INDEX_op_mul;
4034             opc_new2 = INDEX_op_mulsh_i32;
4035             goto do_mul2;
4036         case INDEX_op_mulu2_i32:
4037         case INDEX_op_mulu2_i64:
4038             opc_new = INDEX_op_mul;
4039             opc_new2 = INDEX_op_muluh;
4040             goto do_mul2;
4041         case INDEX_op_muls2_i64:
4042             opc_new = INDEX_op_mul;
4043             opc_new2 = INDEX_op_mulsh_i64;
4044             goto do_mul2;
4045         do_mul2:
4046             nb_iargs = 2;
4047             nb_oargs = 2;
4048             if (arg_temp(op->args[1])->state == TS_DEAD) {
4049                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4050                     /* Both parts of the operation are dead.  */
4051                     goto do_remove;
4052                 }
4053                 /* The high part of the operation is dead; generate the low. */
4054                 op->opc = opc = opc_new;
4055                 op->args[1] = op->args[2];
4056                 op->args[2] = op->args[3];
4057             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4058                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4059                 /* The low part of the operation is dead; generate the high. */
4060                 op->opc = opc = opc_new2;
4061                 op->args[0] = op->args[1];
4062                 op->args[1] = op->args[2];
4063                 op->args[2] = op->args[3];
4064             } else {
4065                 goto do_not_remove;
4066             }
4067             /* Mark the single-word operation live.  */
4068             nb_oargs = 1;
4069             goto do_not_remove;
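            /*
             * Similarly for the double-word multiplies (assuming the
             * lo,hi,a,b argument layout):
             *
             *     mulu2 lo, hi, a, b  ->  mul   lo, a, b   (hi dead)
             *     mulu2 lo, hi, a, b  ->  muluh hi, a, b   (lo dead and
             *                                               muluh supported)
             */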
4070 
4071         default:
4072             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4073             nb_iargs = def->nb_iargs;
4074             nb_oargs = def->nb_oargs;
4075 
4076             /* Test if the operation can be removed because all
4077                its outputs are dead. We assume that nb_oargs == 0
4078                implies side effects */
4079             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4080                 for (i = 0; i < nb_oargs; i++) {
4081                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4082                         goto do_not_remove;
4083                     }
4084                 }
4085                 goto do_remove;
4086             }
4087             goto do_not_remove;
4088 
4089         do_remove:
4090             tcg_op_remove(s, op);
4091             break;
4092 
4093         do_not_remove:
4094             for (i = 0; i < nb_oargs; i++) {
4095                 ts = arg_temp(op->args[i]);
4096 
4097                 /* Remember the preference of the uses that followed.  */
4098                 if (i < ARRAY_SIZE(op->output_pref)) {
4099                     op->output_pref[i] = *la_temp_pref(ts);
4100                 }
4101 
4102                 /* Output args are dead.  */
4103                 if (ts->state & TS_DEAD) {
4104                     arg_life |= DEAD_ARG << i;
4105                 }
4106                 if (ts->state & TS_MEM) {
4107                     arg_life |= SYNC_ARG << i;
4108                 }
4109                 ts->state = TS_DEAD;
4110                 la_reset_pref(ts);
4111             }
4112 
4113             /* If end of basic block, update.  */
4114             if (def->flags & TCG_OPF_BB_EXIT) {
4115                 la_func_end(s, nb_globals, nb_temps);
4116             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4117                 la_bb_sync(s, nb_globals, nb_temps);
4118             } else if (def->flags & TCG_OPF_BB_END) {
4119                 la_bb_end(s, nb_globals, nb_temps);
4120             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4121                 la_global_sync(s, nb_globals);
4122                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4123                     la_cross_call(s, nb_temps);
4124                 }
4125             }
4126 
4127             /* Record arguments that die in this opcode.  */
4128             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4129                 ts = arg_temp(op->args[i]);
4130                 if (ts->state & TS_DEAD) {
4131                     arg_life |= DEAD_ARG << i;
4132                 }
4133             }
4134 
4135             /* Input arguments are live for preceding opcodes.  */
4136             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4137                 ts = arg_temp(op->args[i]);
4138                 if (ts->state & TS_DEAD) {
4139                     /* For operands that were dead, initially allow
4140                        all regs for the type.  */
4141                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4142                     ts->state &= ~TS_DEAD;
4143                 }
4144             }
4145 
4146             /* Incorporate constraints for this operand.  */
4147             switch (opc) {
4148             case INDEX_op_mov:
4149                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4150                    have proper constraints.  That said, special case
4151                    moves to propagate preferences backward.  */
4152                 if (IS_DEAD_ARG(1)) {
4153                     *la_temp_pref(arg_temp(op->args[0]))
4154                         = *la_temp_pref(arg_temp(op->args[1]));
4155                 }
4156                 break;
4157 
4158             default:
4159                 args_ct = opcode_args_ct(op);
4160                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4161                     const TCGArgConstraint *ct = &args_ct[i];
4162                     TCGRegSet set, *pset;
4163 
4164                     ts = arg_temp(op->args[i]);
4165                     pset = la_temp_pref(ts);
4166                     set = *pset;
4167 
4168                     set &= ct->regs;
4169                     if (ct->ialias) {
4170                         set &= output_pref(op, ct->alias_index);
4171                     }
4172                     /* If the combination is not possible, restart.  */
4173                     if (set == 0) {
4174                         set = ct->regs;
4175                     }
4176                     *pset = set;
4177                 }
4178                 break;
4179             }
4180             break;
4181         }
4182         op->life = arg_life;
4183     }
4184 }
4185 
4186 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4187 static bool __attribute__((noinline))
4188 liveness_pass_2(TCGContext *s)
4189 {
4190     int nb_globals = s->nb_globals;
4191     int nb_temps, i;
4192     bool changes = false;
4193     TCGOp *op, *op_next;
4194 
4195     /* Create a temporary for each indirect global.  */
4196     for (i = 0; i < nb_globals; ++i) {
4197         TCGTemp *its = &s->temps[i];
4198         if (its->indirect_reg) {
4199             TCGTemp *dts = tcg_temp_alloc(s);
4200             dts->type = its->type;
4201             dts->base_type = its->base_type;
4202             dts->temp_subindex = its->temp_subindex;
4203             dts->kind = TEMP_EBB;
4204             its->state_ptr = dts;
4205         } else {
4206             its->state_ptr = NULL;
4207         }
4208         /* All globals begin dead.  */
4209         its->state = TS_DEAD;
4210     }
4211     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4212         TCGTemp *its = &s->temps[i];
4213         its->state_ptr = NULL;
4214         its->state = TS_DEAD;
4215     }
4216 
4217     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4218         TCGOpcode opc = op->opc;
4219         const TCGOpDef *def = &tcg_op_defs[opc];
4220         TCGLifeData arg_life = op->life;
4221         int nb_iargs, nb_oargs, call_flags;
4222         TCGTemp *arg_ts, *dir_ts;
4223 
4224         if (opc == INDEX_op_call) {
4225             nb_oargs = TCGOP_CALLO(op);
4226             nb_iargs = TCGOP_CALLI(op);
4227             call_flags = tcg_call_flags(op);
4228         } else {
4229             nb_iargs = def->nb_iargs;
4230             nb_oargs = def->nb_oargs;
4231 
4232             /* Set flags similar to how calls require.  */
4233             if (def->flags & TCG_OPF_COND_BRANCH) {
4234                 /* Like reading globals: sync_globals */
4235                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4236             } else if (def->flags & TCG_OPF_BB_END) {
4237                 /* Like writing globals: save_globals */
4238                 call_flags = 0;
4239             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4240                 /* Like reading globals: sync_globals */
4241                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4242             } else {
4243                 /* No effect on globals.  */
4244                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4245                               TCG_CALL_NO_WRITE_GLOBALS);
4246             }
4247         }
4248 
4249         /* Make sure that input arguments are available.  */
4250         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4251             arg_ts = arg_temp(op->args[i]);
4252             dir_ts = arg_ts->state_ptr;
4253             if (dir_ts && arg_ts->state == TS_DEAD) {
4254                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4255                                   ? INDEX_op_ld_i32
4256                                   : INDEX_op_ld_i64);
4257                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4258                                                   arg_ts->type, 3);
4259 
4260                 lop->args[0] = temp_arg(dir_ts);
4261                 lop->args[1] = temp_arg(arg_ts->mem_base);
4262                 lop->args[2] = arg_ts->mem_offset;
4263 
4264                 /* Loaded, but synced with memory.  */
4265                 arg_ts->state = TS_MEM;
4266             }
4267         }
4268 
4269         /* Perform input replacement, and mark inputs that became dead.
4270            No action is required except keeping temp_state up to date
4271            so that we reload when needed.  */
4272         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4273             arg_ts = arg_temp(op->args[i]);
4274             dir_ts = arg_ts->state_ptr;
4275             if (dir_ts) {
4276                 op->args[i] = temp_arg(dir_ts);
4277                 changes = true;
4278                 if (IS_DEAD_ARG(i)) {
4279                     arg_ts->state = TS_DEAD;
4280                 }
4281             }
4282         }
4283 
4284         /* Liveness analysis should ensure that the following are
4285            all correct, for call sites and basic block end points.  */
4286         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4287             /* Nothing to do */
4288         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4289             for (i = 0; i < nb_globals; ++i) {
4290                 /* Liveness should see that globals are synced back,
4291                    that is, either TS_DEAD or TS_MEM.  */
4292                 arg_ts = &s->temps[i];
4293                 tcg_debug_assert(arg_ts->state_ptr == 0
4294                                  || arg_ts->state != 0);
4295             }
4296         } else {
4297             for (i = 0; i < nb_globals; ++i) {
4298                 /* Liveness should see that globals are saved back,
4299                    that is, TS_DEAD, waiting to be reloaded.  */
4300                 arg_ts = &s->temps[i];
4301                 tcg_debug_assert(arg_ts->state_ptr == 0
4302                                  || arg_ts->state == TS_DEAD);
4303             }
4304         }
4305 
4306         /* Outputs become available.  */
4307         if (opc == INDEX_op_mov) {
4308             arg_ts = arg_temp(op->args[0]);
4309             dir_ts = arg_ts->state_ptr;
4310             if (dir_ts) {
4311                 op->args[0] = temp_arg(dir_ts);
4312                 changes = true;
4313 
4314                 /* The output is now live and modified.  */
4315                 arg_ts->state = 0;
4316 
4317                 if (NEED_SYNC_ARG(0)) {
4318                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4319                                       ? INDEX_op_st_i32
4320                                       : INDEX_op_st_i64);
4321                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4322                                                      arg_ts->type, 3);
4323                     TCGTemp *out_ts = dir_ts;
4324 
4325                     if (IS_DEAD_ARG(0)) {
4326                         out_ts = arg_temp(op->args[1]);
4327                         arg_ts->state = TS_DEAD;
4328                         tcg_op_remove(s, op);
4329                     } else {
4330                         arg_ts->state = TS_MEM;
4331                     }
4332 
4333                     sop->args[0] = temp_arg(out_ts);
4334                     sop->args[1] = temp_arg(arg_ts->mem_base);
4335                     sop->args[2] = arg_ts->mem_offset;
4336                 } else {
4337                     tcg_debug_assert(!IS_DEAD_ARG(0));
4338                 }
4339             }
4340         } else {
4341             for (i = 0; i < nb_oargs; i++) {
4342                 arg_ts = arg_temp(op->args[i]);
4343                 dir_ts = arg_ts->state_ptr;
4344                 if (!dir_ts) {
4345                     continue;
4346                 }
4347                 op->args[i] = temp_arg(dir_ts);
4348                 changes = true;
4349 
4350                 /* The output is now live and modified.  */
4351                 arg_ts->state = 0;
4352 
4353                 /* Sync outputs upon their last write.  */
4354                 if (NEED_SYNC_ARG(i)) {
4355                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4356                                       ? INDEX_op_st_i32
4357                                       : INDEX_op_st_i64);
4358                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4359                                                      arg_ts->type, 3);
4360 
4361                     sop->args[0] = temp_arg(dir_ts);
4362                     sop->args[1] = temp_arg(arg_ts->mem_base);
4363                     sop->args[2] = arg_ts->mem_offset;
4364 
4365                     arg_ts->state = TS_MEM;
4366                 }
4367                 /* Drop outputs that are dead.  */
4368                 if (IS_DEAD_ARG(i)) {
4369                     arg_ts->state = TS_DEAD;
4370                 }
4371             }
4372         }
4373     }
4374 
4375     return changes;
4376 }
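/*
 * Illustrative effect of this pass on a hypothetical indirect global g
 * (direct temp d created above), when g is both read and synced:
 *
 *     before:                     after:
 *     add g, g, t0                ld_i32 d, mem_base, mem_offset
 *                                 add    d, d, t0
 *                                 st_i32 d, mem_base, mem_offset
 *
 * The load is inserted while g's state is TS_DEAD; the store follows the
 * last write when NEED_SYNC_ARG is set.
 */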
4377 
4378 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4379 {
4380     intptr_t off;
4381     int size, align;
4382 
4383     /* When allocating an object, look at the full type. */
4384     size = tcg_type_size(ts->base_type);
4385     switch (ts->base_type) {
4386     case TCG_TYPE_I32:
4387         align = 4;
4388         break;
4389     case TCG_TYPE_I64:
4390     case TCG_TYPE_V64:
4391         align = 8;
4392         break;
4393     case TCG_TYPE_I128:
4394     case TCG_TYPE_V128:
4395     case TCG_TYPE_V256:
4396         /*
4397          * Note that we do not require aligned storage for V256,
4398          * and that we provide alignment for I128 to match V128,
4399          * even if that's above what the host ABI requires.
4400          */
4401         align = 16;
4402         break;
4403     default:
4404         g_assert_not_reached();
4405     }
4406 
4407     /*
4408      * Assume the stack is sufficiently aligned.
4409      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4410      * and do not require 16 byte vector alignment.  This seems slightly
4411      * easier than fully parameterizing the above switch statement.
4412      */
4413     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4414     off = ROUND_UP(s->current_frame_offset, align);
4415 
4416     /* If we've exhausted the stack frame, restart with a smaller TB. */
4417     if (off + size > s->frame_end) {
4418         tcg_raise_tb_overflow(s);
4419     }
4420     s->current_frame_offset = off + size;
4421 #if defined(__sparc__)
4422     off += TCG_TARGET_STACK_BIAS;
4423 #endif
4424 
4425     /* If the object was subdivided, assign memory to all the parts. */
4426     if (ts->base_type != ts->type) {
4427         int part_size = tcg_type_size(ts->type);
4428         int part_count = size / part_size;
4429 
4430         /*
4431          * Each part is allocated sequentially in tcg_temp_new_internal.
4432          * Jump back to the first part by subtracting the current index.
4433          */
4434         ts -= ts->temp_subindex;
4435         for (int i = 0; i < part_count; ++i) {
4436             ts[i].mem_offset = off + i * part_size;
4437             ts[i].mem_base = s->frame_temp;
4438             ts[i].mem_allocated = 1;
4439         }
4440     } else {
4441         ts->mem_offset = off;
4442         ts->mem_base = s->frame_temp;
4443         ts->mem_allocated = 1;
4444     }
4445 }
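/*
 * Worked example of the layout above (assumed values, 16-byte aligned
 * stack): with current_frame_offset == 12, an I64 temp (size 8, align 8)
 * is placed at off = ROUND_UP(12, 8) = 16 and the next free offset
 * becomes 24.  An I128 temp subdivided into two I64 parts would get
 * mem_offset 16 and 24 for temp_subindex 0 and 1 respectively.
 */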
4446 
4447 /* Assign @reg to @ts, and update reg_to_temp[]. */
4448 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4449 {
4450     if (ts->val_type == TEMP_VAL_REG) {
4451         TCGReg old = ts->reg;
4452         tcg_debug_assert(s->reg_to_temp[old] == ts);
4453         if (old == reg) {
4454             return;
4455         }
4456         s->reg_to_temp[old] = NULL;
4457     }
4458     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4459     s->reg_to_temp[reg] = ts;
4460     ts->val_type = TEMP_VAL_REG;
4461     ts->reg = reg;
4462 }
4463 
4464 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4465 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4466 {
4467     tcg_debug_assert(type != TEMP_VAL_REG);
4468     if (ts->val_type == TEMP_VAL_REG) {
4469         TCGReg reg = ts->reg;
4470         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4471         s->reg_to_temp[reg] = NULL;
4472     }
4473     ts->val_type = type;
4474 }
4475 
4476 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4477 
4478 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4479    mark it free; otherwise mark it dead.  */
4480 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4481 {
4482     TCGTempVal new_type;
4483 
4484     switch (ts->kind) {
4485     case TEMP_FIXED:
4486         return;
4487     case TEMP_GLOBAL:
4488     case TEMP_TB:
4489         new_type = TEMP_VAL_MEM;
4490         break;
4491     case TEMP_EBB:
4492         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4493         break;
4494     case TEMP_CONST:
4495         new_type = TEMP_VAL_CONST;
4496         break;
4497     default:
4498         g_assert_not_reached();
4499     }
4500     set_temp_val_nonreg(s, ts, new_type);
4501 }
4502 
4503 /* Mark a temporary as dead.  */
4504 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4505 {
4506     temp_free_or_dead(s, ts, 1);
4507 }
4508 
4509 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4510    register needs to be allocated to store a constant.  If 'free_or_dead'
4511    is non-zero, subsequently release the temporary; if it is positive, the
4512    temp is dead; if it is negative, the temp is free.  */
4513 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4514                       TCGRegSet preferred_regs, int free_or_dead)
4515 {
4516     if (!temp_readonly(ts) && !ts->mem_coherent) {
4517         if (!ts->mem_allocated) {
4518             temp_allocate_frame(s, ts);
4519         }
4520         switch (ts->val_type) {
4521         case TEMP_VAL_CONST:
4522             /* If we're going to free the temp immediately, then we won't
4523                require it later in a register, so attempt to store the
4524                constant to memory directly.  */
4525             if (free_or_dead
4526                 && tcg_out_sti(s, ts->type, ts->val,
4527                                ts->mem_base->reg, ts->mem_offset)) {
4528                 break;
4529             }
4530             temp_load(s, ts, tcg_target_available_regs[ts->type],
4531                       allocated_regs, preferred_regs);
4532             /* fallthrough */
4533 
4534         case TEMP_VAL_REG:
4535             tcg_out_st(s, ts->type, ts->reg,
4536                        ts->mem_base->reg, ts->mem_offset);
4537             break;
4538 
4539         case TEMP_VAL_MEM:
4540             break;
4541 
4542         case TEMP_VAL_DEAD:
4543         default:
4544             g_assert_not_reached();
4545         }
4546         ts->mem_coherent = 1;
4547     }
4548     if (free_or_dead) {
4549         temp_free_or_dead(s, ts, free_or_dead);
4550     }
4551 }
4552 
4553 /* free register 'reg' by spilling the corresponding temporary if necessary */
4554 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4555 {
4556     TCGTemp *ts = s->reg_to_temp[reg];
4557     if (ts != NULL) {
4558         temp_sync(s, ts, allocated_regs, 0, -1);
4559     }
4560 }
4561 
4562 /**
4563  * tcg_reg_alloc:
4564  * @required_regs: Set of registers in which we must allocate.
4565  * @allocated_regs: Set of registers which must be avoided.
4566  * @preferred_regs: Set of registers we should prefer.
4567  * @rev: True if we search the registers in "indirect" order.
4568  *
4569  * The allocated register must be in @required_regs & ~@allocated_regs,
4570  * but if we can put it in @preferred_regs we may save a move later.
4571  */
4572 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4573                             TCGRegSet allocated_regs,
4574                             TCGRegSet preferred_regs, bool rev)
4575 {
4576     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4577     TCGRegSet reg_ct[2];
4578     const int *order;
4579 
4580     reg_ct[1] = required_regs & ~allocated_regs;
4581     tcg_debug_assert(reg_ct[1] != 0);
4582     reg_ct[0] = reg_ct[1] & preferred_regs;
4583 
4584     /* Skip the preferred_regs option if it cannot be satisfied,
4585        or if the preference made no difference.  */
4586     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4587 
4588     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4589 
4590     /* Try free registers, preferences first.  */
4591     for (j = f; j < 2; j++) {
4592         TCGRegSet set = reg_ct[j];
4593 
4594         if (tcg_regset_single(set)) {
4595             /* One register in the set.  */
4596             TCGReg reg = tcg_regset_first(set);
4597             if (s->reg_to_temp[reg] == NULL) {
4598                 return reg;
4599             }
4600         } else {
4601             for (i = 0; i < n; i++) {
4602                 TCGReg reg = order[i];
4603                 if (s->reg_to_temp[reg] == NULL &&
4604                     tcg_regset_test_reg(set, reg)) {
4605                     return reg;
4606                 }
4607             }
4608         }
4609     }
4610 
4611     /* We must spill something.  */
4612     for (j = f; j < 2; j++) {
4613         TCGRegSet set = reg_ct[j];
4614 
4615         if (tcg_regset_single(set)) {
4616             /* One register in the set.  */
4617             TCGReg reg = tcg_regset_first(set);
4618             tcg_reg_free(s, reg, allocated_regs);
4619             return reg;
4620         } else {
4621             for (i = 0; i < n; i++) {
4622                 TCGReg reg = order[i];
4623                 if (tcg_regset_test_reg(set, reg)) {
4624                     tcg_reg_free(s, reg, allocated_regs);
4625                     return reg;
4626                 }
4627             }
4628         }
4629     }
4630 
4631     g_assert_not_reached();
4632 }
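/*
 * Search-order sketch for the allocator above (illustrative; assumes the
 * allocation order visits r0 first): with required = {r0..r3} and
 * preferred = {r2},
 *
 *     r2 free               -> return r2 (preferred, no spill)
 *     r2 live, r0 free      -> return r0 (second pass over the full set)
 *     all four live         -> spill loop frees and returns a register
 */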
4633 
4634 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4635                                  TCGRegSet allocated_regs,
4636                                  TCGRegSet preferred_regs, bool rev)
4637 {
4638     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4639     TCGRegSet reg_ct[2];
4640     const int *order;
4641 
4642     /* Keep only regs I where neither I nor I+1 is in allocated_regs. */
4643     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4644     tcg_debug_assert(reg_ct[1] != 0);
4645     reg_ct[0] = reg_ct[1] & preferred_regs;
4646 
4647     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4648 
4649     /*
4650      * Skip the preferred_regs option if it cannot be satisfied,
4651      * or if the preference made no difference.
4652      */
4653     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4654 
4655     /*
4656      * Minimize the number of flushes by looking for 2 free registers first,
4657      * then a single flush, then two flushes.
4658      */
4659     for (fmin = 2; fmin >= 0; fmin--) {
4660         for (j = k; j < 2; j++) {
4661             TCGRegSet set = reg_ct[j];
4662 
4663             for (i = 0; i < n; i++) {
4664                 TCGReg reg = order[i];
4665 
4666                 if (tcg_regset_test_reg(set, reg)) {
4667                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4668                     if (f >= fmin) {
4669                         tcg_reg_free(s, reg, allocated_regs);
4670                         tcg_reg_free(s, reg + 1, allocated_regs);
4671                         return reg;
4672                     }
4673                 }
4674             }
4675         }
4676     }
4677     g_assert_not_reached();
4678 }
4679 
4680 /* Make sure the temporary is in a register.  If needed, allocate the register
4681    from DESIRED while avoiding ALLOCATED.  */
4682 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4683                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4684 {
4685     TCGReg reg;
4686 
4687     switch (ts->val_type) {
4688     case TEMP_VAL_REG:
4689         return;
4690     case TEMP_VAL_CONST:
4691         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4692                             preferred_regs, ts->indirect_base);
4693         if (ts->type <= TCG_TYPE_I64) {
4694             tcg_out_movi(s, ts->type, reg, ts->val);
4695         } else {
4696             uint64_t val = ts->val;
4697             MemOp vece = MO_64;
4698 
4699             /*
4700              * Find the minimal vector element that matches the constant.
4701              * The targets will, in general, have to do this search anyway;
4702              * do it generically here.
4703              */
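            /*
             * E.g. 0x7f7f7f7f7f7f7f7f == dup_const(MO_8, 0x7f) and is
             * emitted as a byte dup, while 0x0000000100000001 matches
             * only at MO_32.
             */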
4704             if (val == dup_const(MO_8, val)) {
4705                 vece = MO_8;
4706             } else if (val == dup_const(MO_16, val)) {
4707                 vece = MO_16;
4708             } else if (val == dup_const(MO_32, val)) {
4709                 vece = MO_32;
4710             }
4711 
4712             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4713         }
4714         ts->mem_coherent = 0;
4715         break;
4716     case TEMP_VAL_MEM:
4717         if (!ts->mem_allocated) {
4718             temp_allocate_frame(s, ts);
4719         }
4720         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4721                             preferred_regs, ts->indirect_base);
4722         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4723         ts->mem_coherent = 1;
4724         break;
4725     case TEMP_VAL_DEAD:
4726     default:
4727         g_assert_not_reached();
4728     }
4729     set_temp_val_reg(s, ts, reg);
4730 }
4731 
4732 /* Save a temporary to memory. 'allocated_regs' is used in case a
4733    temporary register needs to be allocated to store a constant.  */
4734 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4735 {
4736     /* The liveness analysis already ensures that globals are back
4737        in memory. Keep a tcg_debug_assert for safety. */
4738     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4739 }
4740 
4741 /* save globals to their canonical location and assume they can be
4742    modified by the following code. 'allocated_regs' is used in case a
4743    temporary register needs to be allocated to store a constant. */
4744 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4745 {
4746     int i, n;
4747 
4748     for (i = 0, n = s->nb_globals; i < n; i++) {
4749         temp_save(s, &s->temps[i], allocated_regs);
4750     }
4751 }
4752 
4753 /* sync globals to their canonical location and assume they can be
4754    read by the following code. 'allocated_regs' is used in case a
4755    temporary register needs to be allocated to store a constant. */
4756 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4757 {
4758     int i, n;
4759 
4760     for (i = 0, n = s->nb_globals; i < n; i++) {
4761         TCGTemp *ts = &s->temps[i];
4762         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4763                          || ts->kind == TEMP_FIXED
4764                          || ts->mem_coherent);
4765     }
4766 }
4767 
4768 /* at the end of a basic block, we assume all temporaries are dead and
4769    all globals are stored at their canonical location. */
4770 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4771 {
4772     int i;
4773 
4774     for (i = s->nb_globals; i < s->nb_temps; i++) {
4775         TCGTemp *ts = &s->temps[i];
4776 
4777         switch (ts->kind) {
4778         case TEMP_TB:
4779             temp_save(s, ts, allocated_regs);
4780             break;
4781         case TEMP_EBB:
4782             /* The liveness analysis already ensures that temps are dead.
4783                Keep a tcg_debug_assert for safety. */
4784             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4785             break;
4786         case TEMP_CONST:
4787             /* Similarly, we should have freed any allocated register. */
4788             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4789             break;
4790         default:
4791             g_assert_not_reached();
4792         }
4793     }
4794 
4795     save_globals(s, allocated_regs);
4796 }
4797 
4798 /*
4799  * At a conditional branch, we assume all temporaries are dead unless
4800  * explicitly live-across-conditional-branch; all globals and local
4801  * temps are synced to their location.
4802  */
4803 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4804 {
4805     sync_globals(s, allocated_regs);
4806 
4807     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4808         TCGTemp *ts = &s->temps[i];
4809         /*
4810          * The liveness analysis already ensures that temps are dead.
4811          * Keep tcg_debug_asserts for safety.
4812          */
4813         switch (ts->kind) {
4814         case TEMP_TB:
4815             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4816             break;
4817         case TEMP_EBB:
4818         case TEMP_CONST:
4819             break;
4820         default:
4821             g_assert_not_reached();
4822         }
4823     }
4824 }
4825 
4826 /*
4827  * Specialized code generation for INDEX_op_mov_* with a constant.
4828  */
4829 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4830                                   tcg_target_ulong val, TCGLifeData arg_life,
4831                                   TCGRegSet preferred_regs)
4832 {
4833     /* ENV should not be modified.  */
4834     tcg_debug_assert(!temp_readonly(ots));
4835 
4836     /* The movi is not explicitly generated here.  */
4837     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4838     ots->val = val;
4839     ots->mem_coherent = 0;
4840     if (NEED_SYNC_ARG(0)) {
4841         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4842     } else if (IS_DEAD_ARG(0)) {
4843         temp_dead(s, ots);
4844     }
4845 }
4846 
4847 /*
4848  * Specialized code generation for INDEX_op_mov_*.
4849  */
4850 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4851 {
4852     const TCGLifeData arg_life = op->life;
4853     TCGRegSet allocated_regs, preferred_regs;
4854     TCGTemp *ts, *ots;
4855     TCGType otype, itype;
4856     TCGReg oreg, ireg;
4857 
4858     allocated_regs = s->reserved_regs;
4859     preferred_regs = output_pref(op, 0);
4860     ots = arg_temp(op->args[0]);
4861     ts = arg_temp(op->args[1]);
4862 
4863     /* ENV should not be modified.  */
4864     tcg_debug_assert(!temp_readonly(ots));
4865 
4866     /* Note that otype != itype for no-op truncation.  */
4867     otype = ots->type;
4868     itype = ts->type;
4869 
4870     if (ts->val_type == TEMP_VAL_CONST) {
4871         /* propagate constant or generate sti */
4872         tcg_target_ulong val = ts->val;
4873         if (IS_DEAD_ARG(1)) {
4874             temp_dead(s, ts);
4875         }
4876         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4877         return;
4878     }
4879 
4880     /* If the source value is in memory we're going to be forced
4881        to have it in a register in order to perform the copy.  Copy
4882        the SOURCE value into its own register first, so that we
4883        don't have to reload SOURCE the next time it is used. */
4884     if (ts->val_type == TEMP_VAL_MEM) {
4885         temp_load(s, ts, tcg_target_available_regs[itype],
4886                   allocated_regs, preferred_regs);
4887     }
4888     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4889     ireg = ts->reg;
4890 
4891     if (IS_DEAD_ARG(0)) {
4892         /* mov to a non-saved dead register makes no sense (even with
4893            liveness analysis disabled). */
4894         tcg_debug_assert(NEED_SYNC_ARG(0));
4895         if (!ots->mem_allocated) {
4896             temp_allocate_frame(s, ots);
4897         }
4898         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4899         if (IS_DEAD_ARG(1)) {
4900             temp_dead(s, ts);
4901         }
4902         temp_dead(s, ots);
4903         return;
4904     }
4905 
4906     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4907         /*
4908          * The mov can be suppressed.  Kill input first, so that it
4909          * is unlinked from reg_to_temp, then set the output to the
4910          * reg that we saved from the input.
4911          */
4912         temp_dead(s, ts);
4913         oreg = ireg;
4914     } else {
4915         if (ots->val_type == TEMP_VAL_REG) {
4916             oreg = ots->reg;
4917         } else {
4918             /* Make sure to not spill the input register during allocation. */
4919             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4920                                  allocated_regs | ((TCGRegSet)1 << ireg),
4921                                  preferred_regs, ots->indirect_base);
4922         }
4923         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4924             /*
4925              * Cross register class move not supported.
4926              * Store the source register into the destination slot
4927              * and leave the destination temp as TEMP_VAL_MEM.
4928              */
4929             assert(!temp_readonly(ots));
4930             if (!ots->mem_allocated) {
4931                 temp_allocate_frame(s, ots);
4932             }
4933             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4934             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4935             ots->mem_coherent = 1;
4936             return;
4937         }
4938     }
4939     set_temp_val_reg(s, ots, oreg);
4940     ots->mem_coherent = 0;
4941 
4942     if (NEED_SYNC_ARG(0)) {
4943         temp_sync(s, ots, allocated_regs, 0, 0);
4944     }
4945 }
4946 
4947 /*
4948  * Specialized code generation for INDEX_op_dup_vec.
4949  */
4950 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4951 {
4952     const TCGLifeData arg_life = op->life;
4953     TCGRegSet dup_out_regs, dup_in_regs;
4954     const TCGArgConstraint *dup_args_ct;
4955     TCGTemp *its, *ots;
4956     TCGType itype, vtype;
4957     unsigned vece;
4958     int lowpart_ofs;
4959     bool ok;
4960 
4961     ots = arg_temp(op->args[0]);
4962     its = arg_temp(op->args[1]);
4963 
4964     /* ENV should not be modified.  */
4965     tcg_debug_assert(!temp_readonly(ots));
4966 
4967     itype = its->type;
4968     vece = TCGOP_VECE(op);
4969     vtype = TCGOP_TYPE(op);
4970 
4971     if (its->val_type == TEMP_VAL_CONST) {
4972         /* Propagate constant via movi -> dupi.  */
4973         tcg_target_ulong val = its->val;
4974         if (IS_DEAD_ARG(1)) {
4975             temp_dead(s, its);
4976         }
4977         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4978         return;
4979     }
4980 
4981     dup_args_ct = opcode_args_ct(op);
4982     dup_out_regs = dup_args_ct[0].regs;
4983     dup_in_regs = dup_args_ct[1].regs;
4984 
4985     /* Allocate the output register now.  */
4986     if (ots->val_type != TEMP_VAL_REG) {
4987         TCGRegSet allocated_regs = s->reserved_regs;
4988         TCGReg oreg;
4989 
4990         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4991             /* Make sure to not spill the input register. */
4992             tcg_regset_set_reg(allocated_regs, its->reg);
4993         }
4994         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4995                              output_pref(op, 0), ots->indirect_base);
4996         set_temp_val_reg(s, ots, oreg);
4997     }
4998 
4999     switch (its->val_type) {
5000     case TEMP_VAL_REG:
5001         /*
5002          * The dup constraints must be broad, covering all possible VECE.
5003          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5004          * to fail, indicating that extra moves are required for that case.
5005          */
5006         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5007             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5008                 goto done;
5009             }
5010             /* Try again from memory or a vector input register.  */
5011         }
5012         if (!its->mem_coherent) {
5013             /*
5014              * The input register is not synced, and so an extra store
5015              * would be required to use memory.  Attempt an integer-vector
5016              * register move first.  We do not have a TCGRegSet for this.
5017              */
5018             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5019                 break;
5020             }
5021             /* Sync the temp back to its slot and load from there.  */
5022             temp_sync(s, its, s->reserved_regs, 0, 0);
5023         }
5024         /* fall through */
5025 
5026     case TEMP_VAL_MEM:
5027         lowpart_ofs = 0;
5028         if (HOST_BIG_ENDIAN) {
5029             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5030         }
5031         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5032                              its->mem_offset + lowpart_ofs)) {
5033             goto done;
5034         }
5035         /* Load the input into the destination vector register. */
5036         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5037         break;
5038 
5039     default:
5040         g_assert_not_reached();
5041     }
5042 
5043     /* We now have a vector input register, so dup must succeed. */
5044     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5045     tcg_debug_assert(ok);
5046 
5047  done:
5048     ots->mem_coherent = 0;
5049     if (IS_DEAD_ARG(1)) {
5050         temp_dead(s, its);
5051     }
5052     if (NEED_SYNC_ARG(0)) {
5053         temp_sync(s, ots, s->reserved_regs, 0, 0);
5054     }
5055     if (IS_DEAD_ARG(0)) {
5056         temp_dead(s, ots);
5057     }
5058 }
5059 
5060 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5061 {
5062     const TCGLifeData arg_life = op->life;
5063     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5064     TCGRegSet i_allocated_regs;
5065     TCGRegSet o_allocated_regs;
5066     int i, k, nb_iargs, nb_oargs;
5067     TCGReg reg;
5068     TCGArg arg;
5069     const TCGArgConstraint *args_ct;
5070     const TCGArgConstraint *arg_ct;
5071     TCGTemp *ts;
5072     TCGArg new_args[TCG_MAX_OP_ARGS];
5073     int const_args[TCG_MAX_OP_ARGS];
5074     TCGCond op_cond;
5075 
5076     nb_oargs = def->nb_oargs;
5077     nb_iargs = def->nb_iargs;
5078 
5079     /* copy constants */
5080     memcpy(new_args + nb_oargs + nb_iargs,
5081            op->args + nb_oargs + nb_iargs,
5082            sizeof(TCGArg) * def->nb_cargs);
5083 
5084     i_allocated_regs = s->reserved_regs;
5085     o_allocated_regs = s->reserved_regs;
5086 
5087     switch (op->opc) {
5088     case INDEX_op_brcond_i32:
5089     case INDEX_op_brcond_i64:
5090         op_cond = op->args[2];
5091         break;
5092     case INDEX_op_setcond_i32:
5093     case INDEX_op_setcond_i64:
5094     case INDEX_op_negsetcond_i32:
5095     case INDEX_op_negsetcond_i64:
5096     case INDEX_op_cmp_vec:
5097         op_cond = op->args[3];
5098         break;
5099     case INDEX_op_brcond2_i32:
5100         op_cond = op->args[4];
5101         break;
5102     case INDEX_op_movcond_i32:
5103     case INDEX_op_movcond_i64:
5104     case INDEX_op_setcond2_i32:
5105     case INDEX_op_cmpsel_vec:
5106         op_cond = op->args[5];
5107         break;
5108     default:
5109         /* No condition within opcode. */
5110         op_cond = TCG_COND_ALWAYS;
5111         break;
5112     }
5113 
5114     args_ct = opcode_args_ct(op);
5115 
5116     /* satisfy input constraints */
5117     for (k = 0; k < nb_iargs; k++) {
5118         TCGRegSet i_preferred_regs, i_required_regs;
5119         bool allocate_new_reg, copyto_new_reg;
5120         TCGTemp *ts2;
5121         int i1, i2;
5122 
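        /* Operands are visited in a precomputed order; sort_index maps
           back to the actual operand position in op->args. */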
5123         i = args_ct[nb_oargs + k].sort_index;
5124         arg = op->args[i];
5125         arg_ct = &args_ct[i];
5126         ts = arg_temp(arg);
5127 
5128         if (ts->val_type == TEMP_VAL_CONST) {
5129 #ifdef TCG_REG_ZERO
5130             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5131                 /* Hardware zero register: indicate register via non-const. */
5132                 const_args[i] = 0;
5133                 new_args[i] = TCG_REG_ZERO;
5134                 continue;
5135             }
5136 #endif
5137 
5138             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5139                                        op_cond, TCGOP_VECE(op))) {
5140                 /* constant is OK for instruction */
5141                 const_args[i] = 1;
5142                 new_args[i] = ts->val;
5143                 continue;
5144             }
5145         }
5146 
5147         reg = ts->reg;
5148         i_preferred_regs = 0;
5149         i_required_regs = arg_ct->regs;
5150         allocate_new_reg = false;
5151         copyto_new_reg = false;
5152 
5153         switch (arg_ct->pair) {
5154         case 0: /* not paired */
5155             if (arg_ct->ialias) {
5156                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5157 
5158                 /*
5159                  * If the input is readonly, then it cannot also be an
5160                  * output and aliased to itself.  If the input is not
5161                  * dead after the instruction, we must allocate a new
5162                  * register and move it.
5163                  */
5164                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5165                     || args_ct[arg_ct->alias_index].newreg) {
5166                     allocate_new_reg = true;
5167                 } else if (ts->val_type == TEMP_VAL_REG) {
5168                     /*
5169                      * Check if the current register has already been
5170                      * allocated for another input.
5171                      */
5172                     allocate_new_reg =
5173                         tcg_regset_test_reg(i_allocated_regs, reg);
5174                 }
5175             }
5176             if (!allocate_new_reg) {
5177                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5178                           i_preferred_regs);
5179                 reg = ts->reg;
5180                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5181             }
5182             if (allocate_new_reg) {
5183                 /*
5184                  * Allocate a new register matching the constraint
5185                  * and move the temporary register into it.
5186                  */
5187                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5188                           i_allocated_regs, 0);
5189                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5190                                     i_preferred_regs, ts->indirect_base);
5191                 copyto_new_reg = true;
5192             }
5193             break;
5194 
5195         case 1:
5196             /* First of an input pair; if i1 == i2, the second is an output. */
5197             i1 = i;
5198             i2 = arg_ct->pair_index;
5199             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5200 
5201             /*
5202              * It is easier to default to allocating a new pair
5203              * and to identify a few cases where it's not required.
5204              */
5205             if (arg_ct->ialias) {
5206                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5207                 if (IS_DEAD_ARG(i1) &&
5208                     IS_DEAD_ARG(i2) &&
5209                     !temp_readonly(ts) &&
5210                     ts->val_type == TEMP_VAL_REG &&
5211                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5212                     tcg_regset_test_reg(i_required_regs, reg) &&
5213                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5214                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5215                     (ts2
5216                      ? ts2->val_type == TEMP_VAL_REG &&
5217                        ts2->reg == reg + 1 &&
5218                        !temp_readonly(ts2)
5219                      : s->reg_to_temp[reg + 1] == NULL)) {
5220                     break;
5221                 }
5222             } else {
5223                 /* Without aliasing, the pair must also be an input. */
5224                 tcg_debug_assert(ts2);
5225                 if (ts->val_type == TEMP_VAL_REG &&
5226                     ts2->val_type == TEMP_VAL_REG &&
5227                     ts2->reg == reg + 1 &&
5228                     tcg_regset_test_reg(i_required_regs, reg)) {
5229                     break;
5230                 }
5231             }
5232             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5233                                      0, ts->indirect_base);
5234             goto do_pair;
5235 
5236         case 2: /* pair second */
5237             reg = new_args[arg_ct->pair_index] + 1;
5238             goto do_pair;
5239 
5240         case 3: /* ialias with second output, no first input */
5241             tcg_debug_assert(arg_ct->ialias);
5242             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5243 
5244             if (IS_DEAD_ARG(i) &&
5245                 !temp_readonly(ts) &&
5246                 ts->val_type == TEMP_VAL_REG &&
5247                 reg > 0 &&
5248                 s->reg_to_temp[reg - 1] == NULL &&
5249                 tcg_regset_test_reg(i_required_regs, reg) &&
5250                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5251                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5252                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5253                 break;
5254             }
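            /*
             * The constraint applies to this input's register, which is
             * the high half of a pair: shift the set down to constrain
             * the pair's first register, then use reg + 1 below.
             */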
5255             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5256                                      i_allocated_regs, 0,
5257                                      ts->indirect_base);
5258             tcg_regset_set_reg(i_allocated_regs, reg);
5259             reg += 1;
5260             goto do_pair;
5261 
5262         do_pair:
5263             /*
5264              * If an aliased input is not dead after the instruction,
5265              * we must allocate a new register and move it.
5266              */
5267             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5268                 TCGRegSet t_allocated_regs = i_allocated_regs;
5269 
5270                 /*
5271                  * Because of the alias, and the continued life, make sure
5272                  * that the temp is somewhere *other* than the reg pair,
5273                  * and we get a copy in reg.
5274                  */
5275                 tcg_regset_set_reg(t_allocated_regs, reg);
5276                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5277                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5278                     /* If ts was already in reg, copy it somewhere else. */
5279                     TCGReg nr;
5280                     bool ok;
5281 
5282                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5283                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5284                                        t_allocated_regs, 0, ts->indirect_base);
5285                     ok = tcg_out_mov(s, ts->type, nr, reg);
5286                     tcg_debug_assert(ok);
5287 
5288                     set_temp_val_reg(s, ts, nr);
5289                 } else {
5290                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5291                               t_allocated_regs, 0);
5292                     copyto_new_reg = true;
5293                 }
5294             } else {
5295                 /* Preferably allocate to reg, otherwise copy. */
5296                 i_required_regs = (TCGRegSet)1 << reg;
5297                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5298                           i_preferred_regs);
5299                 copyto_new_reg = ts->reg != reg;
5300             }
5301             break;
5302 
5303         default:
5304             g_assert_not_reached();
5305         }
5306 
5307         if (copyto_new_reg) {
5308             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5309                 /*
5310                  * Cross register class move not supported.  Sync the
5311                  * temp back to its slot and load from there.
5312                  */
5313                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5314                 tcg_out_ld(s, ts->type, reg,
5315                            ts->mem_base->reg, ts->mem_offset);
5316             }
5317         }
5318         new_args[i] = reg;
5319         const_args[i] = 0;
5320         tcg_regset_set_reg(i_allocated_regs, reg);
5321     }
5322 
5323     /* mark dead temporaries and free the associated registers */
5324     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5325         if (IS_DEAD_ARG(i)) {
5326             temp_dead(s, arg_temp(op->args[i]));
5327         }
5328     }
5329 
5330     if (def->flags & TCG_OPF_COND_BRANCH) {
5331         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5332     } else if (def->flags & TCG_OPF_BB_END) {
5333         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5334     } else {
5335         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5336             /* XXX: permit generic clobber register list? */
5337             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5338                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5339                     tcg_reg_free(s, i, i_allocated_regs);
5340                 }
5341             }
5342         }
5343         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5344             /* sync globals if the op has side effects and might trigger
5345                an exception. */
5346             sync_globals(s, i_allocated_regs);
5347         }
5348 
5349         /* satisfy the output constraints */
5350         for (k = 0; k < nb_oargs; k++) {
5351             i = args_ct[k].sort_index;
5352             arg = op->args[i];
5353             arg_ct = &args_ct[i];
5354             ts = arg_temp(arg);
5355 
5356             /* ENV should not be modified.  */
5357             tcg_debug_assert(!temp_readonly(ts));
5358 
5359             switch (arg_ct->pair) {
5360             case 0: /* not paired */
5361                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5362                     reg = new_args[arg_ct->alias_index];
5363                 } else if (arg_ct->newreg) {
5364                     reg = tcg_reg_alloc(s, arg_ct->regs,
5365                                         i_allocated_regs | o_allocated_regs,
5366                                         output_pref(op, k), ts->indirect_base);
5367                 } else {
5368                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5369                                         output_pref(op, k), ts->indirect_base);
5370                 }
5371                 break;
5372 
5373             case 1: /* first of pair */
5374                 if (arg_ct->oalias) {
5375                     reg = new_args[arg_ct->alias_index];
5376                 } else if (arg_ct->newreg) {
5377                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5378                                              i_allocated_regs | o_allocated_regs,
5379                                              output_pref(op, k),
5380                                              ts->indirect_base);
5381                 } else {
5382                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5383                                              output_pref(op, k),
5384                                              ts->indirect_base);
5385                 }
5386                 break;
5387 
5388             case 2: /* second of pair */
5389                 if (arg_ct->oalias) {
5390                     reg = new_args[arg_ct->alias_index];
5391                 } else {
5392                     reg = new_args[arg_ct->pair_index] + 1;
5393                 }
5394                 break;
5395 
5396             case 3: /* first of pair, aliasing with a second input */
5397                 tcg_debug_assert(!arg_ct->newreg);
5398                 reg = new_args[arg_ct->pair_index] - 1;
5399                 break;
5400 
5401             default:
5402                 g_assert_not_reached();
5403             }
5404             tcg_regset_set_reg(o_allocated_regs, reg);
5405             set_temp_val_reg(s, ts, reg);
5406             ts->mem_coherent = 0;
5407             new_args[i] = reg;
5408         }
5409     }
5410 
5411     /* emit instruction */
5412     TCGType type = TCGOP_TYPE(op);
5413     switch (op->opc) {
5414     case INDEX_op_ext_i32_i64:
5415         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5416         break;
5417     case INDEX_op_extu_i32_i64:
5418         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5419         break;
5420     case INDEX_op_extrl_i64_i32:
5421         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5422         break;
5423 
5424     case INDEX_op_add:
5425     case INDEX_op_and:
5426     case INDEX_op_andc:
5427     case INDEX_op_eqv:
5428     case INDEX_op_mul:
5429     case INDEX_op_muluh:
5430     case INDEX_op_nand:
5431     case INDEX_op_nor:
5432     case INDEX_op_or:
5433     case INDEX_op_orc:
5434     case INDEX_op_xor:
5435         {
5436             const TCGOutOpBinary *out =
5437                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5438 
5439             /* Constants should never appear in the first source operand. */
5440             tcg_debug_assert(!const_args[1]);
5441             if (const_args[2]) {
5442                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5443             } else {
5444                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5445             }
5446         }
5447         break;
5448 
5449     case INDEX_op_sub:
5450         {
5451             const TCGOutOpSubtract *out = &outop_sub;
5452 
5453             /*
5454              * Constants should never appear in the second source operand.
5455              * These are folded into an add with the negated constant.
5456              */
5457             tcg_debug_assert(!const_args[2]);
5458             if (const_args[1]) {
5459                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5460             } else {
5461                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5462             }
5463         }
5464         break;
5465 
5466     case INDEX_op_neg:
5467     case INDEX_op_not:
5468         {
5469             const TCGOutOpUnary *out =
5470                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5471 
5472             /* Constants should have been folded. */
5473             tcg_debug_assert(!const_args[1]);
5474             out->out_rr(s, type, new_args[0], new_args[1]);
5475         }
5476         break;
5477 
5478     default:
5479         if (def->flags & TCG_OPF_VECTOR) {
5480             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5481                            TCGOP_VECE(op), new_args, const_args);
5482         } else {
5483             tcg_out_op(s, op->opc, type, new_args, const_args);
5484         }
5485         break;
5486     }
5487 
5488     /* move the outputs in the correct register if needed */
5489     for (i = 0; i < nb_oargs; i++) {
5490         ts = arg_temp(op->args[i]);
5491 
5492         /* ENV should not be modified.  */
5493         tcg_debug_assert(!temp_readonly(ts));
5494 
5495         if (NEED_SYNC_ARG(i)) {
5496             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5497         } else if (IS_DEAD_ARG(i)) {
5498             temp_dead(s, ts);
5499         }
5500     }
5501 }
5502 
5503 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5504 {
5505     const TCGLifeData arg_life = op->life;
5506     TCGTemp *ots, *itsl, *itsh;
5507     TCGType vtype = TCGOP_TYPE(op);
5508 
5509     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5510     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5511     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5512 
5513     ots = arg_temp(op->args[0]);
5514     itsl = arg_temp(op->args[1]);
5515     itsh = arg_temp(op->args[2]);
5516 
5517     /* ENV should not be modified.  */
5518     tcg_debug_assert(!temp_readonly(ots));
5519 
5520     /* Allocate the output register now.  */
5521     if (ots->val_type != TEMP_VAL_REG) {
5522         TCGRegSet allocated_regs = s->reserved_regs;
5523         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5524         TCGReg oreg;
5525 
5526         /* Make sure to not spill the input registers. */
5527         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5528             tcg_regset_set_reg(allocated_regs, itsl->reg);
5529         }
5530         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5531             tcg_regset_set_reg(allocated_regs, itsh->reg);
5532         }
5533 
5534         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5535                              output_pref(op, 0), ots->indirect_base);
5536         set_temp_val_reg(s, ots, oreg);
5537     }
5538 
5539     /* Promote dup2 of immediates to dupi_vec. */
5540     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5541         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
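        /* I.e. val = (uint32_t)itsl->val | (uint64_t)(uint32_t)itsh->val << 32. */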
5542         MemOp vece = MO_64;
5543 
5544         if (val == dup_const(MO_8, val)) {
5545             vece = MO_8;
5546         } else if (val == dup_const(MO_16, val)) {
5547             vece = MO_16;
5548         } else if (val == dup_const(MO_32, val)) {
5549             vece = MO_32;
5550         }
5551 
5552         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5553         goto done;
5554     }
5555 
5556     /* If the two inputs form one 64-bit value, try dupm_vec. */
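    /*
     * I.e. the two halves are adjacent subtemps of one 64-bit temp in
     * host memory order: on a little-endian host the low part has
     * subindex 0 and immediately precedes the high part.
     */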
5557     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5558         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5559         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5560         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5561 
5562         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5563         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5564 
5565         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5566                              its->mem_base->reg, its->mem_offset)) {
5567             goto done;
5568         }
5569     }
5570 
5571     /* Fall back to generic expansion. */
5572     return false;
5573 
5574  done:
5575     ots->mem_coherent = 0;
5576     if (IS_DEAD_ARG(1)) {
5577         temp_dead(s, itsl);
5578     }
5579     if (IS_DEAD_ARG(2)) {
5580         temp_dead(s, itsh);
5581     }
5582     if (NEED_SYNC_ARG(0)) {
5583         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5584     } else if (IS_DEAD_ARG(0)) {
5585         temp_dead(s, ots);
5586     }
5587     return true;
5588 }
5589 
5590 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5591                          TCGRegSet allocated_regs)
5592 {
5593     if (ts->val_type == TEMP_VAL_REG) {
5594         if (ts->reg != reg) {
5595             tcg_reg_free(s, reg, allocated_regs);
5596             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5597                 /*
5598                  * Cross register class move not supported.  Sync the
5599                  * temp back to its slot and load from there.
5600                  */
5601                 temp_sync(s, ts, allocated_regs, 0, 0);
5602                 tcg_out_ld(s, ts->type, reg,
5603                            ts->mem_base->reg, ts->mem_offset);
5604             }
5605         }
5606     } else {
5607         TCGRegSet arg_set = 0;
5608 
5609         tcg_reg_free(s, reg, allocated_regs);
5610         tcg_regset_set_reg(arg_set, reg);
5611         temp_load(s, ts, arg_set, allocated_regs, 0);
5612     }
5613 }
5614 
5615 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5616                          TCGRegSet allocated_regs)
5617 {
5618     /*
5619      * When the destination is on the stack, load up the temp and store.
5620      * If there are many call-saved registers, the temp might live to
5621      * see another use; otherwise it'll be discarded.
5622      */
5623     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5624     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5625                arg_slot_stk_ofs(arg_slot));
5626 }
5627 
5628 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5629                             TCGTemp *ts, TCGRegSet *allocated_regs)
5630 {
5631     if (arg_slot_reg_p(l->arg_slot)) {
5632         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5633         load_arg_reg(s, reg, ts, *allocated_regs);
5634         tcg_regset_set_reg(*allocated_regs, reg);
5635     } else {
5636         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5637     }
5638 }
5639 
5640 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5641                          intptr_t ref_off, TCGRegSet *allocated_regs)
5642 {
5643     TCGReg reg;
5644 
5645     if (arg_slot_reg_p(arg_slot)) {
5646         reg = tcg_target_call_iarg_regs[arg_slot];
5647         tcg_reg_free(s, reg, *allocated_regs);
5648         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5649         tcg_regset_set_reg(*allocated_regs, reg);
5650     } else {
5651         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5652                             *allocated_regs, 0, false);
5653         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5654         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5655                    arg_slot_stk_ofs(arg_slot));
5656     }
5657 }
5658 
5659 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5660 {
5661     const int nb_oargs = TCGOP_CALLO(op);
5662     const int nb_iargs = TCGOP_CALLI(op);
5663     const TCGLifeData arg_life = op->life;
5664     const TCGHelperInfo *info = tcg_call_info(op);
5665     TCGRegSet allocated_regs = s->reserved_regs;
5666     int i;
5667 
5668     /*
5669      * Move inputs into place in reverse order,
5670      * so that we place stacked arguments first.
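     * Handling stacked arguments first also leaves more registers free
     * for the temp loads those stores require.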
5671      */
5672     for (i = nb_iargs - 1; i >= 0; --i) {
5673         const TCGCallArgumentLoc *loc = &info->in[i];
5674         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5675 
5676         switch (loc->kind) {
5677         case TCG_CALL_ARG_NORMAL:
5678         case TCG_CALL_ARG_EXTEND_U:
5679         case TCG_CALL_ARG_EXTEND_S:
5680             load_arg_normal(s, loc, ts, &allocated_regs);
5681             break;
5682         case TCG_CALL_ARG_BY_REF:
5683             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5684             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5685                          arg_slot_stk_ofs(loc->ref_slot),
5686                          &allocated_regs);
5687             break;
5688         case TCG_CALL_ARG_BY_REF_N:
5689             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5690             break;
5691         default:
5692             g_assert_not_reached();
5693         }
5694     }
5695 
5696     /* Mark dead temporaries and free the associated registers.  */
5697     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5698         if (IS_DEAD_ARG(i)) {
5699             temp_dead(s, arg_temp(op->args[i]));
5700         }
5701     }
5702 
5703     /* Clobber call registers.  */
5704     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5705         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5706             tcg_reg_free(s, i, allocated_regs);
5707         }
5708     }
5709 
5710     /*
5711      * Save globals if they might be written by the helper,
5712      * sync them if they might be read.
5713      */
5714     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5715         /* Nothing to do */
5716     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5717         sync_globals(s, allocated_regs);
5718     } else {
5719         save_globals(s, allocated_regs);
5720     }
5721 
5722     /*
5723      * If the ABI passes a pointer to the returned struct as the first
5724      * argument, load that now.  Pass a pointer to the output home slot.
5725      */
5726     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5727         TCGTemp *ts = arg_temp(op->args[0]);
5728 
5729         if (!ts->mem_allocated) {
5730             temp_allocate_frame(s, ts);
5731         }
5732         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5733     }
5734 
5735     tcg_out_call(s, tcg_call_func(op), info);
5736 
5737     /* Assign output registers and emit moves if needed.  */
5738     switch (info->out_kind) {
5739     case TCG_CALL_RET_NORMAL:
5740         for (i = 0; i < nb_oargs; i++) {
5741             TCGTemp *ts = arg_temp(op->args[i]);
5742             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5743 
5744             /* ENV should not be modified.  */
5745             tcg_debug_assert(!temp_readonly(ts));
5746 
5747             set_temp_val_reg(s, ts, reg);
5748             ts->mem_coherent = 0;
5749         }
5750         break;
5751 
5752     case TCG_CALL_RET_BY_VEC:
5753         {
5754             TCGTemp *ts = arg_temp(op->args[0]);
5755 
5756             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5757             tcg_debug_assert(ts->temp_subindex == 0);
5758             if (!ts->mem_allocated) {
5759                 temp_allocate_frame(s, ts);
5760             }
5761             tcg_out_st(s, TCG_TYPE_V128,
5762                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5763                        ts->mem_base->reg, ts->mem_offset);
5764         }
5765         /* fall through to mark all parts in memory */
5766 
5767     case TCG_CALL_RET_BY_REF:
5768         /* The callee has performed a write through the reference. */
5769         for (i = 0; i < nb_oargs; i++) {
5770             TCGTemp *ts = arg_temp(op->args[i]);
5771             ts->val_type = TEMP_VAL_MEM;
5772         }
5773         break;
5774 
5775     default:
5776         g_assert_not_reached();
5777     }
5778 
5779     /* Flush or discard output registers as needed. */
5780     for (i = 0; i < nb_oargs; i++) {
5781         TCGTemp *ts = arg_temp(op->args[i]);
5782         if (NEED_SYNC_ARG(i)) {
5783             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5784         } else if (IS_DEAD_ARG(i)) {
5785             temp_dead(s, ts);
5786         }
5787     }
5788 }
5789 
5790 /**
5791  * atom_and_align_for_opc:
5792  * @s: tcg context
5793  * @opc: memory operation code
5794  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5795  * @allow_two_ops: true if we are prepared to issue two operations
5796  *
5797  * Return the alignment and atomicity to use for the inline fast path
5798  * for the given memory operation.  The alignment may be larger than
5799  * that specified in @opc, and the correct alignment will be diagnosed
5800  * by the slow path helper.
5801  *
5802  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5803  * and issue two loads or stores for subalignment.
5804  */
5805 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5806                                            MemOp host_atom, bool allow_two_ops)
5807 {
5808     MemOp align = memop_alignment_bits(opc);
5809     MemOp size = opc & MO_SIZE;
5810     MemOp half = size ? size - 1 : 0;
5811     MemOp atom = opc & MO_ATOM_MASK;
5812     MemOp atmax;
5813 
5814     switch (atom) {
5815     case MO_ATOM_NONE:
5816         /* The operation requires no specific atomicity. */
5817         atmax = MO_8;
5818         break;
5819 
5820     case MO_ATOM_IFALIGN:
5821         atmax = size;
5822         break;
5823 
5824     case MO_ATOM_IFALIGN_PAIR:
5825         atmax = half;
5826         break;
5827 
5828     case MO_ATOM_WITHIN16:
5829         atmax = size;
5830         if (size == MO_128) {
5831             /* Misalignment implies !within16, and therefore no atomicity. */
5832         } else if (host_atom != MO_ATOM_WITHIN16) {
5833             /* The host does not implement within16, so require alignment. */
5834             align = MAX(align, size);
5835         }
5836         break;
5837 
5838     case MO_ATOM_WITHIN16_PAIR:
5839         atmax = size;
5840         /*
5841          * Misalignment implies !within16, and therefore half atomicity.
5842          * Any host prepared for two operations can implement this with
5843          * half alignment.
5844          */
5845         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5846             align = MAX(align, half);
5847         }
5848         break;
5849 
5850     case MO_ATOM_SUBALIGN:
5851         atmax = size;
5852         if (host_atom != MO_ATOM_SUBALIGN) {
5853             /* If unaligned but not odd, there are subobjects up to half. */
5854             if (allow_two_ops) {
5855                 align = MAX(align, half);
5856             } else {
5857                 align = MAX(align, size);
5858             }
5859         }
5860         break;
5861 
5862     default:
5863         g_assert_not_reached();
5864     }
5865 
5866     return (TCGAtomAlign){ .atom = atmax, .align = align };
5867 }
5868 
5869 /*
5870  * Similarly for qemu_ld/st slow path helpers.
5871  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5872  * using only the provided backend tcg_out_* functions.
5873  */
5874 
5875 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5876 {
5877     int ofs = arg_slot_stk_ofs(slot);
5878 
5879     /*
5880      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5881      * require extension to uint64_t, adjust the address for uint32_t.
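     * On a big-endian 64-bit host the 32-bit value occupies the
     * high-addressed half of the slot, hence the +4 below.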
5882      */
5883     if (HOST_BIG_ENDIAN &&
5884         TCG_TARGET_REG_BITS == 64 &&
5885         type == TCG_TYPE_I32) {
5886         ofs += 4;
5887     }
5888     return ofs;
5889 }
5890 
5891 static void tcg_out_helper_load_slots(TCGContext *s,
5892                                       unsigned nmov, TCGMovExtend *mov,
5893                                       const TCGLdstHelperParam *parm)
5894 {
5895     unsigned i;
5896     TCGReg dst3;
5897 
5898     /*
5899      * Start from the end, storing to the stack first.
5900      * This frees those registers, so we need not consider overlap.
5901      */
5902     for (i = nmov; i-- > 0; ) {
5903         unsigned slot = mov[i].dst;
5904 
5905         if (arg_slot_reg_p(slot)) {
5906             goto found_reg;
5907         }
5908 
5909         TCGReg src = mov[i].src;
5910         TCGType dst_type = mov[i].dst_type;
5911         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5912 
5913         /* The argument is going onto the stack; extend into scratch. */
5914         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5915             tcg_debug_assert(parm->ntmp != 0);
5916             mov[i].dst = src = parm->tmp[0];
5917             tcg_out_movext1(s, &mov[i]);
5918         }
5919 
5920         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5921                    tcg_out_helper_stk_ofs(dst_type, slot));
5922     }
5923     return;
5924 
5925  found_reg:
5926     /*
5927      * The remaining arguments are in registers.
5928      * Convert slot numbers to argument registers.
5929      */
5930     nmov = i + 1;
5931     for (i = 0; i < nmov; ++i) {
5932         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5933     }
5934 
5935     switch (nmov) {
5936     case 4:
5937         /* The backend must have provided enough temps for the worst case. */
5938         tcg_debug_assert(parm->ntmp >= 2);
5939 
5940         dst3 = mov[3].dst;
5941         for (unsigned j = 0; j < 3; ++j) {
5942             if (dst3 == mov[j].src) {
5943                 /*
5944                  * Conflict. Copy the source to a temporary, perform the
5945                  * remaining moves, then the extension from our scratch
5946                  * on the way out.
5947                  */
5948                 TCGReg scratch = parm->tmp[1];
5949 
5950                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5951                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5952                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5953                 return;
5954             }
5955         }
5956 
5957         /* No conflicts: perform this move and continue. */
5958         tcg_out_movext1(s, &mov[3]);
5959         /* fall through */
5960 
5961     case 3:
5962         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5963                         parm->ntmp ? parm->tmp[0] : -1);
5964         break;
5965     case 2:
5966         tcg_out_movext2(s, mov, mov + 1,
5967                         parm->ntmp ? parm->tmp[0] : -1);
5968         break;
5969     case 1:
5970         tcg_out_movext1(s, mov);
5971         break;
5972     default:
5973         g_assert_not_reached();
5974     }
5975 }
5976 
5977 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5978                                     TCGType type, tcg_target_long imm,
5979                                     const TCGLdstHelperParam *parm)
5980 {
5981     if (arg_slot_reg_p(slot)) {
5982         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5983     } else {
5984         int ofs = tcg_out_helper_stk_ofs(type, slot);
5985         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5986             tcg_debug_assert(parm->ntmp != 0);
5987             tcg_out_movi(s, type, parm->tmp[0], imm);
5988             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5989         }
5990     }
5991 }
5992 
5993 static void tcg_out_helper_load_common_args(TCGContext *s,
5994                                             const TCGLabelQemuLdst *ldst,
5995                                             const TCGLdstHelperParam *parm,
5996                                             const TCGHelperInfo *info,
5997                                             unsigned next_arg)
5998 {
5999     TCGMovExtend ptr_mov = {
6000         .dst_type = TCG_TYPE_PTR,
6001         .src_type = TCG_TYPE_PTR,
6002         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6003     };
6004     const TCGCallArgumentLoc *loc = &info->in[0];
6005     TCGType type;
6006     unsigned slot;
6007     tcg_target_ulong imm;
6008 
6009     /*
6010      * Handle env, which is always first.
6011      */
6012     ptr_mov.dst = loc->arg_slot;
6013     ptr_mov.src = TCG_AREG0;
6014     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6015 
6016     /*
6017      * Handle oi.
6018      */
6019     imm = ldst->oi;
6020     loc = &info->in[next_arg];
6021     type = TCG_TYPE_I32;
6022     switch (loc->kind) {
6023     case TCG_CALL_ARG_NORMAL:
6024         break;
6025     case TCG_CALL_ARG_EXTEND_U:
6026     case TCG_CALL_ARG_EXTEND_S:
6027         /* No extension required for MemOpIdx. */
6028         tcg_debug_assert(imm <= INT32_MAX);
6029         type = TCG_TYPE_REG;
6030         break;
6031     default:
6032         g_assert_not_reached();
6033     }
6034     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6035     next_arg++;
6036 
6037     /*
6038      * Handle ra.
6039      */
6040     loc = &info->in[next_arg];
6041     slot = loc->arg_slot;
6042     if (parm->ra_gen) {
6043         int arg_reg = -1;
6044         TCGReg ra_reg;
6045 
6046         if (arg_slot_reg_p(slot)) {
6047             arg_reg = tcg_target_call_iarg_regs[slot];
6048         }
6049         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6050 
6051         ptr_mov.dst = slot;
6052         ptr_mov.src = ra_reg;
6053         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6054     } else {
6055         imm = (uintptr_t)ldst->raddr;
6056         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6057     }
6058 }
6059 
6060 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6061                                        const TCGCallArgumentLoc *loc,
6062                                        TCGType dst_type, TCGType src_type,
6063                                        TCGReg lo, TCGReg hi)
6064 {
6065     MemOp reg_mo;
6066 
6067     if (dst_type <= TCG_TYPE_REG) {
6068         MemOp src_ext;
6069 
6070         switch (loc->kind) {
6071         case TCG_CALL_ARG_NORMAL:
6072             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6073             break;
6074         case TCG_CALL_ARG_EXTEND_U:
6075             dst_type = TCG_TYPE_REG;
6076             src_ext = MO_UL;
6077             break;
6078         case TCG_CALL_ARG_EXTEND_S:
6079             dst_type = TCG_TYPE_REG;
6080             src_ext = MO_SL;
6081             break;
6082         default:
6083             g_assert_not_reached();
6084         }
6085 
6086         mov[0].dst = loc->arg_slot;
6087         mov[0].dst_type = dst_type;
6088         mov[0].src = lo;
6089         mov[0].src_type = src_type;
6090         mov[0].src_ext = src_ext;
6091         return 1;
6092     }
6093 
6094     if (TCG_TARGET_REG_BITS == 32) {
6095         assert(dst_type == TCG_TYPE_I64);
6096         reg_mo = MO_32;
6097     } else {
6098         assert(dst_type == TCG_TYPE_I128);
6099         reg_mo = MO_64;
6100     }
6101 
6102     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
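    /*
     * Fill the two consecutive argument slots in host memory order: on
     * a big-endian host the high part takes the first slot and the low
     * part the second.
     */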
6103     mov[0].src = lo;
6104     mov[0].dst_type = TCG_TYPE_REG;
6105     mov[0].src_type = TCG_TYPE_REG;
6106     mov[0].src_ext = reg_mo;
6107 
6108     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6109     mov[1].src = hi;
6110     mov[1].dst_type = TCG_TYPE_REG;
6111     mov[1].src_type = TCG_TYPE_REG;
6112     mov[1].src_ext = reg_mo;
6113 
6114     return 2;
6115 }
6116 
6117 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6118                                    const TCGLdstHelperParam *parm)
6119 {
6120     const TCGHelperInfo *info;
6121     const TCGCallArgumentLoc *loc;
6122     TCGMovExtend mov[2];
6123     unsigned next_arg, nmov;
6124     MemOp mop = get_memop(ldst->oi);
6125 
6126     switch (mop & MO_SIZE) {
6127     case MO_8:
6128     case MO_16:
6129     case MO_32:
6130         info = &info_helper_ld32_mmu;
6131         break;
6132     case MO_64:
6133         info = &info_helper_ld64_mmu;
6134         break;
6135     case MO_128:
6136         info = &info_helper_ld128_mmu;
6137         break;
6138     default:
6139         g_assert_not_reached();
6140     }
6141 
6142     /* Defer env argument. */
6143     next_arg = 1;
6144 
6145     loc = &info->in[next_arg];
6146     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6147         /*
6148          * 32-bit host with 32-bit guest: zero-extend the guest address
6149          * to 64-bits for the helper by storing the low part, then
6150          * load a zero for the high part.
6151          */
6152         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6153                                TCG_TYPE_I32, TCG_TYPE_I32,
6154                                ldst->addr_reg, -1);
6155         tcg_out_helper_load_slots(s, 1, mov, parm);
6156 
6157         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6158                                 TCG_TYPE_I32, 0, parm);
6159         next_arg += 2;
6160     } else {
6161         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6162                                       ldst->addr_reg, -1);
6163         tcg_out_helper_load_slots(s, nmov, mov, parm);
6164         next_arg += nmov;
6165     }
6166 
6167     switch (info->out_kind) {
6168     case TCG_CALL_RET_NORMAL:
6169     case TCG_CALL_RET_BY_VEC:
6170         break;
6171     case TCG_CALL_RET_BY_REF:
6172         /*
6173          * The return reference is in the first argument slot.
6174          * We need memory in which to return: re-use the top of stack.
6175          */
6176         {
6177             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6178 
6179             if (arg_slot_reg_p(0)) {
6180                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6181                                  TCG_REG_CALL_STACK, ofs_slot0);
6182             } else {
6183                 tcg_debug_assert(parm->ntmp != 0);
6184                 tcg_out_addi_ptr(s, parm->tmp[0],
6185                                  TCG_REG_CALL_STACK, ofs_slot0);
6186                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6187                            TCG_REG_CALL_STACK, ofs_slot0);
6188             }
6189         }
6190         break;
6191     default:
6192         g_assert_not_reached();
6193     }
6194 
6195     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6196 }
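
/*
 * For orientation (a sketch, not a definition): the descriptors
 * info_helper_ld{32,64,128}_mmu used above correspond to slow-path
 * helpers of roughly this shape,
 *
 *     tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
 *                                      MemOpIdx oi, uintptr_t retaddr);
 *
 * The env argument is deferred here and is loaded, together with oi
 * and the return address, by tcg_out_helper_load_common_args.
 */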
6197 
6198 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6199                                   bool load_sign,
6200                                   const TCGLdstHelperParam *parm)
6201 {
6202     MemOp mop = get_memop(ldst->oi);
6203     TCGMovExtend mov[2];
6204     int ofs_slot0;
6205 
6206     switch (ldst->type) {
6207     case TCG_TYPE_I64:
6208         if (TCG_TARGET_REG_BITS == 32) {
6209             break;
6210         }
6211         /* fall through */
6212 
6213     case TCG_TYPE_I32:
6214         mov[0].dst = ldst->datalo_reg;
6215         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6216         mov[0].dst_type = ldst->type;
6217         mov[0].src_type = TCG_TYPE_REG;
6218 
6219         /*
6220          * If load_sign, then we allowed the helper to perform the
6221          * appropriate sign extension to tcg_target_ulong, and all
6222          * we need now is a plain move.
6223          *
6224          * If not, then we expect the relevant extension
6225          * instruction to be no more expensive than a move, and
6226          * we thus save icache space etc. by using only one of
6227          * the two helper functions.
6228          */
6229         if (load_sign || !(mop & MO_SIGN)) {
6230             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6231                 mov[0].src_ext = MO_32;
6232             } else {
6233                 mov[0].src_ext = MO_64;
6234             }
6235         } else {
6236             mov[0].src_ext = mop & MO_SSIZE;
6237         }
6238         tcg_out_movext1(s, mov);
6239         return;
6240 
6241     case TCG_TYPE_I128:
6242         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6243         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6244         switch (TCG_TARGET_CALL_RET_I128) {
6245         case TCG_CALL_RET_NORMAL:
6246             break;
6247         case TCG_CALL_RET_BY_VEC:
6248             tcg_out_st(s, TCG_TYPE_V128,
6249                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6250                        TCG_REG_CALL_STACK, ofs_slot0);
6251             /* fall through */
6252         case TCG_CALL_RET_BY_REF:
6253             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6254                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6255             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6256                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6257             return;
6258         default:
6259             g_assert_not_reached();
6260         }
6261         break;
6262 
6263     default:
6264         g_assert_not_reached();
6265     }
6266 
6267     mov[0].dst = ldst->datalo_reg;
6268     mov[0].src =
6269         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6270     mov[0].dst_type = TCG_TYPE_REG;
6271     mov[0].src_type = TCG_TYPE_REG;
6272     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6273 
6274     mov[1].dst = ldst->datahi_reg;
6275     mov[1].src =
6276         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6277     mov[1].dst_type = TCG_TYPE_REG;
6278     mov[1].src_type = TCG_TYPE_REG;
6279     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6280 
6281     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6282 }
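
/*
 * Example (illustrative): a MO_SW load into an I64 destination with
 * load_sign == false sets mov[0].src_ext = MO_SW, so tcg_out_movext1
 * emits a single sign-extending move from the call's return register.
 * With load_sign == true the helper has already extended the value to
 * tcg_target_ulong, and src_ext collapses to a plain MO_64 move.
 */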
6283 
6284 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6285                                    const TCGLdstHelperParam *parm)
6286 {
6287     const TCGHelperInfo *info;
6288     const TCGCallArgumentLoc *loc;
6289     TCGMovExtend mov[4];
6290     TCGType data_type;
6291     unsigned next_arg, nmov, n;
6292     MemOp mop = get_memop(ldst->oi);
6293 
6294     switch (mop & MO_SIZE) {
6295     case MO_8:
6296     case MO_16:
6297     case MO_32:
6298         info = &info_helper_st32_mmu;
6299         data_type = TCG_TYPE_I32;
6300         break;
6301     case MO_64:
6302         info = &info_helper_st64_mmu;
6303         data_type = TCG_TYPE_I64;
6304         break;
6305     case MO_128:
6306         info = &info_helper_st128_mmu;
6307         data_type = TCG_TYPE_I128;
6308         break;
6309     default:
6310         g_assert_not_reached();
6311     }
6312 
6313     /* Defer env argument. */
6314     next_arg = 1;
6315     nmov = 0;
6316 
6317     /* Handle addr argument. */
6318     loc = &info->in[next_arg];
6319     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6320     if (TCG_TARGET_REG_BITS == 32) {
6321         /*
6322          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6323          * to 64 bits for the helper by storing the low part.  Later,
6324          * after we have processed the register inputs, we will load a
6325          * zero for the high part.
6326          */
6327         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6328                                TCG_TYPE_I32, TCG_TYPE_I32,
6329                                ldst->addr_reg, -1);
6330         next_arg += 2;
6331         nmov += 1;
6332     } else {
6333         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6334                                    ldst->addr_reg, -1);
6335         next_arg += n;
6336         nmov += n;
6337     }
6338 
6339     /* Handle data argument. */
6340     loc = &info->in[next_arg];
6341     switch (loc->kind) {
6342     case TCG_CALL_ARG_NORMAL:
6343     case TCG_CALL_ARG_EXTEND_U:
6344     case TCG_CALL_ARG_EXTEND_S:
6345         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6346                                    ldst->datalo_reg, ldst->datahi_reg);
6347         next_arg += n;
6348         nmov += n;
6349         tcg_out_helper_load_slots(s, nmov, mov, parm);
6350         break;
6351 
6352     case TCG_CALL_ARG_BY_REF:
6353         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6354         tcg_debug_assert(data_type == TCG_TYPE_I128);
6355         tcg_out_st(s, TCG_TYPE_I64,
6356                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6357                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6358         tcg_out_st(s, TCG_TYPE_I64,
6359                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6360                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6361 
6362         tcg_out_helper_load_slots(s, nmov, mov, parm);
6363 
6364         if (arg_slot_reg_p(loc->arg_slot)) {
6365             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6366                              TCG_REG_CALL_STACK,
6367                              arg_slot_stk_ofs(loc->ref_slot));
6368         } else {
6369             tcg_debug_assert(parm->ntmp != 0);
6370             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6371                              arg_slot_stk_ofs(loc->ref_slot));
6372             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6373                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6374         }
6375         next_arg += 2;
6376         break;
6377 
6378     default:
6379         g_assert_not_reached();
6380     }
6381 
6382     if (TCG_TARGET_REG_BITS == 32) {
6383         /* Zero-extend the address by loading a zero for the high part. */
6384         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6385         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6386     }
6387 
6388     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6389 }
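
/*
 * Illustrative layout for the TCG_CALL_ARG_BY_REF case above, on a
 * 64-bit little-endian host: the two 64-bit halves of the I128 data
 * are spilled to loc[0].ref_slot and loc[1].ref_slot on the stack,
 * and the argument slot itself then receives a pointer to that spill
 * area, e.g.
 *
 *     st   datalo, [sp + ref_slot0]
 *     st   datahi, [sp + ref_slot1]
 *     addi arg_reg, sp, ref_slot0    // pointer passed to the helper
 */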
6390 
6391 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6392 {
6393     int i, start_words, num_insns;
6394     TCGOp *op;
6395 
6396     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6397                  && qemu_log_in_addr_range(pc_start))) {
6398         FILE *logfile = qemu_log_trylock();
6399         if (logfile) {
6400             fprintf(logfile, "OP:\n");
6401             tcg_dump_ops(s, logfile, false);
6402             fprintf(logfile, "\n");
6403             qemu_log_unlock(logfile);
6404         }
6405     }
6406 
6407 #ifdef CONFIG_DEBUG_TCG
6408     /* Ensure all labels referenced have been emitted.  */
6409     {
6410         TCGLabel *l;
6411         bool error = false;
6412 
6413         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6414             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6415                 qemu_log_mask(CPU_LOG_TB_OP,
6416                               "$L%d referenced but not present.\n", l->id);
6417                 error = true;
6418             }
6419         }
6420         assert(!error);
6421     }
6422 #endif
6423 
6424     /* Do not reuse any EBB that may be allocated within the TB. */
6425     tcg_temp_ebb_reset_freed(s);
6426 
6427     tcg_optimize(s);
6428 
6429     reachable_code_pass(s);
6430     liveness_pass_0(s);
6431     liveness_pass_1(s);
6432 
6433     if (s->nb_indirects > 0) {
6434         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6435                      && qemu_log_in_addr_range(pc_start))) {
6436             FILE *logfile = qemu_log_trylock();
6437             if (logfile) {
6438                 fprintf(logfile, "OP before indirect lowering:\n");
6439                 tcg_dump_ops(s, logfile, false);
6440                 fprintf(logfile, "\n");
6441                 qemu_log_unlock(logfile);
6442             }
6443         }
6444 
6445         /* Replace indirect temps with direct temps.  */
6446         if (liveness_pass_2(s)) {
6447             /* If changes were made, re-run liveness.  */
6448             liveness_pass_1(s);
6449         }
6450     }
6451 
6452     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6453                  && qemu_log_in_addr_range(pc_start))) {
6454         FILE *logfile = qemu_log_trylock();
6455         if (logfile) {
6456             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6457             tcg_dump_ops(s, logfile, true);
6458             fprintf(logfile, "\n");
6459             qemu_log_unlock(logfile);
6460         }
6461     }
6462 
6463     /* Initialize goto_tb jump offsets. */
6464     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6465     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6466     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6467     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6468 
6469     tcg_reg_alloc_start(s);
6470 
6471     /*
6472      * Reset the buffer pointers when restarting after overflow.
6473      * TODO: Move this into translate-all.c with the rest of the
6474      * buffer management.  Having only this done here is confusing.
6475      */
6476     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6477     s->code_ptr = s->code_buf;
6478     s->data_gen_ptr = NULL;
6479 
6480     QSIMPLEQ_INIT(&s->ldst_labels);
6481     s->pool_labels = NULL;
6482 
6483     start_words = s->insn_start_words;
6484     s->gen_insn_data =
6485         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6486 
6487     tcg_out_tb_start(s);
6488 
6489     num_insns = -1;
6490     QTAILQ_FOREACH(op, &s->ops, link) {
6491         TCGOpcode opc = op->opc;
6492 
6493         switch (opc) {
6494         case INDEX_op_mov:
6495         case INDEX_op_mov_vec:
6496             tcg_reg_alloc_mov(s, op);
6497             break;
6498         case INDEX_op_dup_vec:
6499             tcg_reg_alloc_dup(s, op);
6500             break;
6501         case INDEX_op_insn_start:
6502             if (num_insns >= 0) {
6503                 size_t off = tcg_current_code_size(s);
6504                 s->gen_insn_end_off[num_insns] = off;
6505                 /* Assert that we do not overflow our stored offset.  */
6506                 assert(s->gen_insn_end_off[num_insns] == off);
6507             }
6508             num_insns++;
6509             for (i = 0; i < start_words; ++i) {
6510                 s->gen_insn_data[num_insns * start_words + i] =
6511                     tcg_get_insn_start_param(op, i);
6512             }
6513             break;
6514         case INDEX_op_discard:
6515             temp_dead(s, arg_temp(op->args[0]));
6516             break;
6517         case INDEX_op_set_label:
6518             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6519             tcg_out_label(s, arg_label(op->args[0]));
6520             break;
6521         case INDEX_op_call:
6522             tcg_reg_alloc_call(s, op);
6523             break;
6524         case INDEX_op_exit_tb:
6525             tcg_out_exit_tb(s, op->args[0]);
6526             break;
6527         case INDEX_op_goto_tb:
6528             tcg_out_goto_tb(s, op->args[0]);
6529             break;
6530         case INDEX_op_dup2_vec:
6531             if (tcg_reg_alloc_dup2(s, op)) {
6532                 break;
6533             }
6534             /* fall through */
6535         default:
6536             /* Sanity check that we've not introduced any unhandled opcodes. */
6537             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6538                                               TCGOP_FLAGS(op)));
6539             /* Note: it would be faster still to have specialized
6540                register allocator functions for some common argument
6541                patterns. */
6542             tcg_reg_alloc_op(s, op);
6543             break;
6544         }
6545         /* Test for (pending) buffer overflow.  The assumption is that any
6546            one operation beginning below the high water mark cannot overrun
6547            the buffer completely.  Thus we can test for overflow after
6548            generating code without having to check during generation.  */
6549         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6550             return -1;
6551         }
6552         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6553         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6554             return -2;
6555         }
6556     }
6557     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6558     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6559 
6560     /* Generate TB finalization at the end of block */
6561     i = tcg_out_ldst_finalize(s);
6562     if (i < 0) {
6563         return i;
6564     }
6565     i = tcg_out_pool_finalize(s);
6566     if (i < 0) {
6567         return i;
6568     }
6569     if (!tcg_resolve_relocs(s)) {
6570         return -2;
6571     }
6572 
6573 #ifndef CONFIG_TCG_INTERPRETER
6574     /* flush instruction cache */
6575     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6576                         (uintptr_t)s->code_buf,
6577                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6578 #endif
6579 
6580     return tcg_current_code_size(s);
6581 }
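
/*
 * A caller of tcg_gen_code is expected to treat the result roughly
 * like this (a sketch; the real restart logic lives with the
 * translator's buffer management):
 *
 *     gen_code_size = tcg_gen_code(s, tb, pc_start);
 *     if (unlikely(gen_code_size < 0)) {
 *         // -1: passed code_gen_highwater; -2: relocation out of
 *         // range or TB offsets exceed 16 bits.  Either way, get a
 *         // fresh region of code_gen_buffer and translate again.
 *     }
 */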
6582 
6583 #ifdef ELF_HOST_MACHINE
6584 /* In order to use this feature, the backend needs to do three things:
6585 
6586    (1) Define ELF_HOST_MACHINE to indicate both what value to
6587        put into the ELF image and to indicate support for the feature.
6588 
6589    (2) Define tcg_register_jit.  This should create a buffer containing
6590        the contents of a .debug_frame section that describes the post-
6591        prologue unwind info for the tcg machine.
6592 
6593    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6594 */
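
/*
 * A backend's tcg_register_jit is then typically just a thin wrapper
 * (a sketch, assuming a host-specific static debug_frame object):
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */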
6595 
6596 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6597 typedef enum {
6598     JIT_NOACTION = 0,
6599     JIT_REGISTER_FN,
6600     JIT_UNREGISTER_FN
6601 } jit_actions_t;
6602 
6603 struct jit_code_entry {
6604     struct jit_code_entry *next_entry;
6605     struct jit_code_entry *prev_entry;
6606     const void *symfile_addr;
6607     uint64_t symfile_size;
6608 };
6609 
6610 struct jit_descriptor {
6611     uint32_t version;
6612     uint32_t action_flag;
6613     struct jit_code_entry *relevant_entry;
6614     struct jit_code_entry *first_entry;
6615 };
6616 
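/* GDB sets a breakpoint on this function; the noinline attribute and
   the empty asm statement keep the compiler from eliding the call or
   the symbol.  */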
6617 void __jit_debug_register_code(void) __attribute__((noinline));
6618 void __jit_debug_register_code(void)
6619 {
6620     asm("");
6621 }
6622 
6623 /* Must statically initialize the version, because GDB may check
6624    the version before we can set it.  */
6625 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6626 
6627 /* End GDB interface.  */
6628 
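/* Return the offset of STR within STRTAB.  There is deliberately no
   not-found check: every caller below passes a string that is known
   to be present in the string table image.  */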
6629 static int find_string(const char *strtab, const char *str)
6630 {
6631     const char *p = strtab + 1;
6632 
6633     while (1) {
6634         if (strcmp(p, str) == 0) {
6635             return p - strtab;
6636         }
6637         p += strlen(p) + 1;
6638     }
6639 }
6640 
6641 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6642                                  const void *debug_frame,
6643                                  size_t debug_frame_size)
6644 {
6645     struct __attribute__((packed)) DebugInfo {
6646         uint32_t  len;
6647         uint16_t  version;
6648         uint32_t  abbrev;
6649         uint8_t   ptr_size;
6650         uint8_t   cu_die;
6651         uint16_t  cu_lang;
6652         uintptr_t cu_low_pc;
6653         uintptr_t cu_high_pc;
6654         uint8_t   fn_die;
6655         char      fn_name[16];
6656         uintptr_t fn_low_pc;
6657         uintptr_t fn_high_pc;
6658         uint8_t   cu_eoc;
6659     };
6660 
6661     struct ElfImage {
6662         ElfW(Ehdr) ehdr;
6663         ElfW(Phdr) phdr;
6664         ElfW(Shdr) shdr[7];
6665         ElfW(Sym)  sym[2];
6666         struct DebugInfo di;
6667         uint8_t    da[24];
6668         char       str[80];
6669     };
6670 
6671     struct ElfImage *img;
6672 
6673     static const struct ElfImage img_template = {
6674         .ehdr = {
6675             .e_ident[EI_MAG0] = ELFMAG0,
6676             .e_ident[EI_MAG1] = ELFMAG1,
6677             .e_ident[EI_MAG2] = ELFMAG2,
6678             .e_ident[EI_MAG3] = ELFMAG3,
6679             .e_ident[EI_CLASS] = ELF_CLASS,
6680             .e_ident[EI_DATA] = ELF_DATA,
6681             .e_ident[EI_VERSION] = EV_CURRENT,
6682             .e_type = ET_EXEC,
6683             .e_machine = ELF_HOST_MACHINE,
6684             .e_version = EV_CURRENT,
6685             .e_phoff = offsetof(struct ElfImage, phdr),
6686             .e_shoff = offsetof(struct ElfImage, shdr),
6687             .e_ehsize = sizeof(ElfW(Ehdr)),
6688             .e_phentsize = sizeof(ElfW(Phdr)),
6689             .e_phnum = 1,
6690             .e_shentsize = sizeof(ElfW(Shdr)),
6691             .e_shnum = ARRAY_SIZE(img->shdr),
6692             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6693 #ifdef ELF_HOST_FLAGS
6694             .e_flags = ELF_HOST_FLAGS,
6695 #endif
6696 #ifdef ELF_OSABI
6697             .e_ident[EI_OSABI] = ELF_OSABI,
6698 #endif
6699         },
6700         .phdr = {
6701             .p_type = PT_LOAD,
6702             .p_flags = PF_X,
6703         },
6704         .shdr = {
6705             [0] = { .sh_type = SHT_NULL },
6706             /* Trick: The contents of code_gen_buffer are not present in
6707                this fake ELF file; that got allocated elsewhere.  Therefore
6708                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6709                will not look for contents.  We can record any address.  */
6710             [1] = { /* .text */
6711                 .sh_type = SHT_NOBITS,
6712                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6713             },
6714             [2] = { /* .debug_info */
6715                 .sh_type = SHT_PROGBITS,
6716                 .sh_offset = offsetof(struct ElfImage, di),
6717                 .sh_size = sizeof(struct DebugInfo),
6718             },
6719             [3] = { /* .debug_abbrev */
6720                 .sh_type = SHT_PROGBITS,
6721                 .sh_offset = offsetof(struct ElfImage, da),
6722                 .sh_size = sizeof(img->da),
6723             },
6724             [4] = { /* .debug_frame */
6725                 .sh_type = SHT_PROGBITS,
6726                 .sh_offset = sizeof(struct ElfImage),
6727             },
6728             [5] = { /* .symtab */
6729                 .sh_type = SHT_SYMTAB,
6730                 .sh_offset = offsetof(struct ElfImage, sym),
6731                 .sh_size = sizeof(img->sym),
6732                 .sh_info = 1,
6733                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6734                 .sh_entsize = sizeof(ElfW(Sym)),
6735             },
6736             [6] = { /* .strtab */
6737                 .sh_type = SHT_STRTAB,
6738                 .sh_offset = offsetof(struct ElfImage, str),
6739                 .sh_size = sizeof(img->str),
6740             }
6741         },
6742         .sym = {
6743             [1] = { /* code_gen_buffer */
6744                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6745                 .st_shndx = 1,
6746             }
6747         },
6748         .di = {
6749             .len = sizeof(struct DebugInfo) - 4,
6750             .version = 2,
6751             .ptr_size = sizeof(void *),
6752             .cu_die = 1,
6753             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6754             .fn_die = 2,
6755             .fn_name = "code_gen_buffer"
6756         },
6757         .da = {
6758             1,          /* abbrev number (the cu) */
6759             0x11, 1,    /* DW_TAG_compile_unit, has children */
6760             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6761             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6762             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6763             0, 0,       /* end of abbrev */
6764             2,          /* abbrev number (the fn) */
6765             0x2e, 0,    /* DW_TAG_subprogram, no children */
6766             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6767             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6768             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6769             0, 0,       /* end of abbrev */
6770             0           /* no more abbrev */
6771         },
6772         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6773                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6774     };
6775 
6776     /* We only need a single jit entry; statically allocate it.  */
6777     static struct jit_code_entry one_entry;
6778 
6779     uintptr_t buf = (uintptr_t)buf_ptr;
6780     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6781     DebugFrameHeader *dfh;
6782 
6783     img = g_malloc(img_size);
6784     *img = img_template;
6785 
6786     img->phdr.p_vaddr = buf;
6787     img->phdr.p_paddr = buf;
6788     img->phdr.p_memsz = buf_size;
6789 
6790     img->shdr[1].sh_name = find_string(img->str, ".text");
6791     img->shdr[1].sh_addr = buf;
6792     img->shdr[1].sh_size = buf_size;
6793 
6794     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6795     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6796 
6797     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6798     img->shdr[4].sh_size = debug_frame_size;
6799 
6800     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6801     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6802 
6803     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6804     img->sym[1].st_value = buf;
6805     img->sym[1].st_size = buf_size;
6806 
6807     img->di.cu_low_pc = buf;
6808     img->di.cu_high_pc = buf + buf_size;
6809     img->di.fn_low_pc = buf;
6810     img->di.fn_high_pc = buf + buf_size;
6811 
6812     dfh = (DebugFrameHeader *)(img + 1);
6813     memcpy(dfh, debug_frame, debug_frame_size);
6814     dfh->fde.func_start = buf;
6815     dfh->fde.func_len = buf_size;
6816 
6817 #ifdef DEBUG_JIT
6818     /* Enable this block to debug the creation of the ELF image file.
6819        One can use readelf, objdump, or other inspection utilities.  */
6820     {
6821         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6822         FILE *f = fopen(jit, "w+b");
6823         if (f) {
6824             if (fwrite(img, img_size, 1, f) != 1) {
6825                 /* Writing one block of img_size bytes returns 1 on
6826                    success; the check only silences the unused-result
6827                    warning for fwrite.  */
6828             }
6827             fclose(f);
6828         }
6829     }
6830 #endif
6831 
6832     one_entry.symfile_addr = img;
6833     one_entry.symfile_size = img_size;
6834 
6835     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6836     __jit_debug_descriptor.relevant_entry = &one_entry;
6837     __jit_debug_descriptor.first_entry = &one_entry;
6838     __jit_debug_register_code();
6839 }
6840 #else
6841 /* No support for the feature.  Provide the entry point expected by exec.c,
6842    and implement the internal function we declared earlier.  */
6843 
6844 static void tcg_register_jit_int(const void *buf, size_t size,
6845                                  const void *debug_frame,
6846                                  size_t debug_frame_size)
6847 {
6848 }
6849 
6850 void tcg_register_jit(const void *buf, size_t buf_size)
6851 {
6852 }
6853 #endif /* ELF_HOST_MACHINE */
6854 
6855 #if !TCG_TARGET_MAYBE_vec
6856 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6857 {
6858     g_assert_not_reached();
6859 }
6860 #endif
6861