xref: /openbmc/qemu/tcg/tcg.c (revision d776198cd31d1578c4b0239dc80cb2841e86f2f8)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
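/*
 * Editorial example (not from the original source): both fields hold
 * log2 of a byte count, so an MO_64 access that must be performed in
 * 4-byte atomic pieces at 2-byte alignment would be described as
 * { .atom = MO_32, .align = MO_16 }.
 */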

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
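/*
 * Illustrative sketch (editorial, not part of the original source):
 * a backend with TCG_TARGET_INSN_UNIT_SIZE == 1 (e.g. x86) composes an
 * instruction from these primitives, for instance:
 *
 *     tcg_out8(s, 0x90);          // one opcode byte
 *     tcg_out32(s, disp);         // a 32-bit immediate, via memcpy
 *
 * while a fixed-width RISC backend emits whole words with tcg_out32().
 * The memcpy in the wider emitters keeps the store safe when the value
 * spans several smaller insn units.
 */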

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
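/*
 * Illustrative sketch of the label life cycle (editorial, not from the
 * original source; R_EXAMPLE is a hypothetical relocation type):
 *
 *     TCGLabel *l = gen_new_label();
 *     // forward branch: remember where the insn needs patching
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
 *     ...emit the branch insn...
 *     tcg_out_label(s, l);    // bind the label to the current code_ptr
 *     // tcg_resolve_relocs() later calls patch_reloc() for every use
 */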

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
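/*
 * Usage example (editorial): sign-extending the low 32 bits of a
 * 64-bit register into a 64-bit destination dispatches to
 * tcg_out_ext32s() on a 64-bit host:
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I64, MO_SL, src);
 */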

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch register or two xchgs.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
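/*
 * Illustrative sketch (editorial, not from the original source):
 * a backend that cannot encode a 64-bit constant inline may emit a
 * pc-relative load and defer the value to the constant pool:
 *
 *     new_pool_label(s, value, R_EXAMPLE_PCREL, s->code_ptr, 0);
 *     ...emit "load reg, [pc + 0]"...
 *
 * tcg_out_pool_finalize() then places the data after the code and
 * patch_reloc() fixes up the displacement.  R_EXAMPLE_PCREL is a
 * hypothetical relocation type.
 */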

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
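/*
 * Expansion example (editorial note): with these definitions, an entry
 * such as C_O1_I2(r, r, ri) in tcg-target-con-set.h becomes the
 * enumerator c_o1_i2_r_r_ri below, giving every distinct constraint
 * combination its own TCGConstraintSetIndex value.
 */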

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
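/*
 * Expansion example (editorial note): the same C_O1_I2(r, r, ri) entry
 * now expands to { 1, 2, { "r", "r", "ri" } }: one output and two
 * inputs, in the same order as the enumeration above, so the enum
 * value indexes constraint_sets[] directly.
 */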

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;
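/*
 * Illustrative sketch (editorial; tgen_add is a hypothetical name):
 * each tcg-target.c.inc provides one such structure per opcode, e.g.
 *
 *     static void tgen_add(TCGContext *s, TCGType type,
 *                          TCGReg a0, TCGReg a1, TCGReg a2)
 *     { ...emit host add... }
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, r),
 *         .out_rrr = tgen_add,
 *     };
 */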

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
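/*
 * Editorial note: the _Generic selection is a compile-time type check.
 * For example,
 *
 *     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 *
 * expands to [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary:
 * &outop_add.base), which fails to compile if outop_add is not exactly
 * a TCGOutOpBinary.
 */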

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
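/*
 * Illustrative sketch (editorial, not from the original source): pool
 * allocations live only until the next tcg_pool_reset(), e.g.
 *
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));   // pool memory
 *     ...use while translating the current TB...
 *     // reclaimed wholesale by tcg_pool_reset(); no per-object free
 */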

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
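/*
 * Editorial note on the encoding: field n of the typemask occupies
 * bits [3n+2 : 3n], with field 0 describing the return value and
 * field n >= 1 describing argument n - 1.  E.g. in info_helper_st64_mmu
 * the i64 data argument is dh_typemask(i64, 3), i.e. dh_typecode_i64
 * shifted left by 9 bits.
 */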

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
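    /*
     * Worked example (editorial): for helper_ldq_mmu the fields after
     * the return value are env, i64, i32, ptr, so the topmost set bit
     * of (typemask >> 3) lies in the fourth 3-bit field (bits 9..11)
     * and DIV_ROUND_UP rounds up to nargs = 4.
     */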
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
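    /* Round arg_slot up to an even index: add 1 when odd, 0 when even. */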
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
1396      * Allocate space from "ref_slot", which will be adjusted to
1397      * follow the parameters on the stack.
1398      */
1399     loc[0].ref_slot = cum->ref_slot;
1400 
1401     /*
1402      * Subsequent words also go into the reference slot, but
1403      * do not accumulate into the regular arguments.
1404      */
1405     for (int i = 1; i < n; ++i) {
1406         loc[i] = (TCGCallArgumentLoc){
1407             .kind = TCG_CALL_ARG_BY_REF_N,
1408             .arg_idx = cum->arg_idx,
1409             .tmp_subindex = i,
1410             .ref_slot = cum->ref_slot + i,
1411         };
1412     }
1413     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1414     cum->ref_slot += n;
1415 }
1416 
1417 static void init_call_layout(TCGHelperInfo *info)
1418 {
1419     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1420     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1421     unsigned typemask = info->typemask;
1422     unsigned typecode;
1423     TCGCumulativeArgs cum = { };
1424 
1425     /*
1426      * Parse and place any function return value.
1427      */
1428     typecode = typemask & 7;
1429     switch (typecode) {
1430     case dh_typecode_void:
1431         info->nr_out = 0;
1432         break;
1433     case dh_typecode_i32:
1434     case dh_typecode_s32:
1435     case dh_typecode_ptr:
1436         info->nr_out = 1;
1437         info->out_kind = TCG_CALL_RET_NORMAL;
1438         break;
1439     case dh_typecode_i64:
1440     case dh_typecode_s64:
1441         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1442         info->out_kind = TCG_CALL_RET_NORMAL;
1443         /* Query the last register now to trigger any assert early. */
1444         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1445         break;
1446     case dh_typecode_i128:
1447         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1448         info->out_kind = TCG_TARGET_CALL_RET_I128;
1449         switch (TCG_TARGET_CALL_RET_I128) {
1450         case TCG_CALL_RET_NORMAL:
1451             /* Query the last register now to trigger any assert early. */
1452             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1453             break;
1454         case TCG_CALL_RET_BY_VEC:
1455             /* Query the single register now to trigger any assert early. */
1456             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1457             break;
1458         case TCG_CALL_RET_BY_REF:
1459             /*
1460              * Allocate the first argument to the output.
1461              * We don't need to store this anywhere, just make it
1462              * unavailable for use in the input loop below.
1463              */
1464             cum.arg_slot = 1;
1465             break;
1466         default:
1467             qemu_build_not_reached();
1468         }
1469         break;
1470     default:
1471         g_assert_not_reached();
1472     }
1473 
1474     /*
1475      * Parse and place function arguments.
1476      */
1477     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1478         TCGCallArgumentKind kind;
1479         TCGType type;
1480 
1481         typecode = typemask & 7;
1482         switch (typecode) {
1483         case dh_typecode_i32:
1484         case dh_typecode_s32:
1485             type = TCG_TYPE_I32;
1486             break;
1487         case dh_typecode_i64:
1488         case dh_typecode_s64:
1489             type = TCG_TYPE_I64;
1490             break;
1491         case dh_typecode_ptr:
1492             type = TCG_TYPE_PTR;
1493             break;
1494         case dh_typecode_i128:
1495             type = TCG_TYPE_I128;
1496             break;
1497         default:
1498             g_assert_not_reached();
1499         }
1500 
1501         switch (type) {
1502         case TCG_TYPE_I32:
1503             switch (TCG_TARGET_CALL_ARG_I32) {
1504             case TCG_CALL_ARG_EVEN:
1505                 layout_arg_even(&cum);
1506                 /* fall through */
1507             case TCG_CALL_ARG_NORMAL:
1508                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1509                 break;
1510             case TCG_CALL_ARG_EXTEND:
1511                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1512                 layout_arg_1(&cum, info, kind);
1513                 break;
1514             default:
1515                 qemu_build_not_reached();
1516             }
1517             break;
1518 
1519         case TCG_TYPE_I64:
1520             switch (TCG_TARGET_CALL_ARG_I64) {
1521             case TCG_CALL_ARG_EVEN:
1522                 layout_arg_even(&cum);
1523                 /* fall through */
1524             case TCG_CALL_ARG_NORMAL:
1525                 if (TCG_TARGET_REG_BITS == 32) {
1526                     layout_arg_normal_n(&cum, info, 2);
1527                 } else {
1528                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1529                 }
1530                 break;
1531             default:
1532                 qemu_build_not_reached();
1533             }
1534             break;
1535 
1536         case TCG_TYPE_I128:
1537             switch (TCG_TARGET_CALL_ARG_I128) {
1538             case TCG_CALL_ARG_EVEN:
1539                 layout_arg_even(&cum);
1540                 /* fall through */
1541             case TCG_CALL_ARG_NORMAL:
1542                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1543                 break;
1544             case TCG_CALL_ARG_BY_REF:
1545                 layout_arg_by_ref(&cum, info);
1546                 break;
1547             default:
1548                 qemu_build_not_reached();
1549             }
1550             break;
1551 
1552         default:
1553             g_assert_not_reached();
1554         }
1555     }
1556     info->nr_in = cum.info_in_idx;
1557 
1558     /* Validate that we didn't overrun the input array. */
1559     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1560     /* Validate the backend has enough argument space. */
1561     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1562 
1563     /*
1564      * Relocate the "ref_slot" area to the end of the parameters.
1565      * Minimizing this stack offset helps code size for x86,
1566      * which has a signed 8-bit offset encoding.
1567      */
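    /*
     * Worked example, with hypothetical numbers: on a 64-bit host with
     * max_reg_slots = 6, align is __alignof(Int128) / 8 = 2.  If the
     * arguments consumed arg_slot = 7 (one slot spilled to the stack)
     * and an I128 by-reference copy reserved ref_slot = 2, then
     *     ref_base = ROUND_UP(7 - 6, 2) = 2, then 2 + 6 = 8,
     * and the copy is relocated to slots 8-9 -- i.e. stack slots 2-3,
     * just past the re-aligned parameter area.
     */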
1568     if (cum.ref_slot != 0) {
1569         int ref_base = 0;
1570 
1571         if (cum.arg_slot > max_reg_slots) {
1572             int align = __alignof(Int128) / sizeof(tcg_target_long);
1573 
1574             ref_base = cum.arg_slot - max_reg_slots;
1575             if (align > 1) {
1576                 ref_base = ROUND_UP(ref_base, align);
1577             }
1578         }
1579         assert(ref_base + cum.ref_slot <= max_stk_slots);
1580         ref_base += max_reg_slots;
1581 
1582         if (ref_base != 0) {
1583             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1584                 TCGCallArgumentLoc *loc = &info->in[i];
1585                 switch (loc->kind) {
1586                 case TCG_CALL_ARG_BY_REF:
1587                 case TCG_CALL_ARG_BY_REF_N:
1588                     loc->ref_slot += ref_base;
1589                     break;
1590                 default:
1591                     break;
1592                 }
1593             }
1594         }
1595     }
1596 }
1597 
1598 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1599 static void process_constraint_sets(void);
1600 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1601                                             TCGReg reg, const char *name);
1602 
1603 static void tcg_context_init(unsigned max_threads)
1604 {
1605     TCGContext *s = &tcg_init_ctx;
1606     int n, i;
1607     TCGTemp *ts;
1608 
1609     memset(s, 0, sizeof(*s));
1610     s->nb_globals = 0;
1611 
1612     init_call_layout(&info_helper_ld32_mmu);
1613     init_call_layout(&info_helper_ld64_mmu);
1614     init_call_layout(&info_helper_ld128_mmu);
1615     init_call_layout(&info_helper_st32_mmu);
1616     init_call_layout(&info_helper_st64_mmu);
1617     init_call_layout(&info_helper_st128_mmu);
1618 
1619     tcg_target_init(s);
1620     process_constraint_sets();
1621 
1622     /* Reverse the order of the saved registers, assuming they're all at
1623        the start of tcg_target_reg_alloc_order.  */
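    /*
     * For illustration (register names are made up): if the target
     * order is { R8, R9, R4, R5 } and only R4/R5 are call-clobbered,
     * the first loop below stops with n = 2 and yields
     *     indirect_reg_alloc_order = { R9, R8, R4, R5 };
     * the call-saved prefix is reversed, the clobbered tail is kept.
     */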
1624     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1625         int r = tcg_target_reg_alloc_order[n];
1626         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1627             break;
1628         }
1629     }
1630     for (i = 0; i < n; ++i) {
1631         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1632     }
1633     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1634         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1635     }
1636 
1637     tcg_ctx = s;
1638     /*
1639      * In user-mode we simply share the init context among threads, since we
1640      * use a single region. See the documentation of tcg_region_init() for the
1641      * reasoning behind this.
1642      * In system-mode we will have at most max_threads TCG threads.
1643      */
1644 #ifdef CONFIG_USER_ONLY
1645     tcg_ctxs = &tcg_ctx;
1646     tcg_cur_ctxs = 1;
1647     tcg_max_ctxs = 1;
1648 #else
1649     tcg_max_ctxs = max_threads;
1650     tcg_ctxs = g_new0(TCGContext *, max_threads);
1651 #endif
1652 
1653     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1654     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1655     tcg_env = temp_tcgv_ptr(ts);
1656 }
1657 
1658 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1659 {
1660     tcg_context_init(max_threads);
1661     tcg_region_init(tb_size, splitwx, max_threads);
1662 }
1663 
1664 /*
1665  * Allocate TBs right before their corresponding translated code, making
1666  * sure that TBs and code are on different cache lines.
1667  */
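/*
 * Sketch with hypothetical sizes: given a 64-byte icache line and
 * code_gen_ptr == 0x1010, the TB lands at 0x1040; if
 * sizeof(TranslationBlock) were 0xb0, the translated code would start
 * at next == ROUND_UP(0x1040 + 0xb0, 64) == 0x1100, so the TB keeps
 * cache lines of its own.
 */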
1668 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1669 {
1670     uintptr_t align = qemu_icache_linesize;
1671     TranslationBlock *tb;
1672     void *next;
1673 
1674  retry:
1675     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1676     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1677 
1678     if (unlikely(next > s->code_gen_highwater)) {
1679         if (tcg_region_alloc(s)) {
1680             return NULL;
1681         }
1682         goto retry;
1683     }
1684     qatomic_set(&s->code_gen_ptr, next);
1685     return tb;
1686 }
1687 
1688 void tcg_prologue_init(void)
1689 {
1690     TCGContext *s = tcg_ctx;
1691     size_t prologue_size;
1692 
1693     s->code_ptr = s->code_gen_ptr;
1694     s->code_buf = s->code_gen_ptr;
1695     s->data_gen_ptr = NULL;
1696 
1697 #ifndef CONFIG_TCG_INTERPRETER
1698     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1699 #endif
1700 
1701     s->pool_labels = NULL;
1702 
1703     qemu_thread_jit_write();
1704     /* Generate the prologue.  */
1705     tcg_target_qemu_prologue(s);
1706 
1707     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1708     {
1709         int result = tcg_out_pool_finalize(s);
1710         tcg_debug_assert(result == 0);
1711     }
1712 
1713     prologue_size = tcg_current_code_size(s);
1714     perf_report_prologue(s->code_gen_ptr, prologue_size);
1715 
1716 #ifndef CONFIG_TCG_INTERPRETER
1717     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1718                         (uintptr_t)s->code_buf, prologue_size);
1719 #endif
1720 
1721     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1722         FILE *logfile = qemu_log_trylock();
1723         if (logfile) {
1724             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1725             if (s->data_gen_ptr) {
1726                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1727                 size_t data_size = prologue_size - code_size;
1728                 size_t i;
1729 
1730                 disas(logfile, s->code_gen_ptr, code_size);
1731 
1732                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1733                     if (sizeof(tcg_target_ulong) == 8) {
1734                         fprintf(logfile,
1735                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1736                                 (uintptr_t)s->data_gen_ptr + i,
1737                                 *(uint64_t *)(s->data_gen_ptr + i));
1738                     } else {
1739                         fprintf(logfile,
1740                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1741                                 (uintptr_t)s->data_gen_ptr + i,
1742                                 *(uint32_t *)(s->data_gen_ptr + i));
1743                     }
1744                 }
1745             } else {
1746                 disas(logfile, s->code_gen_ptr, prologue_size);
1747             }
1748             fprintf(logfile, "\n");
1749             qemu_log_unlock(logfile);
1750         }
1751     }
1752 
1753 #ifndef CONFIG_TCG_INTERPRETER
1754     /*
1755      * Assert that goto_ptr is implemented completely, setting an epilogue.
1756      * For tci, we use NULL as the signal to return from the interpreter,
1757      * so skip this check.
1758      */
1759     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1760 #endif
1761 
1762     tcg_region_prologue_set(s);
1763 }
1764 
1765 void tcg_func_start(TCGContext *s)
1766 {
1767     tcg_pool_reset(s);
1768     s->nb_temps = s->nb_globals;
1769 
1770     /* No temps have been previously allocated for size or locality.  */
1771     tcg_temp_ebb_reset_freed(s);
1772 
1773     /* No constant temps have been previously allocated. */
1774     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1775         if (s->const_table[i]) {
1776             g_hash_table_remove_all(s->const_table[i]);
1777         }
1778     }
1779 
1780     s->nb_ops = 0;
1781     s->nb_labels = 0;
1782     s->current_frame_offset = s->frame_start;
1783 
1784 #ifdef CONFIG_DEBUG_TCG
1785     s->goto_tb_issue_mask = 0;
1786 #endif
1787 
1788     QTAILQ_INIT(&s->ops);
1789     QTAILQ_INIT(&s->free_ops);
1790     s->emit_before_op = NULL;
1791     QSIMPLEQ_INIT(&s->labels);
1792 
1793     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1794     tcg_debug_assert(s->insn_start_words > 0);
1795 }
1796 
1797 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1798 {
1799     int n = s->nb_temps++;
1800 
1801     if (n >= TCG_MAX_TEMPS) {
1802         tcg_raise_tb_overflow(s);
1803     }
1804     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1805 }
1806 
1807 static TCGTemp *tcg_global_alloc(TCGContext *s)
1808 {
1809     TCGTemp *ts;
1810 
1811     tcg_debug_assert(s->nb_globals == s->nb_temps);
1812     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1813     s->nb_globals++;
1814     ts = tcg_temp_alloc(s);
1815     ts->kind = TEMP_GLOBAL;
1816 
1817     return ts;
1818 }
1819 
1820 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1821                                             TCGReg reg, const char *name)
1822 {
1823     TCGTemp *ts;
1824 
1825     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1826 
1827     ts = tcg_global_alloc(s);
1828     ts->base_type = type;
1829     ts->type = type;
1830     ts->kind = TEMP_FIXED;
1831     ts->reg = reg;
1832     ts->name = name;
1833     tcg_regset_set_reg(s->reserved_regs, reg);
1834 
1835     return ts;
1836 }
1837 
1838 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1839 {
1840     s->frame_start = start;
1841     s->frame_end = start + size;
1842     s->frame_temp
1843         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1844 }
1845 
1846 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1847                                             const char *name, TCGType type)
1848 {
1849     TCGContext *s = tcg_ctx;
1850     TCGTemp *base_ts = tcgv_ptr_temp(base);
1851     TCGTemp *ts = tcg_global_alloc(s);
1852     int indirect_reg = 0;
1853 
1854     switch (base_ts->kind) {
1855     case TEMP_FIXED:
1856         break;
1857     case TEMP_GLOBAL:
1858         /* We do not support double-indirect registers.  */
1859         tcg_debug_assert(!base_ts->indirect_reg);
1860         base_ts->indirect_base = 1;
1861         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1862                             ? 2 : 1);
1863         indirect_reg = 1;
1864         break;
1865     default:
1866         g_assert_not_reached();
1867     }
1868 
1869     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1870         TCGTemp *ts2 = tcg_global_alloc(s);
1871         char buf[64];
1872 
1873         ts->base_type = TCG_TYPE_I64;
1874         ts->type = TCG_TYPE_I32;
1875         ts->indirect_reg = indirect_reg;
1876         ts->mem_allocated = 1;
1877         ts->mem_base = base_ts;
1878         ts->mem_offset = offset;
1879         pstrcpy(buf, sizeof(buf), name);
1880         pstrcat(buf, sizeof(buf), "_0");
1881         ts->name = strdup(buf);
1882 
1883         tcg_debug_assert(ts2 == ts + 1);
1884         ts2->base_type = TCG_TYPE_I64;
1885         ts2->type = TCG_TYPE_I32;
1886         ts2->indirect_reg = indirect_reg;
1887         ts2->mem_allocated = 1;
1888         ts2->mem_base = base_ts;
1889         ts2->mem_offset = offset + 4;
1890         ts2->temp_subindex = 1;
1891         pstrcpy(buf, sizeof(buf), name);
1892         pstrcat(buf, sizeof(buf), "_1");
1893         ts2->name = strdup(buf);
1894     } else {
1895         ts->base_type = type;
1896         ts->type = type;
1897         ts->indirect_reg = indirect_reg;
1898         ts->mem_allocated = 1;
1899         ts->mem_base = base_ts;
1900         ts->mem_offset = offset;
1901         ts->name = name;
1902     }
1903     return ts;
1904 }
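
/*
 * Example (CPUFooState and its field are hypothetical): on a 32-bit
 * host,
 *     tcg_global_mem_new_i64(tcg_env, offsetof(CPUFooState, pc), "pc");
 * creates two adjacent I32 temps, "pc_0" at the given offset and
 * "pc_1" at offset + 4, with temp_subindex 0 and 1 and the same
 * mem_base.
 */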
1905 
1906 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1907 {
1908     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1909     return temp_tcgv_i32(ts);
1910 }
1911 
1912 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1913 {
1914     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1915     return temp_tcgv_i64(ts);
1916 }
1917 
1918 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1919 {
1920     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1921     return temp_tcgv_ptr(ts);
1922 }
1923 
1924 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1925 {
1926     TCGContext *s = tcg_ctx;
1927     TCGTemp *ts;
1928     int n;
1929 
1930     if (kind == TEMP_EBB) {
1931         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1932 
1933         if (idx < TCG_MAX_TEMPS) {
1934             /* There is already an available temp with the right type.  */
1935             clear_bit(idx, s->free_temps[type].l);
1936 
1937             ts = &s->temps[idx];
1938             ts->temp_allocated = 1;
1939             tcg_debug_assert(ts->base_type == type);
1940             tcg_debug_assert(ts->kind == kind);
1941             return ts;
1942         }
1943     } else {
1944         tcg_debug_assert(kind == TEMP_TB);
1945     }
1946 
1947     switch (type) {
1948     case TCG_TYPE_I32:
1949     case TCG_TYPE_V64:
1950     case TCG_TYPE_V128:
1951     case TCG_TYPE_V256:
1952         n = 1;
1953         break;
1954     case TCG_TYPE_I64:
1955         n = 64 / TCG_TARGET_REG_BITS;
1956         break;
1957     case TCG_TYPE_I128:
1958         n = 128 / TCG_TARGET_REG_BITS;
1959         break;
1960     default:
1961         g_assert_not_reached();
1962     }
1963 
1964     ts = tcg_temp_alloc(s);
1965     ts->base_type = type;
1966     ts->temp_allocated = 1;
1967     ts->kind = kind;
1968 
1969     if (n == 1) {
1970         ts->type = type;
1971     } else {
1972         ts->type = TCG_TYPE_REG;
1973 
1974         for (int i = 1; i < n; ++i) {
1975             TCGTemp *ts2 = tcg_temp_alloc(s);
1976 
1977             tcg_debug_assert(ts2 == ts + i);
1978             ts2->base_type = type;
1979             ts2->type = TCG_TYPE_REG;
1980             ts2->temp_allocated = 1;
1981             ts2->temp_subindex = i;
1982             ts2->kind = kind;
1983         }
1984     }
1985     return ts;
1986 }
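
/*
 * Note on sub-temps, for illustration: a TCG_TYPE_I128 temp on a
 * 64-bit host allocates n == 2 consecutive TCGTemps.  The first keeps
 * base_type == TCG_TYPE_I128 with temp_subindex == 0, the second gets
 * temp_subindex == 1, and both are typed TCG_TYPE_REG.
 */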
1987 
1988 TCGv_i32 tcg_temp_new_i32(void)
1989 {
1990     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1991 }
1992 
1993 TCGv_i32 tcg_temp_ebb_new_i32(void)
1994 {
1995     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1996 }
1997 
1998 TCGv_i64 tcg_temp_new_i64(void)
1999 {
2000     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2001 }
2002 
2003 TCGv_i64 tcg_temp_ebb_new_i64(void)
2004 {
2005     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2006 }
2007 
2008 TCGv_ptr tcg_temp_new_ptr(void)
2009 {
2010     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2011 }
2012 
2013 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2014 {
2015     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2016 }
2017 
2018 TCGv_i128 tcg_temp_new_i128(void)
2019 {
2020     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2021 }
2022 
2023 TCGv_i128 tcg_temp_ebb_new_i128(void)
2024 {
2025     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2026 }
2027 
2028 TCGv_vec tcg_temp_new_vec(TCGType type)
2029 {
2030     TCGTemp *t;
2031 
2032 #ifdef CONFIG_DEBUG_TCG
2033     switch (type) {
2034     case TCG_TYPE_V64:
2035         assert(TCG_TARGET_HAS_v64);
2036         break;
2037     case TCG_TYPE_V128:
2038         assert(TCG_TARGET_HAS_v128);
2039         break;
2040     case TCG_TYPE_V256:
2041         assert(TCG_TARGET_HAS_v256);
2042         break;
2043     default:
2044         g_assert_not_reached();
2045     }
2046 #endif
2047 
2048     t = tcg_temp_new_internal(type, TEMP_EBB);
2049     return temp_tcgv_vec(t);
2050 }
2051 
2052 /* Create a new temp of the same type as an existing temp.  */
2053 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2054 {
2055     TCGTemp *t = tcgv_vec_temp(match);
2056 
2057     tcg_debug_assert(t->temp_allocated != 0);
2058 
2059     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2060     return temp_tcgv_vec(t);
2061 }
2062 
2063 void tcg_temp_free_internal(TCGTemp *ts)
2064 {
2065     TCGContext *s = tcg_ctx;
2066 
2067     switch (ts->kind) {
2068     case TEMP_CONST:
2069     case TEMP_TB:
2070         /* Silently ignore free. */
2071         break;
2072     case TEMP_EBB:
2073         tcg_debug_assert(ts->temp_allocated != 0);
2074         ts->temp_allocated = 0;
2075         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2076         break;
2077     default:
2078         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2079         g_assert_not_reached();
2080     }
2081 }
2082 
2083 void tcg_temp_free_i32(TCGv_i32 arg)
2084 {
2085     tcg_temp_free_internal(tcgv_i32_temp(arg));
2086 }
2087 
2088 void tcg_temp_free_i64(TCGv_i64 arg)
2089 {
2090     tcg_temp_free_internal(tcgv_i64_temp(arg));
2091 }
2092 
2093 void tcg_temp_free_i128(TCGv_i128 arg)
2094 {
2095     tcg_temp_free_internal(tcgv_i128_temp(arg));
2096 }
2097 
2098 void tcg_temp_free_ptr(TCGv_ptr arg)
2099 {
2100     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2101 }
2102 
2103 void tcg_temp_free_vec(TCGv_vec arg)
2104 {
2105     tcg_temp_free_internal(tcgv_vec_temp(arg));
2106 }
2107 
2108 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2109 {
2110     TCGContext *s = tcg_ctx;
2111     GHashTable *h = s->const_table[type];
2112     TCGTemp *ts;
2113 
2114     if (h == NULL) {
2115         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2116         s->const_table[type] = h;
2117     }
2118 
2119     ts = g_hash_table_lookup(h, &val);
2120     if (ts == NULL) {
2121         int64_t *val_ptr;
2122 
2123         ts = tcg_temp_alloc(s);
2124 
2125         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2126             TCGTemp *ts2 = tcg_temp_alloc(s);
2127 
2128             tcg_debug_assert(ts2 == ts + 1);
2129 
2130             ts->base_type = TCG_TYPE_I64;
2131             ts->type = TCG_TYPE_I32;
2132             ts->kind = TEMP_CONST;
2133             ts->temp_allocated = 1;
2134 
2135             ts2->base_type = TCG_TYPE_I64;
2136             ts2->type = TCG_TYPE_I32;
2137             ts2->kind = TEMP_CONST;
2138             ts2->temp_allocated = 1;
2139             ts2->temp_subindex = 1;
2140 
2141             /*
2142              * Retain the full value of the 64-bit constant in the low
2143              * part, so that the hash table works.  Actual uses will
2144              * truncate the value to the low part.
2145              */
2146             ts[HOST_BIG_ENDIAN].val = val;
2147             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2148             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2149         } else {
2150             ts->base_type = type;
2151             ts->type = type;
2152             ts->kind = TEMP_CONST;
2153             ts->temp_allocated = 1;
2154             ts->val = val;
2155             val_ptr = &ts->val;
2156         }
2157         g_hash_table_insert(h, val_ptr, ts);
2158     }
2159 
2160     return ts;
2161 }
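
/*
 * Illustration of the 32-bit split above: tcg_constant_i64 of
 * 0x11223344aabbccdd on a little-endian 32-bit host stores the full
 * 64-bit value in ts[0].val, keeping the g_int64_hash lookup on &val
 * correct, and 0x11223344 in ts[1].val; actual uses truncate each
 * half to its low 32 bits.
 */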
2162 
2163 TCGv_i32 tcg_constant_i32(int32_t val)
2164 {
2165     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2166 }
2167 
2168 TCGv_i64 tcg_constant_i64(int64_t val)
2169 {
2170     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2171 }
2172 
2173 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2174 {
2175     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2176 }
2177 
2178 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2179 {
2180     val = dup_const(vece, val);
2181     return temp_tcgv_vec(tcg_constant_internal(type, val));
2182 }
2183 
2184 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2185 {
2186     TCGTemp *t = tcgv_vec_temp(match);
2187 
2188     tcg_debug_assert(t->temp_allocated != 0);
2189     return tcg_constant_vec(t->base_type, vece, val);
2190 }
2191 
2192 #ifdef CONFIG_DEBUG_TCG
2193 size_t temp_idx(TCGTemp *ts)
2194 {
2195     ptrdiff_t n = ts - tcg_ctx->temps;
2196     assert(n >= 0 && n < tcg_ctx->nb_temps);
2197     return n;
2198 }
2199 
2200 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2201 {
2202     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2203 
2204     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2205     assert(o % sizeof(TCGTemp) == 0);
2206 
2207     return (void *)tcg_ctx + (uintptr_t)v;
2208 }
2209 #endif /* CONFIG_DEBUG_TCG */
2210 
2211 /*
2212  * Return true if OP may appear in the opcode stream with TYPE.
2213  * Test the runtime variable that controls each opcode.
2214  */
2215 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2216 {
2217     bool has_type;
2218 
2219     switch (type) {
2220     case TCG_TYPE_I32:
2221         has_type = true;
2222         break;
2223     case TCG_TYPE_I64:
2224         has_type = TCG_TARGET_REG_BITS == 64;
2225         break;
2226     case TCG_TYPE_V64:
2227         has_type = TCG_TARGET_HAS_v64;
2228         break;
2229     case TCG_TYPE_V128:
2230         has_type = TCG_TARGET_HAS_v128;
2231         break;
2232     case TCG_TYPE_V256:
2233         has_type = TCG_TARGET_HAS_v256;
2234         break;
2235     default:
2236         has_type = false;
2237         break;
2238     }
2239 
2240     switch (op) {
2241     case INDEX_op_discard:
2242     case INDEX_op_set_label:
2243     case INDEX_op_call:
2244     case INDEX_op_br:
2245     case INDEX_op_mb:
2246     case INDEX_op_insn_start:
2247     case INDEX_op_exit_tb:
2248     case INDEX_op_goto_tb:
2249     case INDEX_op_goto_ptr:
2250     case INDEX_op_qemu_ld_i32:
2251     case INDEX_op_qemu_st_i32:
2252     case INDEX_op_qemu_ld_i64:
2253     case INDEX_op_qemu_st_i64:
2254         return true;
2255 
2256     case INDEX_op_qemu_st8_i32:
2257         return TCG_TARGET_HAS_qemu_st8_i32;
2258 
2259     case INDEX_op_qemu_ld_i128:
2260     case INDEX_op_qemu_st_i128:
2261         return TCG_TARGET_HAS_qemu_ldst_i128;
2262 
2263     case INDEX_op_add:
2264     case INDEX_op_and:
2265     case INDEX_op_mov:
2266     case INDEX_op_or:
2267     case INDEX_op_xor:
2268         return has_type;
2269 
2270     case INDEX_op_setcond_i32:
2271     case INDEX_op_brcond_i32:
2272     case INDEX_op_movcond_i32:
2273     case INDEX_op_ld8u_i32:
2274     case INDEX_op_ld8s_i32:
2275     case INDEX_op_ld16u_i32:
2276     case INDEX_op_ld16s_i32:
2277     case INDEX_op_ld_i32:
2278     case INDEX_op_st8_i32:
2279     case INDEX_op_st16_i32:
2280     case INDEX_op_st_i32:
2281     case INDEX_op_extract_i32:
2282     case INDEX_op_sextract_i32:
2283     case INDEX_op_deposit_i32:
2284         return true;
2285 
2286     case INDEX_op_negsetcond_i32:
2287         return TCG_TARGET_HAS_negsetcond_i32;
2288     case INDEX_op_extract2_i32:
2289         return TCG_TARGET_HAS_extract2_i32;
2290     case INDEX_op_add2_i32:
2291         return TCG_TARGET_HAS_add2_i32;
2292     case INDEX_op_sub2_i32:
2293         return TCG_TARGET_HAS_sub2_i32;
2294     case INDEX_op_bswap16_i32:
2295         return TCG_TARGET_HAS_bswap16_i32;
2296     case INDEX_op_bswap32_i32:
2297         return TCG_TARGET_HAS_bswap32_i32;
2298 
2299     case INDEX_op_brcond2_i32:
2300     case INDEX_op_setcond2_i32:
2301         return TCG_TARGET_REG_BITS == 32;
2302 
2303     case INDEX_op_setcond_i64:
2304     case INDEX_op_brcond_i64:
2305     case INDEX_op_movcond_i64:
2306     case INDEX_op_ld8u_i64:
2307     case INDEX_op_ld8s_i64:
2308     case INDEX_op_ld16u_i64:
2309     case INDEX_op_ld16s_i64:
2310     case INDEX_op_ld32u_i64:
2311     case INDEX_op_ld32s_i64:
2312     case INDEX_op_ld_i64:
2313     case INDEX_op_st8_i64:
2314     case INDEX_op_st16_i64:
2315     case INDEX_op_st32_i64:
2316     case INDEX_op_st_i64:
2317     case INDEX_op_ext_i32_i64:
2318     case INDEX_op_extu_i32_i64:
2319     case INDEX_op_extract_i64:
2320     case INDEX_op_sextract_i64:
2321     case INDEX_op_deposit_i64:
2322         return TCG_TARGET_REG_BITS == 64;
2323 
2324     case INDEX_op_negsetcond_i64:
2325         return TCG_TARGET_HAS_negsetcond_i64;
2326     case INDEX_op_extract2_i64:
2327         return TCG_TARGET_HAS_extract2_i64;
2328     case INDEX_op_extrl_i64_i32:
2329     case INDEX_op_extrh_i64_i32:
2330         return TCG_TARGET_HAS_extr_i64_i32;
2331     case INDEX_op_bswap16_i64:
2332         return TCG_TARGET_HAS_bswap16_i64;
2333     case INDEX_op_bswap32_i64:
2334         return TCG_TARGET_HAS_bswap32_i64;
2335     case INDEX_op_bswap64_i64:
2336         return TCG_TARGET_HAS_bswap64_i64;
2337     case INDEX_op_add2_i64:
2338         return TCG_TARGET_HAS_add2_i64;
2339     case INDEX_op_sub2_i64:
2340         return TCG_TARGET_HAS_sub2_i64;
2341 
2342     case INDEX_op_mov_vec:
2343     case INDEX_op_dup_vec:
2344     case INDEX_op_dupm_vec:
2345     case INDEX_op_ld_vec:
2346     case INDEX_op_st_vec:
2347     case INDEX_op_add_vec:
2348     case INDEX_op_sub_vec:
2349     case INDEX_op_and_vec:
2350     case INDEX_op_or_vec:
2351     case INDEX_op_xor_vec:
2352     case INDEX_op_cmp_vec:
2353         return has_type;
2354     case INDEX_op_dup2_vec:
2355         return has_type && TCG_TARGET_REG_BITS == 32;
2356     case INDEX_op_not_vec:
2357         return has_type && TCG_TARGET_HAS_not_vec;
2358     case INDEX_op_neg_vec:
2359         return has_type && TCG_TARGET_HAS_neg_vec;
2360     case INDEX_op_abs_vec:
2361         return has_type && TCG_TARGET_HAS_abs_vec;
2362     case INDEX_op_andc_vec:
2363         return has_type && TCG_TARGET_HAS_andc_vec;
2364     case INDEX_op_orc_vec:
2365         return has_type && TCG_TARGET_HAS_orc_vec;
2366     case INDEX_op_nand_vec:
2367         return has_type && TCG_TARGET_HAS_nand_vec;
2368     case INDEX_op_nor_vec:
2369         return has_type && TCG_TARGET_HAS_nor_vec;
2370     case INDEX_op_eqv_vec:
2371         return has_type && TCG_TARGET_HAS_eqv_vec;
2372     case INDEX_op_mul_vec:
2373         return has_type && TCG_TARGET_HAS_mul_vec;
2374     case INDEX_op_shli_vec:
2375     case INDEX_op_shri_vec:
2376     case INDEX_op_sari_vec:
2377         return has_type && TCG_TARGET_HAS_shi_vec;
2378     case INDEX_op_shls_vec:
2379     case INDEX_op_shrs_vec:
2380     case INDEX_op_sars_vec:
2381         return has_type && TCG_TARGET_HAS_shs_vec;
2382     case INDEX_op_shlv_vec:
2383     case INDEX_op_shrv_vec:
2384     case INDEX_op_sarv_vec:
2385         return has_type && TCG_TARGET_HAS_shv_vec;
2386     case INDEX_op_rotli_vec:
2387         return has_type && TCG_TARGET_HAS_roti_vec;
2388     case INDEX_op_rotls_vec:
2389         return has_type && TCG_TARGET_HAS_rots_vec;
2390     case INDEX_op_rotlv_vec:
2391     case INDEX_op_rotrv_vec:
2392         return has_type && TCG_TARGET_HAS_rotv_vec;
2393     case INDEX_op_ssadd_vec:
2394     case INDEX_op_usadd_vec:
2395     case INDEX_op_sssub_vec:
2396     case INDEX_op_ussub_vec:
2397         return has_type && TCG_TARGET_HAS_sat_vec;
2398     case INDEX_op_smin_vec:
2399     case INDEX_op_umin_vec:
2400     case INDEX_op_smax_vec:
2401     case INDEX_op_umax_vec:
2402         return has_type && TCG_TARGET_HAS_minmax_vec;
2403     case INDEX_op_bitsel_vec:
2404         return has_type && TCG_TARGET_HAS_bitsel_vec;
2405     case INDEX_op_cmpsel_vec:
2406         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2407 
2408     default:
2409         if (op < INDEX_op_last_generic) {
2410             const TCGOutOp *outop;
2411             TCGConstraintSetIndex con_set;
2412 
2413             if (!has_type) {
2414                 return false;
2415             }
2416 
2417             outop = all_outop[op];
2418             tcg_debug_assert(outop != NULL);
2419 
2420             con_set = outop->static_constraint;
2421             if (con_set == C_Dynamic) {
2422                 con_set = outop->dynamic_constraint(type, flags);
2423             }
2424             if (con_set >= 0) {
2425                 return true;
2426             }
2427             tcg_debug_assert(con_set == C_NotImplemented);
2428             return false;
2429         }
2430         tcg_debug_assert(op < NB_OPS);
2431         return true;
2432 
2433     case INDEX_op_last_generic:
2434         g_assert_not_reached();
2435     }
2436 }
2437 
2438 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2439 {
2440     unsigned width;
2441 
2442     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2443     width = (type == TCG_TYPE_I32 ? 32 : 64);
2444 
2445     tcg_debug_assert(ofs < width);
2446     tcg_debug_assert(len > 0);
2447     tcg_debug_assert(len <= width - ofs);
2448 
2449     return TCG_TARGET_deposit_valid(type, ofs, len);
2450 }
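
/*
 * Usage sketch: inserting one byte at bit offset 8 of an i32 would be
 * queried as tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8); the asserts
 * above only bound-check ofs/len, while the final answer comes from
 * the target's TCG_TARGET_deposit_valid hook.
 */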
2451 
2452 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2453 
2454 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2455                           TCGTemp *ret, TCGTemp **args)
2456 {
2457     TCGv_i64 extend_free[MAX_CALL_IARGS];
2458     int n_extend = 0;
2459     TCGOp *op;
2460     int i, n, pi = 0, total_args;
2461 
2462     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2463         init_call_layout(info);
2464         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2465     }
2466 
2467     total_args = info->nr_out + info->nr_in + 2;
2468     op = tcg_op_alloc(INDEX_op_call, total_args);
2469 
2470 #ifdef CONFIG_PLUGIN
2471     /* Flag helpers that may affect guest state */
2472     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2473         tcg_ctx->plugin_insn->calls_helpers = true;
2474     }
2475 #endif
2476 
2477     TCGOP_CALLO(op) = n = info->nr_out;
2478     switch (n) {
2479     case 0:
2480         tcg_debug_assert(ret == NULL);
2481         break;
2482     case 1:
2483         tcg_debug_assert(ret != NULL);
2484         op->args[pi++] = temp_arg(ret);
2485         break;
2486     case 2:
2487     case 4:
2488         tcg_debug_assert(ret != NULL);
2489         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2490         tcg_debug_assert(ret->temp_subindex == 0);
2491         for (i = 0; i < n; ++i) {
2492             op->args[pi++] = temp_arg(ret + i);
2493         }
2494         break;
2495     default:
2496         g_assert_not_reached();
2497     }
2498 
2499     TCGOP_CALLI(op) = n = info->nr_in;
2500     for (i = 0; i < n; i++) {
2501         const TCGCallArgumentLoc *loc = &info->in[i];
2502         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2503 
2504         switch (loc->kind) {
2505         case TCG_CALL_ARG_NORMAL:
2506         case TCG_CALL_ARG_BY_REF:
2507         case TCG_CALL_ARG_BY_REF_N:
2508             op->args[pi++] = temp_arg(ts);
2509             break;
2510 
2511         case TCG_CALL_ARG_EXTEND_U:
2512         case TCG_CALL_ARG_EXTEND_S:
2513             {
2514                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2515                 TCGv_i32 orig = temp_tcgv_i32(ts);
2516 
2517                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2518                     tcg_gen_ext_i32_i64(temp, orig);
2519                 } else {
2520                     tcg_gen_extu_i32_i64(temp, orig);
2521                 }
2522                 op->args[pi++] = tcgv_i64_arg(temp);
2523                 extend_free[n_extend++] = temp;
2524             }
2525             break;
2526 
2527         default:
2528             g_assert_not_reached();
2529         }
2530     }
2531     op->args[pi++] = (uintptr_t)func;
2532     op->args[pi++] = (uintptr_t)info;
2533     tcg_debug_assert(pi == total_args);
2534 
2535     if (tcg_ctx->emit_before_op) {
2536         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2537     } else {
2538         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2539     }
2540 
2541     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2542     for (i = 0; i < n_extend; ++i) {
2543         tcg_temp_free_i64(extend_free[i]);
2544     }
2545 }
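
/*
 * The call op built above packs its arguments as
 *     args[0 .. nr_out-1]                output temp(s)
 *     args[nr_out .. nr_out+nr_in-1]     input temps
 *     args[nr_out+nr_in]                 function pointer
 *     args[nr_out+nr_in+1]               TCGHelperInfo pointer
 * For example, a helper returning i64 on a 32-bit host has
 * nr_out == 2, so the first two slots hold the two halves of the
 * return value.
 */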
2546 
2547 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2548 {
2549     tcg_gen_callN(func, info, ret, NULL);
2550 }
2551 
2552 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2553 {
2554     tcg_gen_callN(func, info, ret, &t1);
2555 }
2556 
2557 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2558                    TCGTemp *t1, TCGTemp *t2)
2559 {
2560     TCGTemp *args[2] = { t1, t2 };
2561     tcg_gen_callN(func, info, ret, args);
2562 }
2563 
2564 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2565                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2566 {
2567     TCGTemp *args[3] = { t1, t2, t3 };
2568     tcg_gen_callN(func, info, ret, args);
2569 }
2570 
2571 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2573 {
2574     TCGTemp *args[4] = { t1, t2, t3, t4 };
2575     tcg_gen_callN(func, info, ret, args);
2576 }
2577 
2578 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2579                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2580 {
2581     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2587                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2588 {
2589     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2594                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2595                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2596 {
2597     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2598     tcg_gen_callN(func, info, ret, args);
2599 }
2600 
2601 static void tcg_reg_alloc_start(TCGContext *s)
2602 {
2603     int i, n;
2604 
2605     for (i = 0, n = s->nb_temps; i < n; i++) {
2606         TCGTemp *ts = &s->temps[i];
2607         TCGTempVal val = TEMP_VAL_MEM;
2608 
2609         switch (ts->kind) {
2610         case TEMP_CONST:
2611             val = TEMP_VAL_CONST;
2612             break;
2613         case TEMP_FIXED:
2614             val = TEMP_VAL_REG;
2615             break;
2616         case TEMP_GLOBAL:
2617             break;
2618         case TEMP_EBB:
2619             val = TEMP_VAL_DEAD;
2620             /* fall through */
2621         case TEMP_TB:
2622             ts->mem_allocated = 0;
2623             break;
2624         default:
2625             g_assert_not_reached();
2626         }
2627         ts->val_type = val;
2628     }
2629 
2630     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2631 }
2632 
2633 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2634                                  TCGTemp *ts)
2635 {
2636     int idx = temp_idx(ts);
2637 
2638     switch (ts->kind) {
2639     case TEMP_FIXED:
2640     case TEMP_GLOBAL:
2641         pstrcpy(buf, buf_size, ts->name);
2642         break;
2643     case TEMP_TB:
2644         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2645         break;
2646     case TEMP_EBB:
2647         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2648         break;
2649     case TEMP_CONST:
2650         switch (ts->type) {
2651         case TCG_TYPE_I32:
2652             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2653             break;
2654 #if TCG_TARGET_REG_BITS > 32
2655         case TCG_TYPE_I64:
2656             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2657             break;
2658 #endif
2659         case TCG_TYPE_V64:
2660         case TCG_TYPE_V128:
2661         case TCG_TYPE_V256:
2662             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2663                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2664             break;
2665         default:
2666             g_assert_not_reached();
2667         }
2668         break;
2669     }
2670     return buf;
2671 }
2672 
2673 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2674                              int buf_size, TCGArg arg)
2675 {
2676     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2677 }
2678 
2679 static const char * const cond_name[] =
2680 {
2681     [TCG_COND_NEVER] = "never",
2682     [TCG_COND_ALWAYS] = "always",
2683     [TCG_COND_EQ] = "eq",
2684     [TCG_COND_NE] = "ne",
2685     [TCG_COND_LT] = "lt",
2686     [TCG_COND_GE] = "ge",
2687     [TCG_COND_LE] = "le",
2688     [TCG_COND_GT] = "gt",
2689     [TCG_COND_LTU] = "ltu",
2690     [TCG_COND_GEU] = "geu",
2691     [TCG_COND_LEU] = "leu",
2692     [TCG_COND_GTU] = "gtu",
2693     [TCG_COND_TSTEQ] = "tsteq",
2694     [TCG_COND_TSTNE] = "tstne",
2695 };
2696 
2697 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2698 {
2699     [MO_UB]   = "ub",
2700     [MO_SB]   = "sb",
2701     [MO_LEUW] = "leuw",
2702     [MO_LESW] = "lesw",
2703     [MO_LEUL] = "leul",
2704     [MO_LESL] = "lesl",
2705     [MO_LEUQ] = "leq",
2706     [MO_BEUW] = "beuw",
2707     [MO_BESW] = "besw",
2708     [MO_BEUL] = "beul",
2709     [MO_BESL] = "besl",
2710     [MO_BEUQ] = "beq",
2711     [MO_128 + MO_BE] = "beo",
2712     [MO_128 + MO_LE] = "leo",
2713 };
2714 
2715 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2716     [MO_UNALN >> MO_ASHIFT]    = "un+",
2717     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2718     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2719     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2720     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2721     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2722     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2723     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2724 };
2725 
2726 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2727     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2728     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2729     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2730     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2731     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2732     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2733 };
2734 
2735 static const char bswap_flag_name[][6] = {
2736     [TCG_BSWAP_IZ] = "iz",
2737     [TCG_BSWAP_OZ] = "oz",
2738     [TCG_BSWAP_OS] = "os",
2739     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2740     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2741 };
2742 
2743 #ifdef CONFIG_PLUGIN
2744 static const char * const plugin_from_name[] = {
2745     "from-tb",
2746     "from-insn",
2747     "after-insn",
2748     "after-tb",
2749 };
2750 #endif
2751 
2752 static inline bool tcg_regset_single(TCGRegSet d)
2753 {
2754     return (d & (d - 1)) == 0;
2755 }
2756 
2757 static inline TCGReg tcg_regset_first(TCGRegSet d)
2758 {
2759     if (TCG_TARGET_NB_REGS <= 32) {
2760         return ctz32(d);
2761     } else {
2762         return ctz64(d);
2763     }
2764 }
2765 
2766 /* Return only the number of characters output -- no error return. */
2767 #define ne_fprintf(...) \
2768     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2769 
2770 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2771 {
2772     char buf[128];
2773     TCGOp *op;
2774 
2775     QTAILQ_FOREACH(op, &s->ops, link) {
2776         int i, k, nb_oargs, nb_iargs, nb_cargs;
2777         const TCGOpDef *def;
2778         TCGOpcode c;
2779         int col = 0;
2780 
2781         c = op->opc;
2782         def = &tcg_op_defs[c];
2783 
2784         if (c == INDEX_op_insn_start) {
2785             nb_oargs = 0;
2786             col += ne_fprintf(f, "\n ----");
2787 
2788             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2789                 col += ne_fprintf(f, " %016" PRIx64,
2790                                   tcg_get_insn_start_param(op, i));
2791             }
2792         } else if (c == INDEX_op_call) {
2793             const TCGHelperInfo *info = tcg_call_info(op);
2794             void *func = tcg_call_func(op);
2795 
2796             /* variable number of arguments */
2797             nb_oargs = TCGOP_CALLO(op);
2798             nb_iargs = TCGOP_CALLI(op);
2799             nb_cargs = def->nb_cargs;
2800 
2801             col += ne_fprintf(f, " %s ", def->name);
2802 
2803             /*
2804              * Print the function name from TCGHelperInfo, if available.
2805              * Note that plugins have a template function for the info,
2806              * but the actual function pointer comes from the plugin.
2807              */
2808             if (func == info->func) {
2809                 col += ne_fprintf(f, "%s", info->name);
2810             } else {
2811                 col += ne_fprintf(f, "plugin(%p)", func);
2812             }
2813 
2814             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2815             for (i = 0; i < nb_oargs; i++) {
2816                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2817                                                             op->args[i]));
2818             }
2819             for (i = 0; i < nb_iargs; i++) {
2820                 TCGArg arg = op->args[nb_oargs + i];
2821                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2822                 col += ne_fprintf(f, ",%s", t);
2823             }
2824         } else {
2825             if (def->flags & TCG_OPF_INT) {
2826                 col += ne_fprintf(f, " %s_i%d ",
2827                                   def->name,
2828                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2829             } else if (def->flags & TCG_OPF_VECTOR) {
2830                 col += ne_fprintf(f, "%s v%d,e%d,",
2831                                   def->name,
2832                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2833                                   8 << TCGOP_VECE(op));
2834             } else {
2835                 col += ne_fprintf(f, " %s ", def->name);
2836             }
2837 
2838             nb_oargs = def->nb_oargs;
2839             nb_iargs = def->nb_iargs;
2840             nb_cargs = def->nb_cargs;
2841 
2842             k = 0;
2843             for (i = 0; i < nb_oargs; i++) {
2844                 const char *sep = k ? "," : "";
2845                 col += ne_fprintf(f, "%s%s", sep,
2846                                   tcg_get_arg_str(s, buf, sizeof(buf),
2847                                                   op->args[k++]));
2848             }
2849             for (i = 0; i < nb_iargs; i++) {
2850                 const char *sep = k ? "," : "";
2851                 col += ne_fprintf(f, "%s%s", sep,
2852                                   tcg_get_arg_str(s, buf, sizeof(buf),
2853                                                   op->args[k++]));
2854             }
2855             switch (c) {
2856             case INDEX_op_brcond_i32:
2857             case INDEX_op_setcond_i32:
2858             case INDEX_op_negsetcond_i32:
2859             case INDEX_op_movcond_i32:
2860             case INDEX_op_brcond2_i32:
2861             case INDEX_op_setcond2_i32:
2862             case INDEX_op_brcond_i64:
2863             case INDEX_op_setcond_i64:
2864             case INDEX_op_negsetcond_i64:
2865             case INDEX_op_movcond_i64:
2866             case INDEX_op_cmp_vec:
2867             case INDEX_op_cmpsel_vec:
2868                 if (op->args[k] < ARRAY_SIZE(cond_name)
2869                     && cond_name[op->args[k]]) {
2870                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2871                 } else {
2872                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2873                 }
2874                 i = 1;
2875                 break;
2876             case INDEX_op_qemu_ld_i32:
2877             case INDEX_op_qemu_st_i32:
2878             case INDEX_op_qemu_st8_i32:
2879             case INDEX_op_qemu_ld_i64:
2880             case INDEX_op_qemu_st_i64:
2881             case INDEX_op_qemu_ld_i128:
2882             case INDEX_op_qemu_st_i128:
2883                 {
2884                     const char *s_al, *s_op, *s_at;
2885                     MemOpIdx oi = op->args[k++];
2886                     MemOp mop = get_memop(oi);
2887                     unsigned ix = get_mmuidx(oi);
2888 
2889                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2890                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2891                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2892                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2893 
2894                     /* If all fields are accounted for, print symbolically. */
2895                     if (!mop && s_al && s_op && s_at) {
2896                         col += ne_fprintf(f, ",%s%s%s,%u",
2897                                           s_at, s_al, s_op, ix);
2898                     } else {
2899                         mop = get_memop(oi);
2900                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2901                     }
2902                     i = 1;
2903                 }
2904                 break;
2905             case INDEX_op_bswap16_i32:
2906             case INDEX_op_bswap16_i64:
2907             case INDEX_op_bswap32_i32:
2908             case INDEX_op_bswap32_i64:
2909             case INDEX_op_bswap64_i64:
2910                 {
2911                     TCGArg flags = op->args[k];
2912                     const char *name = NULL;
2913 
2914                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2915                         name = bswap_flag_name[flags];
2916                     }
2917                     if (name) {
2918                         col += ne_fprintf(f, ",%s", name);
2919                     } else {
2920                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2921                     }
2922                     i = k = 1;
2923                 }
2924                 break;
2925 #ifdef CONFIG_PLUGIN
2926             case INDEX_op_plugin_cb:
2927                 {
2928                     TCGArg from = op->args[k++];
2929                     const char *name = NULL;
2930 
2931                     if (from < ARRAY_SIZE(plugin_from_name)) {
2932                         name = plugin_from_name[from];
2933                     }
2934                     if (name) {
2935                         col += ne_fprintf(f, "%s", name);
2936                     } else {
2937                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2938                     }
2939                     i = 1;
2940                 }
2941                 break;
2942 #endif
2943             default:
2944                 i = 0;
2945                 break;
2946             }
2947             switch (c) {
2948             case INDEX_op_set_label:
2949             case INDEX_op_br:
2950             case INDEX_op_brcond_i32:
2951             case INDEX_op_brcond_i64:
2952             case INDEX_op_brcond2_i32:
2953                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2954                                   arg_label(op->args[k])->id);
2955                 i++, k++;
2956                 break;
2957             case INDEX_op_mb:
2958                 {
2959                     TCGBar membar = op->args[k];
2960                     const char *b_op, *m_op;
2961 
2962                     switch (membar & TCG_BAR_SC) {
2963                     case 0:
2964                         b_op = "none";
2965                         break;
2966                     case TCG_BAR_LDAQ:
2967                         b_op = "acq";
2968                         break;
2969                     case TCG_BAR_STRL:
2970                         b_op = "rel";
2971                         break;
2972                     case TCG_BAR_SC:
2973                         b_op = "seq";
2974                         break;
2975                     default:
2976                         g_assert_not_reached();
2977                     }
2978 
2979                     switch (membar & TCG_MO_ALL) {
2980                     case 0:
2981                         m_op = "none";
2982                         break;
2983                     case TCG_MO_LD_LD:
2984                         m_op = "rr";
2985                         break;
2986                     case TCG_MO_LD_ST:
2987                         m_op = "rw";
2988                         break;
2989                     case TCG_MO_ST_LD:
2990                         m_op = "wr";
2991                         break;
2992                     case TCG_MO_ST_ST:
2993                         m_op = "ww";
2994                         break;
2995                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2996                         m_op = "rr+rw";
2997                         break;
2998                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2999                         m_op = "rr+wr";
3000                         break;
3001                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3002                         m_op = "rr+ww";
3003                         break;
3004                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3005                         m_op = "rw+wr";
3006                         break;
3007                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3008                         m_op = "rw+ww";
3009                         break;
3010                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3011                         m_op = "wr+ww";
3012                         break;
3013                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3014                         m_op = "rr+rw+wr";
3015                         break;
3016                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3017                         m_op = "rr+rw+ww";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3020                         m_op = "rr+wr+ww";
3021                         break;
3022                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3023                         m_op = "rw+wr+ww";
3024                         break;
3025                     case TCG_MO_ALL:
3026                         m_op = "all";
3027                         break;
3028                     default:
3029                         g_assert_not_reached();
3030                     }
3031 
3032                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3033                     i++, k++;
3034                 }
3035                 break;
3036             default:
3037                 break;
3038             }
3039             for (; i < nb_cargs; i++, k++) {
3040                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3041                                   op->args[k]);
3042             }
3043         }
3044 
3045         if (have_prefs || op->life) {
3046             for (; col < 40; ++col) {
3047                 putc(' ', f);
3048             }
3049         }
3050 
3051         if (op->life) {
3052             unsigned life = op->life;
3053 
3054             if (life & (SYNC_ARG * 3)) {
3055                 ne_fprintf(f, "  sync:");
3056                 for (i = 0; i < 2; ++i) {
3057                     if (life & (SYNC_ARG << i)) {
3058                         ne_fprintf(f, " %d", i);
3059                     }
3060                 }
3061             }
3062             life /= DEAD_ARG;
3063             if (life) {
3064                 ne_fprintf(f, "  dead:");
3065                 for (i = 0; life; ++i, life >>= 1) {
3066                     if (life & 1) {
3067                         ne_fprintf(f, " %d", i);
3068                     }
3069                 }
3070             }
3071         }
3072 
3073         if (have_prefs) {
3074             for (i = 0; i < nb_oargs; ++i) {
3075                 TCGRegSet set = output_pref(op, i);
3076 
3077                 if (i == 0) {
3078                     ne_fprintf(f, "  pref=");
3079                 } else {
3080                     ne_fprintf(f, ",");
3081                 }
3082                 if (set == 0) {
3083                     ne_fprintf(f, "none");
3084                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3085                     ne_fprintf(f, "all");
3086 #ifdef CONFIG_DEBUG_TCG
3087                 } else if (tcg_regset_single(set)) {
3088                     TCGReg reg = tcg_regset_first(set);
3089                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3090 #endif
3091                 } else if (TCG_TARGET_NB_REGS <= 32) {
3092                     ne_fprintf(f, "0x%x", (uint32_t)set);
3093                 } else {
3094                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3095                 }
3096             }
3097         }
3098 
3099         putc('\n', f);
3100     }
3101 }
3102 
3103 /* We give more priority to constraints with fewer registers. */
3104 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3105 {
3106     int n;
3107 
3108     arg_ct += k;
3109     n = ctpop64(arg_ct->regs);
3110 
3111     /*
3112      * Sort constraints of a single register first, which includes output
3113      * aliases (which must exactly match the input already allocated).
3114      */
3115     if (n == 1 || arg_ct->oalias) {
3116         return INT_MAX;
3117     }
3118 
3119     /*
3120      * Sort register pairs next, first then second immediately after.
3121      * Arbitrarily sort multiple pairs by the index of the first reg;
3122      * there shouldn't be many pairs.
3123      */
3124     switch (arg_ct->pair) {
3125     case 1:
3126     case 3:
3127         return (k + 1) * 2;
3128     case 2:
3129         return (arg_ct->pair_index + 1) * 2 - 1;
3130     }
3131 
3132     /* Finally, sort by decreasing register count. */
3133     assert(n > 1);
3134     return -n;
3135 }
3136 
3137 /* sort from highest priority to lowest */
3138 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3139 {
3140     int i, j;
3141 
3142     for (i = 0; i < n; i++) {
3143         a[start + i].sort_index = start + i;
3144     }
3145     if (n <= 1) {
3146         return;
3147     }
3148     for (i = 0; i < n - 1; i++) {
3149         for (j = i + 1; j < n; j++) {
3150             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3151             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3152             if (p1 < p2) {
3153                 int tmp = a[start + i].sort_index;
3154                 a[start + i].sort_index = a[start + j].sort_index;
3155                 a[start + j].sort_index = tmp;
3156             }
3157         }
3158     }
3159 }
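
/*
 * Resulting order, in brief: an aliased output or a single-register
 * constraint sorts first (INT_MAX), register-pair members next (small
 * positive priorities that keep the pair adjacent), and plain
 * multi-register constraints last (-n, so larger register sets are
 * allocated later).
 */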
3160 
3161 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3162 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3163 
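/*
 * Constraint-string syntax handled below, in brief: a digit ties an
 * input to the register of that output ("0" aliases output 0); '&'
 * marks an output as needing a fresh register; 'p' allocates an
 * argument to the register immediately after the previous one and 'm'
 * to the one immediately before (register pairs); 'i' accepts a
 * constant; remaining letters come from the target's
 * tcg-target-con-str.h.  For example, with one output, the set
 * { "r", "r", "0" } means: output in any register, first input in any
 * register, second input in the same register as the output.
 */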
3164 static void process_constraint_sets(void)
3165 {
3166     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3167         const TCGConstraintSet *tdefs = &constraint_sets[c];
3168         TCGArgConstraint *args_ct = all_cts[c];
3169         int nb_oargs = tdefs->nb_oargs;
3170         int nb_iargs = tdefs->nb_iargs;
3171         int nb_args = nb_oargs + nb_iargs;
3172         bool saw_alias_pair = false;
3173 
3174         for (int i = 0; i < nb_args; i++) {
3175             const char *ct_str = tdefs->args_ct_str[i];
3176             bool input_p = i >= nb_oargs;
3177             int o;
3178 
3179             switch (*ct_str) {
3180             case '0' ... '9':
3181                 o = *ct_str - '0';
3182                 tcg_debug_assert(input_p);
3183                 tcg_debug_assert(o < nb_oargs);
3184                 tcg_debug_assert(args_ct[o].regs != 0);
3185                 tcg_debug_assert(!args_ct[o].oalias);
3186                 args_ct[i] = args_ct[o];
3187                 /* The output sets oalias.  */
3188                 args_ct[o].oalias = 1;
3189                 args_ct[o].alias_index = i;
3190                 /* The input sets ialias. */
3191                 args_ct[i].ialias = 1;
3192                 args_ct[i].alias_index = o;
3193                 if (args_ct[i].pair) {
3194                     saw_alias_pair = true;
3195                 }
3196                 tcg_debug_assert(ct_str[1] == '\0');
3197                 continue;
3198 
3199             case '&':
3200                 tcg_debug_assert(!input_p);
3201                 args_ct[i].newreg = true;
3202                 ct_str++;
3203                 break;
3204 
3205             case 'p': /* plus */
3206                 /* Allocate to the register after the previous. */
3207                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3208                 o = i - 1;
3209                 tcg_debug_assert(!args_ct[o].pair);
3210                 tcg_debug_assert(!args_ct[o].ct);
3211                 args_ct[i] = (TCGArgConstraint){
3212                     .pair = 2,
3213                     .pair_index = o,
3214                     .regs = args_ct[o].regs << 1,
3215                     .newreg = args_ct[o].newreg,
3216                 };
3217                 args_ct[o].pair = 1;
3218                 args_ct[o].pair_index = i;
3219                 tcg_debug_assert(ct_str[1] == '\0');
3220                 continue;
3221 
3222             case 'm': /* minus */
3223                 /* Allocate to the register before the previous. */
3224                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3225                 o = i - 1;
3226                 tcg_debug_assert(!args_ct[o].pair);
3227                 tcg_debug_assert(!args_ct[o].ct);
3228                 args_ct[i] = (TCGArgConstraint){
3229                     .pair = 1,
3230                     .pair_index = o,
3231                     .regs = args_ct[o].regs >> 1,
3232                     .newreg = args_ct[o].newreg,
3233                 };
3234                 args_ct[o].pair = 2;
3235                 args_ct[o].pair_index = i;
3236                 tcg_debug_assert(ct_str[1] == '\0');
3237                 continue;
3238             }
3239 
3240             do {
3241                 switch (*ct_str) {
3242                 case 'i':
3243                     args_ct[i].ct |= TCG_CT_CONST;
3244                     break;
3245 #ifdef TCG_REG_ZERO
3246                 case 'z':
3247                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3248                     break;
3249 #endif
3250 
3251                 /* Include all of the target-specific constraints. */
3252 
3253 #undef CONST
3254 #define CONST(CASE, MASK) \
3255     case CASE: args_ct[i].ct |= MASK; break;
3256 #define REGS(CASE, MASK) \
3257     case CASE: args_ct[i].regs |= MASK; break;
3258 
3259 #include "tcg-target-con-str.h"
3260 
3261 #undef REGS
3262 #undef CONST
3263                 default:
3264                 case '0' ... '9':
3265                 case '&':
3266                 case 'p':
3267                 case 'm':
3268                     /* Typo in TCGConstraintSet constraint. */
3269                     g_assert_not_reached();
3270                 }
3271             } while (*++ct_str != '\0');
3272         }
3273 
3274         /*
3275          * Fix up output pairs that are aliased with inputs.
3276          * When we created the alias, we copied pair from the output.
3277          * There are three cases:
3278          *    (1a) Pairs of inputs alias pairs of outputs.
3279          *    (1b) One input aliases the first of a pair of outputs.
3280          *    (2)  One input aliases the second of a pair of outputs.
3281          *
3282          * Case 1a is handled by making sure that the pair_index'es are
3283          * properly updated so that they appear the same as a pair of inputs.
3284          *
3285          * Case 1b is handled by setting the pair_index of the input to
3286          * itself, simply so it doesn't point to an unrelated argument.
3287          * Since we don't encounter the "second" during the input allocation
3288          * phase, nothing happens with the second half of the input pair.
3289          *
3290          * Case 2 is handled by setting the second input to pair=3, the
3291          * first output to pair=3, and the pair_index'es to match.
3292          */
3293         if (saw_alias_pair) {
3294             for (int i = nb_oargs; i < nb_args; i++) {
3295                 int o, o2, i2;
3296 
3297                 /*
3298                  * Since [0-9pm] must be alone in the constraint string,
3299                  * the only way they can both be set is if the pair comes
3300                  * from the output alias.
3301                  */
3302                 if (!args_ct[i].ialias) {
3303                     continue;
3304                 }
3305                 switch (args_ct[i].pair) {
3306                 case 0:
3307                     break;
3308                 case 1:
3309                     o = args_ct[i].alias_index;
3310                     o2 = args_ct[o].pair_index;
3311                     tcg_debug_assert(args_ct[o].pair == 1);
3312                     tcg_debug_assert(args_ct[o2].pair == 2);
3313                     if (args_ct[o2].oalias) {
3314                         /* Case 1a */
3315                         i2 = args_ct[o2].alias_index;
3316                         tcg_debug_assert(args_ct[i2].pair == 2);
3317                         args_ct[i2].pair_index = i;
3318                         args_ct[i].pair_index = i2;
3319                     } else {
3320                         /* Case 1b */
3321                         args_ct[i].pair_index = i;
3322                     }
3323                     break;
3324                 case 2:
3325                     o = args_ct[i].alias_index;
3326                     o2 = args_ct[o].pair_index;
3327                     tcg_debug_assert(args_ct[o].pair == 2);
3328                     tcg_debug_assert(args_ct[o2].pair == 1);
3329                     if (args_ct[o2].oalias) {
3330                         /* Case 1a */
3331                         i2 = args_ct[o2].alias_index;
3332                         tcg_debug_assert(args_ct[i2].pair == 1);
3333                         args_ct[i2].pair_index = i;
3334                         args_ct[i].pair_index = i2;
3335                     } else {
3336                         /* Case 2 */
3337                         args_ct[i].pair = 3;
3338                         args_ct[o2].pair = 3;
3339                         args_ct[i].pair_index = o2;
3340                         args_ct[o2].pair_index = i;
3341                     }
3342                     break;
3343                 default:
3344                     g_assert_not_reached();
3345                 }
3346             }
3347         }
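        /*
         * Worked example (hypothetical constraints, for illustration
         * only): given an output pair { o0: pair=1, o1: pair=2 } and a
         * single input aliased to o1 via "1", the loop above rewrites
         * the input and o0 to pair=3 with pair_index pointing at each
         * other -- case 2 -- so the allocator can still locate both
         * halves of the pair from either argument.
         */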
3348 
3349         /* sort the constraints (XXX: this is just a heuristic) */
3350         sort_constraints(args_ct, 0, nb_oargs);
3351         sort_constraints(args_ct, nb_oargs, nb_iargs);
3352     }
3353 }
3354 
3355 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3356 {
3357     TCGOpcode opc = op->opc;
3358     TCGType type = TCGOP_TYPE(op);
3359     unsigned flags = TCGOP_FLAGS(op);
3360     const TCGOpDef *def = &tcg_op_defs[opc];
3361     const TCGOutOp *outop = all_outop[opc];
3362     TCGConstraintSetIndex con_set;
3363 
3364     if (def->flags & TCG_OPF_NOT_PRESENT) {
3365         return empty_cts;
3366     }
3367 
3368     if (outop) {
3369         con_set = outop->static_constraint;
3370         if (con_set == C_Dynamic) {
3371             con_set = outop->dynamic_constraint(type, flags);
3372         }
3373     } else {
3374         con_set = tcg_target_op_def(opc, type, flags);
3375     }
3376     tcg_debug_assert(con_set >= 0);
3377     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3378 
3379     /* The constraint arguments must match TCGOpcode arguments. */
3380     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3381     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3382 
3383     return all_cts[con_set];
3384 }
3385 
3386 static void remove_label_use(TCGOp *op, int idx)
3387 {
3388     TCGLabel *label = arg_label(op->args[idx]);
3389     TCGLabelUse *use;
3390 
3391     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3392         if (use->op == op) {
3393             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3394             return;
3395         }
3396     }
3397     g_assert_not_reached();
3398 }
3399 
3400 void tcg_op_remove(TCGContext *s, TCGOp *op)
3401 {
3402     switch (op->opc) {
3403     case INDEX_op_br:
3404         remove_label_use(op, 0);
3405         break;
3406     case INDEX_op_brcond_i32:
3407     case INDEX_op_brcond_i64:
3408         remove_label_use(op, 3);
3409         break;
3410     case INDEX_op_brcond2_i32:
3411         remove_label_use(op, 5);
3412         break;
3413     default:
3414         break;
3415     }
3416 
3417     QTAILQ_REMOVE(&s->ops, op, link);
3418     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3419     s->nb_ops--;
3420 }
3421 
3422 void tcg_remove_ops_after(TCGOp *op)
3423 {
3424     TCGContext *s = tcg_ctx;
3425 
3426     while (true) {
3427         TCGOp *last = tcg_last_op();
3428         if (last == op) {
3429             return;
3430         }
3431         tcg_op_remove(s, last);
3432     }
3433 }
3434 
3435 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3436 {
3437     TCGContext *s = tcg_ctx;
3438     TCGOp *op = NULL;
3439 
3440     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3441         QTAILQ_FOREACH(op, &s->free_ops, link) {
3442             if (nargs <= op->nargs) {
3443                 QTAILQ_REMOVE(&s->free_ops, op, link);
3444                 nargs = op->nargs;
3445                 goto found;
3446             }
3447         }
3448     }
3449 
3450     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3451     nargs = MAX(4, nargs);
3452     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3453 
3454  found:
3455     memset(op, 0, offsetof(TCGOp, link));
3456     op->opc = opc;
3457     op->nargs = nargs;
3458 
3459     /* Check for bitfield overflow. */
3460     tcg_debug_assert(op->nargs == nargs);
3461 
3462     s->nb_ops++;
3463     return op;
3464 }
3465 
3466 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3467 {
3468     TCGOp *op = tcg_op_alloc(opc, nargs);
3469 
3470     if (tcg_ctx->emit_before_op) {
3471         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3472     } else {
3473         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3474     }
3475     return op;
3476 }
3477 
3478 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3479                             TCGOpcode opc, TCGType type, unsigned nargs)
3480 {
3481     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3482 
3483     TCGOP_TYPE(new_op) = type;
3484     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3485     return new_op;
3486 }
3487 
3488 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3489                            TCGOpcode opc, TCGType type, unsigned nargs)
3490 {
3491     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3492 
3493     TCGOP_TYPE(new_op) = type;
3494     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3495     return new_op;
3496 }
3497 
3498 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3499 {
3500     TCGLabelUse *u;
3501 
3502     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3503         TCGOp *op = u->op;
3504         switch (op->opc) {
3505         case INDEX_op_br:
3506             op->args[0] = label_arg(to);
3507             break;
3508         case INDEX_op_brcond_i32:
3509         case INDEX_op_brcond_i64:
3510             op->args[3] = label_arg(to);
3511             break;
3512         case INDEX_op_brcond2_i32:
3513             op->args[5] = label_arg(to);
3514             break;
3515         default:
3516             g_assert_not_reached();
3517         }
3518     }
3519 
3520     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3521 }
3522 
3523 /* Reachability analysis: remove unreachable code.  */
3524 static void __attribute__((noinline))
3525 reachable_code_pass(TCGContext *s)
3526 {
3527     TCGOp *op, *op_next, *op_prev;
3528     bool dead = false;
3529 
3530     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3531         bool remove = dead;
3532         TCGLabel *label;
3533 
3534         switch (op->opc) {
3535         case INDEX_op_set_label:
3536             label = arg_label(op->args[0]);
3537 
3538             /*
3539              * Note that the first op in the TB is always a load,
3540              * so there is always something before a label.
3541              */
3542             op_prev = QTAILQ_PREV(op, link);
3543 
3544             /*
3545              * If we find two sequential labels, move all branches to
3546              * reference the second label and remove the first label.
3547              * Do this before the branch-to-next optimization, so that the
3548              * middle label is out of the way.
3549              */
3550             if (op_prev->opc == INDEX_op_set_label) {
3551                 move_label_uses(label, arg_label(op_prev->args[0]));
3552                 tcg_op_remove(s, op_prev);
3553                 op_prev = QTAILQ_PREV(op, link);
3554             }
3555 
3556             /*
3557              * Optimization can fold conditional branches to unconditional.
3558              * If we find a label which is preceded by an unconditional
3559              * branch to next, remove the branch.  We couldn't do this when
3560              * processing the branch because any dead code between the branch
3561              * and label had not yet been removed.
3562              */
3563             if (op_prev->opc == INDEX_op_br &&
3564                 label == arg_label(op_prev->args[0])) {
3565                 tcg_op_remove(s, op_prev);
3566                 /* Fall through means insns become live again.  */
3567                 dead = false;
3568             }
3569 
3570             if (QSIMPLEQ_EMPTY(&label->branches)) {
3571                 /*
3572                  * While there is an occasional backward branch, virtually
3573                  * all branches generated by the translators are forward,
3574                  * which means that generally we will have already removed
3575                  * all references to a label by the time we reach it, and
3576                  * there is little to be gained by iterating.
3577                  */
3578                 remove = true;
3579             } else {
3580                 /* Once we see a label, insns become live again.  */
3581                 dead = false;
3582                 remove = false;
3583             }
3584             break;
3585 
3586         case INDEX_op_br:
3587         case INDEX_op_exit_tb:
3588         case INDEX_op_goto_ptr:
3589             /* Unconditional branches; everything following is dead.  */
3590             dead = true;
3591             break;
3592 
3593         case INDEX_op_call:
3594             /* Notice noreturn helper calls, raising exceptions.  */
3595             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3596                 dead = true;
3597             }
3598             break;
3599 
3600         case INDEX_op_insn_start:
3601             /* Never remove -- we need to keep these for unwind.  */
3602             remove = false;
3603             break;
3604 
3605         default:
3606             break;
3607         }
3608 
3609         if (remove) {
3610             tcg_op_remove(s, op);
3611         }
3612     }
3613 }
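/*
 * Example of the passes above on an assumed IR fragment:
 *
 *     br $L1                  <- unconditional: 'dead' becomes true
 *     mov_i32 t0, t1          <- unreachable, removed
 *     set_label $L0           <- no remaining branch uses: removed
 *     set_label $L1           <- code becomes live again
 *
 * Once the intervening dead ops are gone, $L1 immediately follows the
 * branch, so "br $L1" is removed as branch-to-next; that removal also
 * drops $L1's last use, and the now-unreferenced label goes as well.
 */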
3614 
3615 #define TS_DEAD  1
3616 #define TS_MEM   2
3617 
3618 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3619 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3620 
3621 /* For liveness_pass_1, the register preferences for a given temp.  */
3622 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3623 {
3624     return ts->state_ptr;
3625 }
3626 
3627 /* For liveness_pass_1, reset the preferences for a given temp to the
3628  * maximal regset for its type.
3629  */
3630 static inline void la_reset_pref(TCGTemp *ts)
3631 {
3632     *la_temp_pref(ts)
3633         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3634 }
3635 
3636 /* liveness analysis: end of function: all temps are dead, and globals
3637    should be in memory. */
3638 static void la_func_end(TCGContext *s, int ng, int nt)
3639 {
3640     int i;
3641 
3642     for (i = 0; i < ng; ++i) {
3643         s->temps[i].state = TS_DEAD | TS_MEM;
3644         la_reset_pref(&s->temps[i]);
3645     }
3646     for (i = ng; i < nt; ++i) {
3647         s->temps[i].state = TS_DEAD;
3648         la_reset_pref(&s->temps[i]);
3649     }
3650 }
3651 
3652 /* liveness analysis: end of basic block: all temps are dead, globals
3653    and local temps should be in memory. */
3654 static void la_bb_end(TCGContext *s, int ng, int nt)
3655 {
3656     int i;
3657 
3658     for (i = 0; i < nt; ++i) {
3659         TCGTemp *ts = &s->temps[i];
3660         int state;
3661 
3662         switch (ts->kind) {
3663         case TEMP_FIXED:
3664         case TEMP_GLOBAL:
3665         case TEMP_TB:
3666             state = TS_DEAD | TS_MEM;
3667             break;
3668         case TEMP_EBB:
3669         case TEMP_CONST:
3670             state = TS_DEAD;
3671             break;
3672         default:
3673             g_assert_not_reached();
3674         }
3675         ts->state = state;
3676         la_reset_pref(ts);
3677     }
3678 }
3679 
3680 /* liveness analysis: sync globals back to memory.  */
3681 static void la_global_sync(TCGContext *s, int ng)
3682 {
3683     int i;
3684 
3685     for (i = 0; i < ng; ++i) {
3686         int state = s->temps[i].state;
3687         s->temps[i].state = state | TS_MEM;
3688         if (state == TS_DEAD) {
3689             /* If the global was previously dead, reset prefs.  */
3690             la_reset_pref(&s->temps[i]);
3691         }
3692     }
3693 }
3694 
3695 /*
3696  * liveness analysis: conditional branch: all temps are dead unless
3697  * explicitly live-across-conditional-branch, globals and local temps
3698  * should be synced.
3699  */
3700 static void la_bb_sync(TCGContext *s, int ng, int nt)
3701 {
3702     la_global_sync(s, ng);
3703 
3704     for (int i = ng; i < nt; ++i) {
3705         TCGTemp *ts = &s->temps[i];
3706         int state;
3707 
3708         switch (ts->kind) {
3709         case TEMP_TB:
3710             state = ts->state;
3711             ts->state = state | TS_MEM;
3712             if (state != TS_DEAD) {
3713                 continue;
3714             }
3715             break;
3716         case TEMP_EBB:
3717         case TEMP_CONST:
3718             continue;
3719         default:
3720             g_assert_not_reached();
3721         }
3722         la_reset_pref(&s->temps[i]);
3723     }
3724 }
3725 
3726 /* liveness analysis: sync globals back to memory and kill.  */
3727 static void la_global_kill(TCGContext *s, int ng)
3728 {
3729     int i;
3730 
3731     for (i = 0; i < ng; i++) {
3732         s->temps[i].state = TS_DEAD | TS_MEM;
3733         la_reset_pref(&s->temps[i]);
3734     }
3735 }
3736 
3737 /* liveness analysis: note live globals crossing calls.  */
3738 static void la_cross_call(TCGContext *s, int nt)
3739 {
3740     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3741     int i;
3742 
3743     for (i = 0; i < nt; i++) {
3744         TCGTemp *ts = &s->temps[i];
3745         if (!(ts->state & TS_DEAD)) {
3746             TCGRegSet *pset = la_temp_pref(ts);
3747             TCGRegSet set = *pset;
3748 
3749             set &= mask;
3750             /* If the combination is not possible, restart.  */
3751             if (set == 0) {
3752                 set = tcg_target_available_regs[ts->type] & mask;
3753             }
3754             *pset = set;
3755         }
3756     }
3757 }
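/*
 * A minimal sketch of the effect above, assuming hypothetical host
 * registers: if a live temp prefers { R0, R1 } and both are
 * call-clobbered, the masked set is empty, so the preference restarts
 * from all call-saved registers available for the temp's type rather
 * than being left unsatisfiable.
 */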
3758 
3759 /*
3760  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3761  * to TEMP_EBB, if possible.
3762  */
3763 static void __attribute__((noinline))
3764 liveness_pass_0(TCGContext *s)
3765 {
3766     void * const multiple_ebb = (void *)(uintptr_t)-1;
3767     int nb_temps = s->nb_temps;
3768     TCGOp *op, *ebb;
3769 
3770     for (int i = s->nb_globals; i < nb_temps; ++i) {
3771         s->temps[i].state_ptr = NULL;
3772     }
3773 
3774     /*
3775      * Represent each EBB by the op at which it begins.  In the case of
3776      * the first EBB, this is the first op, otherwise it is a label.
3777      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3778      * within a single EBB, else MULTIPLE_EBB.
3779      */
3780     ebb = QTAILQ_FIRST(&s->ops);
3781     QTAILQ_FOREACH(op, &s->ops, link) {
3782         const TCGOpDef *def;
3783         int nb_oargs, nb_iargs;
3784 
3785         switch (op->opc) {
3786         case INDEX_op_set_label:
3787             ebb = op;
3788             continue;
3789         case INDEX_op_discard:
3790             continue;
3791         case INDEX_op_call:
3792             nb_oargs = TCGOP_CALLO(op);
3793             nb_iargs = TCGOP_CALLI(op);
3794             break;
3795         default:
3796             def = &tcg_op_defs[op->opc];
3797             nb_oargs = def->nb_oargs;
3798             nb_iargs = def->nb_iargs;
3799             break;
3800         }
3801 
3802         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3803             TCGTemp *ts = arg_temp(op->args[i]);
3804 
3805             if (ts->kind != TEMP_TB) {
3806                 continue;
3807             }
3808             if (ts->state_ptr == NULL) {
3809                 ts->state_ptr = ebb;
3810             } else if (ts->state_ptr != ebb) {
3811                 ts->state_ptr = multiple_ebb;
3812             }
3813         }
3814     }
3815 
3816     /*
3817      * For TEMP_TB that turned out not to be used beyond one EBB,
3818      * reduce the liveness to TEMP_EBB.
3819      */
3820     for (int i = s->nb_globals; i < nb_temps; ++i) {
3821         TCGTemp *ts = &s->temps[i];
3822         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3823             ts->kind = TEMP_EBB;
3824         }
3825     }
3826 }
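/*
 * For instance, a TEMP_TB written and read only between one label and
 * the next is seen with a single 'ebb' value above and is demoted to
 * TEMP_EBB; a temp used both before and after a set_label reaches the
 * MULTIPLE_EBB marker and keeps its TB-wide lifetime.
 */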
3827 
3828 /* Liveness analysis: update the opc_arg_life array to tell if a
3829    given input argument is dead. Instructions updating dead
3830    temporaries are removed. */
3831 static void __attribute__((noinline))
3832 liveness_pass_1(TCGContext *s)
3833 {
3834     int nb_globals = s->nb_globals;
3835     int nb_temps = s->nb_temps;
3836     TCGOp *op, *op_prev;
3837     TCGRegSet *prefs;
3838     int i;
3839 
3840     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3841     for (i = 0; i < nb_temps; ++i) {
3842         s->temps[i].state_ptr = prefs + i;
3843     }
3844 
3845     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3846     la_func_end(s, nb_globals, nb_temps);
3847 
3848     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3849         int nb_iargs, nb_oargs;
3850         TCGOpcode opc_new, opc_new2;
3851         TCGLifeData arg_life = 0;
3852         TCGTemp *ts;
3853         TCGOpcode opc = op->opc;
3854         const TCGOpDef *def = &tcg_op_defs[opc];
3855         const TCGArgConstraint *args_ct;
3856 
3857         switch (opc) {
3858         case INDEX_op_call:
3859             {
3860                 const TCGHelperInfo *info = tcg_call_info(op);
3861                 int call_flags = tcg_call_flags(op);
3862 
3863                 nb_oargs = TCGOP_CALLO(op);
3864                 nb_iargs = TCGOP_CALLI(op);
3865 
3866                 /* pure functions can be removed if their result is unused */
3867                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3868                     for (i = 0; i < nb_oargs; i++) {
3869                         ts = arg_temp(op->args[i]);
3870                         if (ts->state != TS_DEAD) {
3871                             goto do_not_remove_call;
3872                         }
3873                     }
3874                     goto do_remove;
3875                 }
3876             do_not_remove_call:
3877 
3878                 /* Output args are dead.  */
3879                 for (i = 0; i < nb_oargs; i++) {
3880                     ts = arg_temp(op->args[i]);
3881                     if (ts->state & TS_DEAD) {
3882                         arg_life |= DEAD_ARG << i;
3883                     }
3884                     if (ts->state & TS_MEM) {
3885                         arg_life |= SYNC_ARG << i;
3886                     }
3887                     ts->state = TS_DEAD;
3888                     la_reset_pref(ts);
3889                 }
3890 
3891                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3892                 memset(op->output_pref, 0, sizeof(op->output_pref));
3893 
3894                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3895                                     TCG_CALL_NO_READ_GLOBALS))) {
3896                     la_global_kill(s, nb_globals);
3897                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3898                     la_global_sync(s, nb_globals);
3899                 }
3900 
3901                 /* Record arguments that die in this helper.  */
3902                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3903                     ts = arg_temp(op->args[i]);
3904                     if (ts->state & TS_DEAD) {
3905                         arg_life |= DEAD_ARG << i;
3906                     }
3907                 }
3908 
3909                 /* For all live registers, remove call-clobbered prefs.  */
3910                 la_cross_call(s, nb_temps);
3911 
3912                 /*
3913                  * Input arguments are live for preceding opcodes.
3914                  *
3915                  * For those arguments that die, and will be allocated in
3916                  * registers, clear the register set for that arg, to be
3917                  * filled in below.  For args that will be on the stack,
3918                  * reset to any available reg.  Process arguments in reverse
3919                  * order so that if a temp is used more than once, the stack
3920                  * reset to max happens before the register reset to 0.
3921                  */
3922                 for (i = nb_iargs - 1; i >= 0; i--) {
3923                     const TCGCallArgumentLoc *loc = &info->in[i];
3924                     ts = arg_temp(op->args[nb_oargs + i]);
3925 
3926                     if (ts->state & TS_DEAD) {
3927                         switch (loc->kind) {
3928                         case TCG_CALL_ARG_NORMAL:
3929                         case TCG_CALL_ARG_EXTEND_U:
3930                         case TCG_CALL_ARG_EXTEND_S:
3931                             if (arg_slot_reg_p(loc->arg_slot)) {
3932                                 *la_temp_pref(ts) = 0;
3933                                 break;
3934                             }
3935                             /* fall through */
3936                         default:
3937                             *la_temp_pref(ts) =
3938                                 tcg_target_available_regs[ts->type];
3939                             break;
3940                         }
3941                         ts->state &= ~TS_DEAD;
3942                     }
3943                 }
3944 
3945                 /*
3946                  * For each input argument, add its input register to prefs.
3947                  * If a temp is used once, this produces a single set bit;
3948                  * if a temp is used multiple times, this produces a set.
3949                  */
3950                 for (i = 0; i < nb_iargs; i++) {
3951                     const TCGCallArgumentLoc *loc = &info->in[i];
3952                     ts = arg_temp(op->args[nb_oargs + i]);
3953 
3954                     switch (loc->kind) {
3955                     case TCG_CALL_ARG_NORMAL:
3956                     case TCG_CALL_ARG_EXTEND_U:
3957                     case TCG_CALL_ARG_EXTEND_S:
3958                         if (arg_slot_reg_p(loc->arg_slot)) {
3959                             tcg_regset_set_reg(*la_temp_pref(ts),
3960                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3961                         }
3962                         break;
3963                     default:
3964                         break;
3965                     }
3966                 }
3967             }
3968             break;
3969         case INDEX_op_insn_start:
3970             break;
3971         case INDEX_op_discard:
3972             /* mark the temporary as dead */
3973             ts = arg_temp(op->args[0]);
3974             ts->state = TS_DEAD;
3975             la_reset_pref(ts);
3976             break;
3977 
3978         case INDEX_op_add2_i32:
3979         case INDEX_op_add2_i64:
3980             opc_new = INDEX_op_add;
3981             goto do_addsub2;
3982         case INDEX_op_sub2_i32:
3983         case INDEX_op_sub2_i64:
3984             opc_new = INDEX_op_sub;
3985         do_addsub2:
3986             nb_iargs = 4;
3987             nb_oargs = 2;
3988             /* Test if the high part of the operation is dead, but not
3989                the low part.  The result can be optimized to a simple
3990                add or sub.  This happens often for an x86_64 guest when
3991                the CPU mode is set to 32 bit.  */
3992             if (arg_temp(op->args[1])->state == TS_DEAD) {
3993                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3994                     goto do_remove;
3995                 }
3996                 /* Replace the opcode and adjust the args in place,
3997                    leaving 3 unused args at the end.  */
3998                 op->opc = opc = opc_new;
3999                 op->args[1] = op->args[2];
4000                 op->args[2] = op->args[4];
4001                 /* Fall through and mark the single-word operation live.  */
4002                 nb_iargs = 2;
4003                 nb_oargs = 1;
4004             }
4005             goto do_not_remove;
4006 
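        /*
         * Sketch of the do_addsub2 rewrite above (operand names are
         * illustrative): for
         *     add2_i64 lo, hi, al, ah, bl, bh
         * where 'hi' is dead but 'lo' is live, the op is rewritten as
         *     add lo, al, bl
         * and the carry computation is dropped entirely.
         */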
4007         case INDEX_op_muls2:
4008             opc_new = INDEX_op_mul;
4009             opc_new2 = INDEX_op_mulsh;
4010             goto do_mul2;
4011         case INDEX_op_mulu2:
4012             opc_new = INDEX_op_mul;
4013             opc_new2 = INDEX_op_muluh;
4014         do_mul2:
4015             nb_iargs = 2;
4016             nb_oargs = 2;
4017             if (arg_temp(op->args[1])->state == TS_DEAD) {
4018                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4019                     /* Both parts of the operation are dead.  */
4020                     goto do_remove;
4021                 }
4022                 /* The high part of the operation is dead; generate the low. */
4023                 op->opc = opc = opc_new;
4024                 op->args[1] = op->args[2];
4025                 op->args[2] = op->args[3];
4026             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4027                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4028                 /* The low part of the operation is dead; generate the high. */
4029                 op->opc = opc = opc_new2;
4030                 op->args[0] = op->args[1];
4031                 op->args[1] = op->args[2];
4032                 op->args[2] = op->args[3];
4033             } else {
4034                 goto do_not_remove;
4035             }
4036             /* Mark the single-word operation live.  */
4037             nb_oargs = 1;
4038             goto do_not_remove;
4039 
4040         default:
4041             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4042             nb_iargs = def->nb_iargs;
4043             nb_oargs = def->nb_oargs;
4044 
4045             /* Test if the operation can be removed because all
4046                its outputs are dead. We assume that nb_oargs == 0
4047                implies side effects.  */
4048             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4049                 for (i = 0; i < nb_oargs; i++) {
4050                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4051                         goto do_not_remove;
4052                     }
4053                 }
4054                 goto do_remove;
4055             }
4056             goto do_not_remove;
4057 
4058         do_remove:
4059             tcg_op_remove(s, op);
4060             break;
4061 
4062         do_not_remove:
4063             for (i = 0; i < nb_oargs; i++) {
4064                 ts = arg_temp(op->args[i]);
4065 
4066                 /* Remember the preference of the uses that followed.  */
4067                 if (i < ARRAY_SIZE(op->output_pref)) {
4068                     op->output_pref[i] = *la_temp_pref(ts);
4069                 }
4070 
4071                 /* Output args are dead.  */
4072                 if (ts->state & TS_DEAD) {
4073                     arg_life |= DEAD_ARG << i;
4074                 }
4075                 if (ts->state & TS_MEM) {
4076                     arg_life |= SYNC_ARG << i;
4077                 }
4078                 ts->state = TS_DEAD;
4079                 la_reset_pref(ts);
4080             }
4081 
4082             /* If end of basic block, update.  */
4083             if (def->flags & TCG_OPF_BB_EXIT) {
4084                 la_func_end(s, nb_globals, nb_temps);
4085             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4086                 la_bb_sync(s, nb_globals, nb_temps);
4087             } else if (def->flags & TCG_OPF_BB_END) {
4088                 la_bb_end(s, nb_globals, nb_temps);
4089             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4090                 la_global_sync(s, nb_globals);
4091                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4092                     la_cross_call(s, nb_temps);
4093                 }
4094             }
4095 
4096             /* Record arguments that die in this opcode.  */
4097             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4098                 ts = arg_temp(op->args[i]);
4099                 if (ts->state & TS_DEAD) {
4100                     arg_life |= DEAD_ARG << i;
4101                 }
4102             }
4103 
4104             /* Input arguments are live for preceding opcodes.  */
4105             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4106                 ts = arg_temp(op->args[i]);
4107                 if (ts->state & TS_DEAD) {
4108                     /* For operands that were dead, initially allow
4109                        all regs for the type.  */
4110                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4111                     ts->state &= ~TS_DEAD;
4112                 }
4113             }
4114 
4115             /* Incorporate constraints for this operand.  */
4116             switch (opc) {
4117             case INDEX_op_mov:
4118                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4119                    have proper constraints.  That said, special-case
4120                    moves to propagate preferences backward.  */
4121                 if (IS_DEAD_ARG(1)) {
4122                     *la_temp_pref(arg_temp(op->args[0]))
4123                         = *la_temp_pref(arg_temp(op->args[1]));
4124                 }
4125                 break;
4126 
4127             default:
4128                 args_ct = opcode_args_ct(op);
4129                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4130                     const TCGArgConstraint *ct = &args_ct[i];
4131                     TCGRegSet set, *pset;
4132 
4133                     ts = arg_temp(op->args[i]);
4134                     pset = la_temp_pref(ts);
4135                     set = *pset;
4136 
4137                     set &= ct->regs;
4138                     if (ct->ialias) {
4139                         set &= output_pref(op, ct->alias_index);
4140                     }
4141                     /* If the combination is not possible, restart.  */
4142                     if (set == 0) {
4143                         set = ct->regs;
4144                     }
4145                     *pset = set;
4146                 }
4147                 break;
4148             }
4149             break;
4150         }
4151         op->life = arg_life;
4152     }
4153 }
4154 
4155 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4156 static bool __attribute__((noinline))
4157 liveness_pass_2(TCGContext *s)
4158 {
4159     int nb_globals = s->nb_globals;
4160     int nb_temps, i;
4161     bool changes = false;
4162     TCGOp *op, *op_next;
4163 
4164     /* Create a temporary for each indirect global.  */
4165     for (i = 0; i < nb_globals; ++i) {
4166         TCGTemp *its = &s->temps[i];
4167         if (its->indirect_reg) {
4168             TCGTemp *dts = tcg_temp_alloc(s);
4169             dts->type = its->type;
4170             dts->base_type = its->base_type;
4171             dts->temp_subindex = its->temp_subindex;
4172             dts->kind = TEMP_EBB;
4173             its->state_ptr = dts;
4174         } else {
4175             its->state_ptr = NULL;
4176         }
4177         /* All globals begin dead.  */
4178         its->state = TS_DEAD;
4179     }
4180     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4181         TCGTemp *its = &s->temps[i];
4182         its->state_ptr = NULL;
4183         its->state = TS_DEAD;
4184     }
4185 
4186     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4187         TCGOpcode opc = op->opc;
4188         const TCGOpDef *def = &tcg_op_defs[opc];
4189         TCGLifeData arg_life = op->life;
4190         int nb_iargs, nb_oargs, call_flags;
4191         TCGTemp *arg_ts, *dir_ts;
4192 
4193         if (opc == INDEX_op_call) {
4194             nb_oargs = TCGOP_CALLO(op);
4195             nb_iargs = TCGOP_CALLI(op);
4196             call_flags = tcg_call_flags(op);
4197         } else {
4198             nb_iargs = def->nb_iargs;
4199             nb_oargs = def->nb_oargs;
4200 
4201             /* Set flags similar to how calls require.  */
4202             if (def->flags & TCG_OPF_COND_BRANCH) {
4203                 /* Like reading globals: sync_globals */
4204                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4205             } else if (def->flags & TCG_OPF_BB_END) {
4206                 /* Like writing globals: save_globals */
4207                 call_flags = 0;
4208             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4209                 /* Like reading globals: sync_globals */
4210                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4211             } else {
4212                 /* No effect on globals.  */
4213                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4214                               TCG_CALL_NO_WRITE_GLOBALS);
4215             }
4216         }
4217 
4218         /* Make sure that input arguments are available.  */
4219         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4220             arg_ts = arg_temp(op->args[i]);
4221             dir_ts = arg_ts->state_ptr;
4222             if (dir_ts && arg_ts->state == TS_DEAD) {
4223                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4224                                   ? INDEX_op_ld_i32
4225                                   : INDEX_op_ld_i64);
4226                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4227                                                   arg_ts->type, 3);
4228 
4229                 lop->args[0] = temp_arg(dir_ts);
4230                 lop->args[1] = temp_arg(arg_ts->mem_base);
4231                 lop->args[2] = arg_ts->mem_offset;
4232 
4233                 /* Loaded, but synced with memory.  */
4234                 arg_ts->state = TS_MEM;
4235             }
4236         }
4237 
4238         /* Perform input replacement, and mark inputs that became dead.
4239            No action is required except keeping temp_state up to date
4240            so that we reload when needed.  */
4241         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4242             arg_ts = arg_temp(op->args[i]);
4243             dir_ts = arg_ts->state_ptr;
4244             if (dir_ts) {
4245                 op->args[i] = temp_arg(dir_ts);
4246                 changes = true;
4247                 if (IS_DEAD_ARG(i)) {
4248                     arg_ts->state = TS_DEAD;
4249                 }
4250             }
4251         }
4252 
4253         /* Liveness analysis should ensure that the following are
4254            all correct, for call sites and basic block end points.  */
4255         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4256             /* Nothing to do */
4257         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4258             for (i = 0; i < nb_globals; ++i) {
4259                 /* Liveness should see that globals are synced back,
4260                    that is, either TS_DEAD or TS_MEM.  */
4261                 arg_ts = &s->temps[i];
4262                 tcg_debug_assert(arg_ts->state_ptr == 0
4263                                  || arg_ts->state != 0);
4264             }
4265         } else {
4266             for (i = 0; i < nb_globals; ++i) {
4267                 /* Liveness should see that globals are saved back,
4268                    that is, TS_DEAD, waiting to be reloaded.  */
4269                 arg_ts = &s->temps[i];
4270                 tcg_debug_assert(arg_ts->state_ptr == 0
4271                                  || arg_ts->state == TS_DEAD);
4272             }
4273         }
4274 
4275         /* Outputs become available.  */
4276         if (opc == INDEX_op_mov) {
4277             arg_ts = arg_temp(op->args[0]);
4278             dir_ts = arg_ts->state_ptr;
4279             if (dir_ts) {
4280                 op->args[0] = temp_arg(dir_ts);
4281                 changes = true;
4282 
4283                 /* The output is now live and modified.  */
4284                 arg_ts->state = 0;
4285 
4286                 if (NEED_SYNC_ARG(0)) {
4287                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4288                                       ? INDEX_op_st_i32
4289                                       : INDEX_op_st_i64);
4290                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4291                                                      arg_ts->type, 3);
4292                     TCGTemp *out_ts = dir_ts;
4293 
4294                     if (IS_DEAD_ARG(0)) {
4295                         out_ts = arg_temp(op->args[1]);
4296                         arg_ts->state = TS_DEAD;
4297                         tcg_op_remove(s, op);
4298                     } else {
4299                         arg_ts->state = TS_MEM;
4300                     }
4301 
4302                     sop->args[0] = temp_arg(out_ts);
4303                     sop->args[1] = temp_arg(arg_ts->mem_base);
4304                     sop->args[2] = arg_ts->mem_offset;
4305                 } else {
4306                     tcg_debug_assert(!IS_DEAD_ARG(0));
4307                 }
4308             }
4309         } else {
4310             for (i = 0; i < nb_oargs; i++) {
4311                 arg_ts = arg_temp(op->args[i]);
4312                 dir_ts = arg_ts->state_ptr;
4313                 if (!dir_ts) {
4314                     continue;
4315                 }
4316                 op->args[i] = temp_arg(dir_ts);
4317                 changes = true;
4318 
4319                 /* The output is now live and modified.  */
4320                 arg_ts->state = 0;
4321 
4322                 /* Sync outputs upon their last write.  */
4323                 if (NEED_SYNC_ARG(i)) {
4324                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4325                                       ? INDEX_op_st_i32
4326                                       : INDEX_op_st_i64);
4327                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4328                                                      arg_ts->type, 3);
4329 
4330                     sop->args[0] = temp_arg(dir_ts);
4331                     sop->args[1] = temp_arg(arg_ts->mem_base);
4332                     sop->args[2] = arg_ts->mem_offset;
4333 
4334                     arg_ts->state = TS_MEM;
4335                 }
4336                 /* Drop outputs that are dead.  */
4337                 if (IS_DEAD_ARG(i)) {
4338                     arg_ts->state = TS_DEAD;
4339                 }
4340             }
4341         }
4342     }
4343 
4344     return changes;
4345 }
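/*
 * Example of the rewrite above on an assumed indirect global G with
 * shadow temp D (mem_base/mem_offset as recorded for G):
 *
 *     ld_i32 D, base, offset      <- inserted while G was TS_DEAD
 *     <op now referencing D instead of G>
 *     st_i32 D, base, offset      <- inserted when NEED_SYNC_ARG(i)
 *
 * with G's state tracking whether D is synced (TS_MEM) or stale.
 */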
4346 
4347 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4348 {
4349     intptr_t off;
4350     int size, align;
4351 
4352     /* When allocating an object, look at the full type. */
4353     size = tcg_type_size(ts->base_type);
4354     switch (ts->base_type) {
4355     case TCG_TYPE_I32:
4356         align = 4;
4357         break;
4358     case TCG_TYPE_I64:
4359     case TCG_TYPE_V64:
4360         align = 8;
4361         break;
4362     case TCG_TYPE_I128:
4363     case TCG_TYPE_V128:
4364     case TCG_TYPE_V256:
4365         /*
4366          * Note that we do not require aligned storage for V256,
4367          * and that we provide alignment for I128 to match V128,
4368          * even if that's above what the host ABI requires.
4369          */
4370         align = 16;
4371         break;
4372     default:
4373         g_assert_not_reached();
4374     }
4375 
4376     /*
4377      * Assume the stack is sufficiently aligned.
4378      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4379      * and do not require 16 byte vector alignment.  This seems slightly
4380      * easier than fully parameterizing the above switch statement.
4381      */
4382     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4383     off = ROUND_UP(s->current_frame_offset, align);
4384 
4385     /* If we've exhausted the stack frame, restart with a smaller TB. */
4386     if (off + size > s->frame_end) {
4387         tcg_raise_tb_overflow(s);
4388     }
4389     s->current_frame_offset = off + size;
4390 #if defined(__sparc__)
4391     off += TCG_TARGET_STACK_BIAS;
4392 #endif
4393 
4394     /* If the object was subdivided, assign memory to all the parts. */
4395     if (ts->base_type != ts->type) {
4396         int part_size = tcg_type_size(ts->type);
4397         int part_count = size / part_size;
4398 
4399         /*
4400          * Each part is allocated sequentially in tcg_temp_new_internal.
4401          * Jump back to the first part by subtracting the current index.
4402          */
4403         ts -= ts->temp_subindex;
4404         for (int i = 0; i < part_count; ++i) {
4405             ts[i].mem_offset = off + i * part_size;
4406             ts[i].mem_base = s->frame_temp;
4407             ts[i].mem_allocated = 1;
4408         }
4409     } else {
4410         ts->mem_offset = off;
4411         ts->mem_base = s->frame_temp;
4412         ts->mem_allocated = 1;
4413     }
4414 }
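/*
 * Worked example of the layout above (offsets assumed, 16-byte stack
 * alignment): with current_frame_offset = 12, a TCG_TYPE_I64 temp uses
 * align = 8, so off = ROUND_UP(12, 8) = 16 and the next free offset
 * becomes 24.  A TCG_TYPE_I128 object split into two I64 parts would
 * instead get mem_offset 16 and 24 for subindex 0 and 1, from the
 * same frame base.
 */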
4415 
4416 /* Assign @reg to @ts, and update reg_to_temp[]. */
4417 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4418 {
4419     if (ts->val_type == TEMP_VAL_REG) {
4420         TCGReg old = ts->reg;
4421         tcg_debug_assert(s->reg_to_temp[old] == ts);
4422         if (old == reg) {
4423             return;
4424         }
4425         s->reg_to_temp[old] = NULL;
4426     }
4427     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4428     s->reg_to_temp[reg] = ts;
4429     ts->val_type = TEMP_VAL_REG;
4430     ts->reg = reg;
4431 }
4432 
4433 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4434 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4435 {
4436     tcg_debug_assert(type != TEMP_VAL_REG);
4437     if (ts->val_type == TEMP_VAL_REG) {
4438         TCGReg reg = ts->reg;
4439         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4440         s->reg_to_temp[reg] = NULL;
4441     }
4442     ts->val_type = type;
4443 }
4444 
4445 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4446 
4447 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4448    mark it free; otherwise mark it dead.  */
4449 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4450 {
4451     TCGTempVal new_type;
4452 
4453     switch (ts->kind) {
4454     case TEMP_FIXED:
4455         return;
4456     case TEMP_GLOBAL:
4457     case TEMP_TB:
4458         new_type = TEMP_VAL_MEM;
4459         break;
4460     case TEMP_EBB:
4461         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4462         break;
4463     case TEMP_CONST:
4464         new_type = TEMP_VAL_CONST;
4465         break;
4466     default:
4467         g_assert_not_reached();
4468     }
4469     set_temp_val_nonreg(s, ts, new_type);
4470 }
4471 
4472 /* Mark a temporary as dead.  */
4473 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4474 {
4475     temp_free_or_dead(s, ts, 1);
4476 }
4477 
4478 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4479    register needs to be allocated to store a constant.  If 'free_or_dead'
4480    is non-zero, subsequently release the temporary; if it is positive, the
4481    temp is dead; if it is negative, the temp is free.  */
4482 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4483                       TCGRegSet preferred_regs, int free_or_dead)
4484 {
4485     if (!temp_readonly(ts) && !ts->mem_coherent) {
4486         if (!ts->mem_allocated) {
4487             temp_allocate_frame(s, ts);
4488         }
4489         switch (ts->val_type) {
4490         case TEMP_VAL_CONST:
4491             /* If we're going to free the temp immediately, then we won't
4492                require it later in a register, so attempt to store the
4493                constant to memory directly.  */
4494             if (free_or_dead
4495                 && tcg_out_sti(s, ts->type, ts->val,
4496                                ts->mem_base->reg, ts->mem_offset)) {
4497                 break;
4498             }
4499             temp_load(s, ts, tcg_target_available_regs[ts->type],
4500                       allocated_regs, preferred_regs);
4501             /* fallthrough */
4502 
4503         case TEMP_VAL_REG:
4504             tcg_out_st(s, ts->type, ts->reg,
4505                        ts->mem_base->reg, ts->mem_offset);
4506             break;
4507 
4508         case TEMP_VAL_MEM:
4509             break;
4510 
4511         case TEMP_VAL_DEAD:
4512         default:
4513             g_assert_not_reached();
4514         }
4515         ts->mem_coherent = 1;
4516     }
4517     if (free_or_dead) {
4518         temp_free_or_dead(s, ts, free_or_dead);
4519     }
4520 }
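/*
 * For example, syncing a dying TEMP_VAL_CONST may avoid a register
 * entirely: if the backend's tcg_out_sti() can store the immediate
 * directly, no temp_load() is needed; otherwise the constant is
 * materialized in a register and stored with tcg_out_st().
 */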
4521 
4522 /* free register 'reg' by spilling the corresponding temporary if necessary */
4523 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4524 {
4525     TCGTemp *ts = s->reg_to_temp[reg];
4526     if (ts != NULL) {
4527         temp_sync(s, ts, allocated_regs, 0, -1);
4528     }
4529 }
4530 
4531 /**
4532  * tcg_reg_alloc:
4533  * @required_regs: Set of registers in which we must allocate.
4534  * @allocated_regs: Set of registers which must be avoided.
4535  * @preferred_regs: Set of registers we should prefer.
4536  * @rev: True if we search the registers in "indirect" order.
4537  *
4538  * The allocated register must be in @required_regs & ~@allocated_regs,
4539  * but if we can put it in @preferred_regs we may save a move later.
4540  */
4541 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4542                             TCGRegSet allocated_regs,
4543                             TCGRegSet preferred_regs, bool rev)
4544 {
4545     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4546     TCGRegSet reg_ct[2];
4547     const int *order;
4548 
4549     reg_ct[1] = required_regs & ~allocated_regs;
4550     tcg_debug_assert(reg_ct[1] != 0);
4551     reg_ct[0] = reg_ct[1] & preferred_regs;
4552 
4553     /* Skip the preferred_regs option if it cannot be satisfied,
4554        or if the preference made no difference.  */
4555     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4556 
4557     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4558 
4559     /* Try free registers, preferences first.  */
4560     for (j = f; j < 2; j++) {
4561         TCGRegSet set = reg_ct[j];
4562 
4563         if (tcg_regset_single(set)) {
4564             /* One register in the set.  */
4565             TCGReg reg = tcg_regset_first(set);
4566             if (s->reg_to_temp[reg] == NULL) {
4567                 return reg;
4568             }
4569         } else {
4570             for (i = 0; i < n; i++) {
4571                 TCGReg reg = order[i];
4572                 if (s->reg_to_temp[reg] == NULL &&
4573                     tcg_regset_test_reg(set, reg)) {
4574                     return reg;
4575                 }
4576             }
4577         }
4578     }
4579 
4580     /* We must spill something.  */
4581     for (j = f; j < 2; j++) {
4582         TCGRegSet set = reg_ct[j];
4583 
4584         if (tcg_regset_single(set)) {
4585             /* One register in the set.  */
4586             TCGReg reg = tcg_regset_first(set);
4587             tcg_reg_free(s, reg, allocated_regs);
4588             return reg;
4589         } else {
4590             for (i = 0; i < n; i++) {
4591                 TCGReg reg = order[i];
4592                 if (tcg_regset_test_reg(set, reg)) {
4593                     tcg_reg_free(s, reg, allocated_regs);
4594                     return reg;
4595                 }
4596             }
4597         }
4598     }
4599 
4600     g_assert_not_reached();
4601 }
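/*
 * A minimal sketch of the search order, assuming hypothetical host
 * registers: with required = { R0, R1 }, preferred = { R1 }, R0 free
 * and R1 occupied, the first loop tries the preferred-only set (R1 is
 * in use), then the full set, and returns the free R0 without
 * spilling.  Only if every candidate were occupied would a register
 * be spilled via tcg_reg_free() in the second loop.
 */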
4602 
4603 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4604                                  TCGRegSet allocated_regs,
4605                                  TCGRegSet preferred_regs, bool rev)
4606 {
4607     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4608     TCGRegSet reg_ct[2];
4609     const int *order;
4610 
4611     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4612     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4613     tcg_debug_assert(reg_ct[1] != 0);
4614     reg_ct[0] = reg_ct[1] & preferred_regs;
4615 
4616     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4617 
4618     /*
4619      * Skip the preferred_regs option if it cannot be satisfied,
4620      * or if the preference made no difference.
4621      */
4622     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4623 
4624     /*
4625      * Minimize the number of flushes by looking for 2 free registers first,
4626      * then a single flush, then two flushes.
4627      */
4628     for (fmin = 2; fmin >= 0; fmin--) {
4629         for (j = k; j < 2; j++) {
4630             TCGRegSet set = reg_ct[j];
4631 
4632             for (i = 0; i < n; i++) {
4633                 TCGReg reg = order[i];
4634 
4635                 if (tcg_regset_test_reg(set, reg)) {
4636                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4637                     if (f >= fmin) {
4638                         tcg_reg_free(s, reg, allocated_regs);
4639                         tcg_reg_free(s, reg + 1, allocated_regs);
4640                         return reg;
4641                     }
4642                 }
4643             }
4644         }
4645     }
4646     g_assert_not_reached();
4647 }
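/*
 * Sketch of the fmin search above, with hypothetical occupancy: on
 * the first iteration only a pair where both reg and reg+1 are free
 * qualifies (f == 2); failing that, a pair with a single occupant is
 * accepted (f >= 1) and the occupant spilled; finally any candidate
 * pair is taken, spilling both halves.
 */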
4648 
4649 /* Make sure the temporary is in a register.  If needed, allocate the register
4650    from DESIRED while avoiding ALLOCATED.  */
4651 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4652                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4653 {
4654     TCGReg reg;
4655 
4656     switch (ts->val_type) {
4657     case TEMP_VAL_REG:
4658         return;
4659     case TEMP_VAL_CONST:
4660         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4661                             preferred_regs, ts->indirect_base);
4662         if (ts->type <= TCG_TYPE_I64) {
4663             tcg_out_movi(s, ts->type, reg, ts->val);
4664         } else {
4665             uint64_t val = ts->val;
4666             MemOp vece = MO_64;
4667 
4668             /*
4669              * Find the minimal vector element that matches the constant.
4670              * The targets will, in general, have to do this search anyway,
4671              * so do it generically here.
4672              */
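            /*
             * E.g. 0x1212121212121212 replicates one byte and is
             * emitted at MO_8, while 0x0000004200000042 first matches
             * at MO_32.
             */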
4673             if (val == dup_const(MO_8, val)) {
4674                 vece = MO_8;
4675             } else if (val == dup_const(MO_16, val)) {
4676                 vece = MO_16;
4677             } else if (val == dup_const(MO_32, val)) {
4678                 vece = MO_32;
4679             }
4680 
4681             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4682         }
4683         ts->mem_coherent = 0;
4684         break;
4685     case TEMP_VAL_MEM:
4686         if (!ts->mem_allocated) {
4687             temp_allocate_frame(s, ts);
4688         }
4689         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4690                             preferred_regs, ts->indirect_base);
4691         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4692         ts->mem_coherent = 1;
4693         break;
4694     case TEMP_VAL_DEAD:
4695     default:
4696         g_assert_not_reached();
4697     }
4698     set_temp_val_reg(s, ts, reg);
4699 }
4700 
4701 /* Save a temporary to memory. 'allocated_regs' is used in case a
4702    temporary register needs to be allocated to store a constant.  */
4703 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4704 {
4705     /* The liveness analysis already ensures that globals are back
4706        in memory. Keep a tcg_debug_assert for safety. */
4707     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4708 }
4709 
4710 /* save globals to their canonical location and assume they can be
4711    modified by the following code. 'allocated_regs' is used in case a
4712    temporary register needs to be allocated to store a constant. */
4713 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4714 {
4715     int i, n;
4716 
4717     for (i = 0, n = s->nb_globals; i < n; i++) {
4718         temp_save(s, &s->temps[i], allocated_regs);
4719     }
4720 }
4721 
4722 /* sync globals to their canonical location and assume they can be
4723    read by the following code. 'allocated_regs' is used in case a
4724    temporary register needs to be allocated to store a constant. */
4725 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4726 {
4727     int i, n;
4728 
4729     for (i = 0, n = s->nb_globals; i < n; i++) {
4730         TCGTemp *ts = &s->temps[i];
4731         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4732                          || ts->kind == TEMP_FIXED
4733                          || ts->mem_coherent);
4734     }
4735 }
4736 
4737 /* at the end of a basic block, we assume all temporaries are dead and
4738    all globals are stored at their canonical location. */
4739 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4740 {
4741     int i;
4742 
4743     for (i = s->nb_globals; i < s->nb_temps; i++) {
4744         TCGTemp *ts = &s->temps[i];
4745 
4746         switch (ts->kind) {
4747         case TEMP_TB:
4748             temp_save(s, ts, allocated_regs);
4749             break;
4750         case TEMP_EBB:
4751             /* The liveness analysis already ensures that temps are dead.
4752               Keep a tcg_debug_assert for safety. */
4753             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4754             break;
4755         case TEMP_CONST:
4756             /* Similarly, we should have freed any allocated register. */
4757             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4758             break;
4759         default:
4760             g_assert_not_reached();
4761         }
4762     }
4763 
4764     save_globals(s, allocated_regs);
4765 }
4766 
4767 /*
4768  * At a conditional branch, we assume all temporaries are dead unless
4769  * explicitly live-across-conditional-branch; all globals and local
4770  * temps are synced to their location.
4771  */
4772 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4773 {
4774     sync_globals(s, allocated_regs);
4775 
4776     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4777         TCGTemp *ts = &s->temps[i];
4778         /*
4779          * The liveness analysis already ensures that temps are dead.
4780          * Keep tcg_debug_asserts for safety.
4781          */
4782         switch (ts->kind) {
4783         case TEMP_TB:
4784             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4785             break;
4786         case TEMP_EBB:
4787         case TEMP_CONST:
4788             break;
4789         default:
4790             g_assert_not_reached();
4791         }
4792     }
4793 }
4794 
4795 /*
4796  * Specialized code generation for INDEX_op_mov_* with a constant.
4797  */
4798 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4799                                   tcg_target_ulong val, TCGLifeData arg_life,
4800                                   TCGRegSet preferred_regs)
4801 {
4802     /* ENV should not be modified.  */
4803     tcg_debug_assert(!temp_readonly(ots));
4804 
4805     /* The movi is not explicitly generated here.  */
4806     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4807     ots->val = val;
4808     ots->mem_coherent = 0;
4809     if (NEED_SYNC_ARG(0)) {
4810         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4811     } else if (IS_DEAD_ARG(0)) {
4812         temp_dead(s, ots);
4813     }
4814 }
4815 
4816 /*
4817  * Specialized code generation for INDEX_op_mov_*.
4818  */
4819 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4820 {
4821     const TCGLifeData arg_life = op->life;
4822     TCGRegSet allocated_regs, preferred_regs;
4823     TCGTemp *ts, *ots;
4824     TCGType otype, itype;
4825     TCGReg oreg, ireg;
4826 
4827     allocated_regs = s->reserved_regs;
4828     preferred_regs = output_pref(op, 0);
4829     ots = arg_temp(op->args[0]);
4830     ts = arg_temp(op->args[1]);
4831 
4832     /* ENV should not be modified.  */
4833     tcg_debug_assert(!temp_readonly(ots));
4834 
4835     /* Note that otype != itype for no-op truncation.  */
4836     otype = ots->type;
4837     itype = ts->type;
4838 
4839     if (ts->val_type == TEMP_VAL_CONST) {
4840         /* propagate constant or generate sti */
4841         tcg_target_ulong val = ts->val;
4842         if (IS_DEAD_ARG(1)) {
4843             temp_dead(s, ts);
4844         }
4845         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4846         return;
4847     }
4848 
4849     /* If the source value is in memory we're going to be forced
4850        to have it in a register in order to perform the copy.  Copy
4851        the SOURCE value into its own register first, that way we
4852        don't have to reload SOURCE the next time it is used. */
4853     if (ts->val_type == TEMP_VAL_MEM) {
4854         temp_load(s, ts, tcg_target_available_regs[itype],
4855                   allocated_regs, preferred_regs);
4856     }
4857     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4858     ireg = ts->reg;
4859 
4860     if (IS_DEAD_ARG(0)) {
4861         /* mov to a non-saved dead register makes no sense (even with
4862            liveness analysis disabled). */
4863         tcg_debug_assert(NEED_SYNC_ARG(0));
4864         if (!ots->mem_allocated) {
4865             temp_allocate_frame(s, ots);
4866         }
4867         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4868         if (IS_DEAD_ARG(1)) {
4869             temp_dead(s, ts);
4870         }
4871         temp_dead(s, ots);
4872         return;
4873     }
4874 
4875     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4876         /*
4877          * The mov can be suppressed.  Kill input first, so that it
4878          * is unlinked from reg_to_temp, then set the output to the
4879          * reg that we saved from the input.
4880          */
4881         temp_dead(s, ts);
4882         oreg = ireg;
4883     } else {
4884         if (ots->val_type == TEMP_VAL_REG) {
4885             oreg = ots->reg;
4886         } else {
4887             /* Make sure to not spill the input register during allocation. */
4888             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4889                                  allocated_regs | ((TCGRegSet)1 << ireg),
4890                                  preferred_regs, ots->indirect_base);
4891         }
4892         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4893             /*
4894              * Cross register class move not supported.
4895              * Store the source register into the destination slot
4896              * and leave the destination temp as TEMP_VAL_MEM.
4897              */
4898             assert(!temp_readonly(ots));
4899             if (!ots->mem_allocated) {
4900                 temp_allocate_frame(s, ots);
4901             }
4902             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4903             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4904             ots->mem_coherent = 1;
4905             return;
4906         }
4907     }
4908     set_temp_val_reg(s, ots, oreg);
4909     ots->mem_coherent = 0;
4910 
4911     if (NEED_SYNC_ARG(0)) {
4912         temp_sync(s, ots, allocated_regs, 0, 0);
4913     }
4914 }
4915 
4916 /*
4917  * Specialized code generation for INDEX_op_dup_vec.
4918  */
4919 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4920 {
4921     const TCGLifeData arg_life = op->life;
4922     TCGRegSet dup_out_regs, dup_in_regs;
4923     const TCGArgConstraint *dup_args_ct;
4924     TCGTemp *its, *ots;
4925     TCGType itype, vtype;
4926     unsigned vece;
4927     int lowpart_ofs;
4928     bool ok;
4929 
4930     ots = arg_temp(op->args[0]);
4931     its = arg_temp(op->args[1]);
4932 
4933     /* ENV should not be modified.  */
4934     tcg_debug_assert(!temp_readonly(ots));
4935 
4936     itype = its->type;
4937     vece = TCGOP_VECE(op);
4938     vtype = TCGOP_TYPE(op);
4939 
4940     if (its->val_type == TEMP_VAL_CONST) {
4941         /* Propagate constant via movi -> dupi.  */
4942         tcg_target_ulong val = its->val;
4943         if (IS_DEAD_ARG(1)) {
4944             temp_dead(s, its);
4945         }
4946         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4947         return;
4948     }
4949 
4950     dup_args_ct = opcode_args_ct(op);
4951     dup_out_regs = dup_args_ct[0].regs;
4952     dup_in_regs = dup_args_ct[1].regs;
4953 
4954     /* Allocate the output register now.  */
4955     if (ots->val_type != TEMP_VAL_REG) {
4956         TCGRegSet allocated_regs = s->reserved_regs;
4957         TCGReg oreg;
4958 
4959         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4960             /* Make sure to not spill the input register. */
4961             tcg_regset_set_reg(allocated_regs, its->reg);
4962         }
4963         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4964                              output_pref(op, 0), ots->indirect_base);
4965         set_temp_val_reg(s, ots, oreg);
4966     }
4967 
4968     switch (its->val_type) {
4969     case TEMP_VAL_REG:
4970         /*
4971          * The dup constraints must be broad, covering all possible VECE.
4972          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4973          * to fail, indicating that extra moves are required for that case.
4974          */
4975         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4976             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4977                 goto done;
4978             }
4979             /* Try again from memory or a vector input register.  */
4980         }
4981         if (!its->mem_coherent) {
4982             /*
4983              * The input register is not synced, and so an extra store
4984              * would be required to use memory.  Attempt an integer-vector
4985              * register move first.  We do not have a TCGRegSet for this.
4986              */
4987             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4988                 break;
4989             }
4990             /* Sync the temp back to its slot and load from there.  */
4991             temp_sync(s, its, s->reserved_regs, 0, 0);
4992         }
4993         /* fall through */
4994 
4995     case TEMP_VAL_MEM:
4996         lowpart_ofs = 0;
4997         if (HOST_BIG_ENDIAN) {
4998             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4999         }
5000         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5001                              its->mem_offset + lowpart_ofs)) {
5002             goto done;
5003         }
5004         /* Load the input into the destination vector register. */
5005         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5006         break;
5007 
5008     default:
5009         g_assert_not_reached();
5010     }
5011 
5012     /* We now have a vector input register, so dup must succeed. */
5013     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5014     tcg_debug_assert(ok);
5015 
5016  done:
5017     ots->mem_coherent = 0;
5018     if (IS_DEAD_ARG(1)) {
5019         temp_dead(s, its);
5020     }
5021     if (NEED_SYNC_ARG(0)) {
5022         temp_sync(s, ots, s->reserved_regs, 0, 0);
5023     }
5024     if (IS_DEAD_ARG(0)) {
5025         temp_dead(s, ots);
5026     }
5027 }
5028 
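/*
 * Allocate registers for the inputs and outputs of OP, satisfying its
 * operand constraints, then emit the corresponding host instruction.
 */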
5029 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5030 {
5031     const TCGLifeData arg_life = op->life;
5032     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5033     TCGRegSet i_allocated_regs;
5034     TCGRegSet o_allocated_regs;
5035     int i, k, nb_iargs, nb_oargs;
5036     TCGReg reg;
5037     TCGArg arg;
5038     const TCGArgConstraint *args_ct;
5039     const TCGArgConstraint *arg_ct;
5040     TCGTemp *ts;
5041     TCGArg new_args[TCG_MAX_OP_ARGS];
5042     int const_args[TCG_MAX_OP_ARGS];
5043     TCGCond op_cond;
5044 
5045     nb_oargs = def->nb_oargs;
5046     nb_iargs = def->nb_iargs;
5047 
5048     /* copy constants */
5049     memcpy(new_args + nb_oargs + nb_iargs,
5050            op->args + nb_oargs + nb_iargs,
5051            sizeof(TCGArg) * def->nb_cargs);
5052 
5053     i_allocated_regs = s->reserved_regs;
5054     o_allocated_regs = s->reserved_regs;
5055 
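    /*
     * Extract the comparison condition, if any; which constants the
     * backend accepts may depend on it (see tcg_target_const_match).
     */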
5056     switch (op->opc) {
5057     case INDEX_op_brcond_i32:
5058     case INDEX_op_brcond_i64:
5059         op_cond = op->args[2];
5060         break;
5061     case INDEX_op_setcond_i32:
5062     case INDEX_op_setcond_i64:
5063     case INDEX_op_negsetcond_i32:
5064     case INDEX_op_negsetcond_i64:
5065     case INDEX_op_cmp_vec:
5066         op_cond = op->args[3];
5067         break;
5068     case INDEX_op_brcond2_i32:
5069         op_cond = op->args[4];
5070         break;
5071     case INDEX_op_movcond_i32:
5072     case INDEX_op_movcond_i64:
5073     case INDEX_op_setcond2_i32:
5074     case INDEX_op_cmpsel_vec:
5075         op_cond = op->args[5];
5076         break;
5077     default:
5078         /* No condition within opcode. */
5079         op_cond = TCG_COND_ALWAYS;
5080         break;
5081     }
5082 
5083     args_ct = opcode_args_ct(op);
5084 
5085     /* satisfy input constraints */
5086     for (k = 0; k < nb_iargs; k++) {
5087         TCGRegSet i_preferred_regs, i_required_regs;
5088         bool allocate_new_reg, copyto_new_reg;
5089         TCGTemp *ts2;
5090         int i1, i2;
5091 
5092         i = args_ct[nb_oargs + k].sort_index;
5093         arg = op->args[i];
5094         arg_ct = &args_ct[i];
5095         ts = arg_temp(arg);
5096 
5097         if (ts->val_type == TEMP_VAL_CONST) {
5098 #ifdef TCG_REG_ZERO
5099             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5100                 /* Hardware zero register: indicate register via non-const. */
5101                 const_args[i] = 0;
5102                 new_args[i] = TCG_REG_ZERO;
5103                 continue;
5104             }
5105 #endif
5106 
5107             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5108                                        op_cond, TCGOP_VECE(op))) {
5109                 /* constant is OK for instruction */
5110                 const_args[i] = 1;
5111                 new_args[i] = ts->val;
5112                 continue;
5113             }
5114         }
5115 
5116         reg = ts->reg;
5117         i_preferred_regs = 0;
5118         i_required_regs = arg_ct->regs;
5119         allocate_new_reg = false;
5120         copyto_new_reg = false;
5121 
5122         switch (arg_ct->pair) {
5123         case 0: /* not paired */
5124             if (arg_ct->ialias) {
5125                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5126 
5127                 /*
5128                  * If the input is readonly, then it cannot also be an
5129                  * output and aliased to itself.  If the input is not
5130                  * dead after the instruction, we must allocate a new
5131                  * register and move it.
5132                  */
5133                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5134                     || args_ct[arg_ct->alias_index].newreg) {
5135                     allocate_new_reg = true;
5136                 } else if (ts->val_type == TEMP_VAL_REG) {
5137                     /*
5138                      * Check if the current register has already been
5139                      * allocated for another input.
5140                      */
5141                     allocate_new_reg =
5142                         tcg_regset_test_reg(i_allocated_regs, reg);
5143                 }
5144             }
5145             if (!allocate_new_reg) {
5146                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5147                           i_preferred_regs);
5148                 reg = ts->reg;
5149                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5150             }
5151             if (allocate_new_reg) {
5152                 /*
5153                  * Allocate a new register matching the constraint
5154                  * and move the temporary register into it.
5155                  */
5156                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5157                           i_allocated_regs, 0);
5158                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5159                                     i_preferred_regs, ts->indirect_base);
5160                 copyto_new_reg = true;
5161             }
5162             break;
5163 
5164         case 1:
5165             /* First of an input pair; if i1 == i2, the second is an output. */
5166             i1 = i;
5167             i2 = arg_ct->pair_index;
5168             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5169 
5170             /*
5171              * It is easier to default to allocating a new pair
5172              * and to identify a few cases where it's not required.
5173              */
5174             if (arg_ct->ialias) {
5175                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5176                 if (IS_DEAD_ARG(i1) &&
5177                     IS_DEAD_ARG(i2) &&
5178                     !temp_readonly(ts) &&
5179                     ts->val_type == TEMP_VAL_REG &&
5180                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5181                     tcg_regset_test_reg(i_required_regs, reg) &&
5182                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5183                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5184                     (ts2
5185                      ? ts2->val_type == TEMP_VAL_REG &&
5186                        ts2->reg == reg + 1 &&
5187                        !temp_readonly(ts2)
5188                      : s->reg_to_temp[reg + 1] == NULL)) {
5189                     break;
5190                 }
5191             } else {
5192                 /* Without aliasing, the pair must also be an input. */
5193                 tcg_debug_assert(ts2);
5194                 if (ts->val_type == TEMP_VAL_REG &&
5195                     ts2->val_type == TEMP_VAL_REG &&
5196                     ts2->reg == reg + 1 &&
5197                     tcg_regset_test_reg(i_required_regs, reg)) {
5198                     break;
5199                 }
5200             }
5201             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5202                                      0, ts->indirect_base);
5203             goto do_pair;
5204 
5205         case 2: /* pair second */
5206             reg = new_args[arg_ct->pair_index] + 1;
5207             goto do_pair;
5208 
5209         case 3: /* ialias with second output, no first input */
5210             tcg_debug_assert(arg_ct->ialias);
5211             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5212 
5213             if (IS_DEAD_ARG(i) &&
5214                 !temp_readonly(ts) &&
5215                 ts->val_type == TEMP_VAL_REG &&
5216                 reg > 0 &&
5217                 s->reg_to_temp[reg - 1] == NULL &&
5218                 tcg_regset_test_reg(i_required_regs, reg) &&
5219                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5220                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5221                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5222                 break;
5223             }
5224             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5225                                      i_allocated_regs, 0,
5226                                      ts->indirect_base);
5227             tcg_regset_set_reg(i_allocated_regs, reg);
5228             reg += 1;
5229             goto do_pair;
5230 
5231         do_pair:
5232             /*
5233              * If an aliased input is not dead after the instruction,
5234              * we must allocate a new register and move it.
5235              */
5236             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5237                 TCGRegSet t_allocated_regs = i_allocated_regs;
5238 
5239                 /*
5240                  * Because of the alias, and the continued life, make sure
5241                  * that the temp is somewhere *other* than the reg pair,
5242                  * and we get a copy in reg.
5243                  */
5244                 tcg_regset_set_reg(t_allocated_regs, reg);
5245                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5246                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5247                     /* If ts was already in reg, copy it somewhere else. */
5248                     TCGReg nr;
5249                     bool ok;
5250 
5251                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5252                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5253                                        t_allocated_regs, 0, ts->indirect_base);
5254                     ok = tcg_out_mov(s, ts->type, nr, reg);
5255                     tcg_debug_assert(ok);
5256 
5257                     set_temp_val_reg(s, ts, nr);
5258                 } else {
5259                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5260                               t_allocated_regs, 0);
5261                     copyto_new_reg = true;
5262                 }
5263             } else {
5264                 /* Preferably allocate to reg, otherwise copy. */
5265                 i_required_regs = (TCGRegSet)1 << reg;
5266                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5267                           i_preferred_regs);
5268                 copyto_new_reg = ts->reg != reg;
5269             }
5270             break;
5271 
5272         default:
5273             g_assert_not_reached();
5274         }
5275 
5276         if (copyto_new_reg) {
5277             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5278                 /*
5279                  * Cross register class move not supported.  Sync the
5280                  * temp back to its slot and load from there.
5281                  */
5282                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5283                 tcg_out_ld(s, ts->type, reg,
5284                            ts->mem_base->reg, ts->mem_offset);
5285             }
5286         }
5287         new_args[i] = reg;
5288         const_args[i] = 0;
5289         tcg_regset_set_reg(i_allocated_regs, reg);
5290     }
5291 
5292     /* mark dead temporaries and free the associated registers */
5293     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5294         if (IS_DEAD_ARG(i)) {
5295             temp_dead(s, arg_temp(op->args[i]));
5296         }
5297     }
5298 
5299     if (def->flags & TCG_OPF_COND_BRANCH) {
5300         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5301     } else if (def->flags & TCG_OPF_BB_END) {
5302         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5303     } else {
5304         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5305             /* XXX: permit generic clobber register list? */
5306             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5307                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5308                     tcg_reg_free(s, i, i_allocated_regs);
5309                 }
5310             }
5311         }
5312         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5313             /* sync globals if the op has side effects and might trigger
5314                an exception. */
5315             sync_globals(s, i_allocated_regs);
5316         }
5317 
5318         /* satisfy the output constraints */
5319         for (k = 0; k < nb_oargs; k++) {
5320             i = args_ct[k].sort_index;
5321             arg = op->args[i];
5322             arg_ct = &args_ct[i];
5323             ts = arg_temp(arg);
5324 
5325             /* ENV should not be modified.  */
5326             tcg_debug_assert(!temp_readonly(ts));
5327 
5328             switch (arg_ct->pair) {
5329             case 0: /* not paired */
5330                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5331                     reg = new_args[arg_ct->alias_index];
5332                 } else if (arg_ct->newreg) {
5333                     reg = tcg_reg_alloc(s, arg_ct->regs,
5334                                         i_allocated_regs | o_allocated_regs,
5335                                         output_pref(op, k), ts->indirect_base);
5336                 } else {
5337                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5338                                         output_pref(op, k), ts->indirect_base);
5339                 }
5340                 break;
5341 
5342             case 1: /* first of pair */
5343                 if (arg_ct->oalias) {
5344                     reg = new_args[arg_ct->alias_index];
5345                 } else if (arg_ct->newreg) {
5346                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5347                                              i_allocated_regs | o_allocated_regs,
5348                                              output_pref(op, k),
5349                                              ts->indirect_base);
5350                 } else {
5351                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5352                                              output_pref(op, k),
5353                                              ts->indirect_base);
5354                 }
5355                 break;
5356 
5357             case 2: /* second of pair */
5358                 if (arg_ct->oalias) {
5359                     reg = new_args[arg_ct->alias_index];
5360                 } else {
5361                     reg = new_args[arg_ct->pair_index] + 1;
5362                 }
5363                 break;
5364 
5365             case 3: /* first of pair, aliasing with a second input */
5366                 tcg_debug_assert(!arg_ct->newreg);
5367                 reg = new_args[arg_ct->pair_index] - 1;
5368                 break;
5369 
5370             default:
5371                 g_assert_not_reached();
5372             }
5373             tcg_regset_set_reg(o_allocated_regs, reg);
5374             set_temp_val_reg(s, ts, reg);
5375             ts->mem_coherent = 0;
5376             new_args[i] = reg;
5377         }
5378     }
5379 
5380     /* emit instruction */
5381     TCGType type = TCGOP_TYPE(op);
5382     switch (op->opc) {
5383     case INDEX_op_ext_i32_i64:
5384         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5385         break;
5386     case INDEX_op_extu_i32_i64:
5387         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5388         break;
5389     case INDEX_op_extrl_i64_i32:
5390         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5391         break;
5392 
5393     case INDEX_op_add:
5394     case INDEX_op_and:
5395     case INDEX_op_andc:
5396     case INDEX_op_clz:
5397     case INDEX_op_ctz:
5398     case INDEX_op_divs:
5399     case INDEX_op_divu:
5400     case INDEX_op_eqv:
5401     case INDEX_op_mul:
5402     case INDEX_op_mulsh:
5403     case INDEX_op_muluh:
5404     case INDEX_op_nand:
5405     case INDEX_op_nor:
5406     case INDEX_op_or:
5407     case INDEX_op_orc:
5408     case INDEX_op_rems:
5409     case INDEX_op_remu:
5410     case INDEX_op_rotl:
5411     case INDEX_op_rotr:
5412     case INDEX_op_sar:
5413     case INDEX_op_shl:
5414     case INDEX_op_shr:
5415     case INDEX_op_xor:
5416         {
5417             const TCGOutOpBinary *out =
5418                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5419 
5420             /* Constants should never appear in the first source operand. */
5421             tcg_debug_assert(!const_args[1]);
5422             if (const_args[2]) {
5423                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5424             } else {
5425                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5426             }
5427         }
5428         break;
5429 
5430     case INDEX_op_sub:
5431         {
5432             const TCGOutOpSubtract *out = &outop_sub;
5433 
5434             /*
5435              * Constants should never appear in the second source operand.
5436              * These are folded into an add of the negated constant.
5437              */
5438             tcg_debug_assert(!const_args[2]);
5439             if (const_args[1]) {
5440                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5441             } else {
5442                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5443             }
5444         }
5445         break;
5446 
5447     case INDEX_op_ctpop:
5448     case INDEX_op_neg:
5449     case INDEX_op_not:
5450         {
5451             const TCGOutOpUnary *out =
5452                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5453 
5454             /* Constants should have been folded. */
5455             tcg_debug_assert(!const_args[1]);
5456             out->out_rr(s, type, new_args[0], new_args[1]);
5457         }
5458         break;
5459 
5460     case INDEX_op_divs2:
5461     case INDEX_op_divu2:
5462         {
5463             const TCGOutOpDivRem *out =
5464                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5465 
5466             /* Only used by x86 and s390x, which use matching constraints. */
5467             tcg_debug_assert(new_args[0] == new_args[2]);
5468             tcg_debug_assert(new_args[1] == new_args[3]);
5469             tcg_debug_assert(!const_args[4]);
5470             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5471         }
5472         break;
5473 
5474     case INDEX_op_muls2:
5475     case INDEX_op_mulu2:
5476         {
5477             const TCGOutOpMul2 *out =
5478                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5479 
5480             tcg_debug_assert(!const_args[2]);
5481             tcg_debug_assert(!const_args[3]);
5482             out->out_rrrr(s, type, new_args[0], new_args[1],
5483                           new_args[2], new_args[3]);
5484         }
5485         break;
5486 
5487 
5488     default:
5489         if (def->flags & TCG_OPF_VECTOR) {
5490             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5491                            TCGOP_VECE(op), new_args, const_args);
5492         } else {
5493             tcg_out_op(s, op->opc, type, new_args, const_args);
5494         }
5495         break;
5496     }
5497 
5498     /* move the outputs in the correct register if needed */
5499     for (i = 0; i < nb_oargs; i++) {
5500         ts = arg_temp(op->args[i]);
5501 
5502         /* ENV should not be modified.  */
5503         tcg_debug_assert(!temp_readonly(ts));
5504 
5505         if (NEED_SYNC_ARG(i)) {
5506             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5507         } else if (IS_DEAD_ARG(i)) {
5508             temp_dead(s, ts);
5509         }
5510     }
5511 }
5512 
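/*
 * Specialized code generation for dup2: build a 64-bit vector element
 * from two 32-bit inputs.  Returns false to use generic expansion.
 */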
5513 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5514 {
5515     const TCGLifeData arg_life = op->life;
5516     TCGTemp *ots, *itsl, *itsh;
5517     TCGType vtype = TCGOP_TYPE(op);
5518 
5519     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5520     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5521     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5522 
5523     ots = arg_temp(op->args[0]);
5524     itsl = arg_temp(op->args[1]);
5525     itsh = arg_temp(op->args[2]);
5526 
5527     /* ENV should not be modified.  */
5528     tcg_debug_assert(!temp_readonly(ots));
5529 
5530     /* Allocate the output register now.  */
5531     if (ots->val_type != TEMP_VAL_REG) {
5532         TCGRegSet allocated_regs = s->reserved_regs;
5533         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5534         TCGReg oreg;
5535 
5536         /* Make sure to not spill the input registers. */
5537         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5538             tcg_regset_set_reg(allocated_regs, itsl->reg);
5539         }
5540         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5541             tcg_regset_set_reg(allocated_regs, itsh->reg);
5542         }
5543 
5544         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5545                              output_pref(op, 0), ots->indirect_base);
5546         set_temp_val_reg(s, ots, oreg);
5547     }
5548 
5549     /* Promote dup2 of immediates to dupi_vec. */
5550     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5551         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5552         MemOp vece = MO_64;
5553 
5554         if (val == dup_const(MO_8, val)) {
5555             vece = MO_8;
5556         } else if (val == dup_const(MO_16, val)) {
5557             vece = MO_16;
5558         } else if (val == dup_const(MO_32, val)) {
5559             vece = MO_32;
5560         }
5561 
5562         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5563         goto done;
5564     }
5565 
5566     /* If the two inputs form one 64-bit value, try dupm_vec. */
5567     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5568         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5569         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5570         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5571 
5572         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5573         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5574 
5575         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5576                              its->mem_base->reg, its->mem_offset)) {
5577             goto done;
5578         }
5579     }
5580 
5581     /* Fall back to generic expansion. */
5582     return false;
5583 
5584  done:
5585     ots->mem_coherent = 0;
5586     if (IS_DEAD_ARG(1)) {
5587         temp_dead(s, itsl);
5588     }
5589     if (IS_DEAD_ARG(2)) {
5590         temp_dead(s, itsh);
5591     }
5592     if (NEED_SYNC_ARG(0)) {
5593         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5594     } else if (IS_DEAD_ARG(0)) {
5595         temp_dead(s, ots);
5596     }
5597     return true;
5598 }
5599 
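/*
 * Helpers for placing call arguments: into a specific argument
 * register, or into an outgoing stack slot.
 */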
5600 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5601                          TCGRegSet allocated_regs)
5602 {
5603     if (ts->val_type == TEMP_VAL_REG) {
5604         if (ts->reg != reg) {
5605             tcg_reg_free(s, reg, allocated_regs);
5606             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5607                 /*
5608                  * Cross register class move not supported.  Sync the
5609                  * temp back to its slot and load from there.
5610                  */
5611                 temp_sync(s, ts, allocated_regs, 0, 0);
5612                 tcg_out_ld(s, ts->type, reg,
5613                            ts->mem_base->reg, ts->mem_offset);
5614             }
5615         }
5616     } else {
5617         TCGRegSet arg_set = 0;
5618 
5619         tcg_reg_free(s, reg, allocated_regs);
5620         tcg_regset_set_reg(arg_set, reg);
5621         temp_load(s, ts, arg_set, allocated_regs, 0);
5622     }
5623 }
5624 
5625 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5626                          TCGRegSet allocated_regs)
5627 {
5628     /*
5629      * When the destination is on the stack, load up the temp and store.
5630      * If there are many call-saved registers, the temp might live to
5631      * see another use; otherwise it'll be discarded.
5632      */
5633     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5634     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5635                arg_slot_stk_ofs(arg_slot));
5636 }
5637 
5638 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5639                             TCGTemp *ts, TCGRegSet *allocated_regs)
5640 {
5641     if (arg_slot_reg_p(l->arg_slot)) {
5642         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5643         load_arg_reg(s, reg, ts, *allocated_regs);
5644         tcg_regset_set_reg(*allocated_regs, reg);
5645     } else {
5646         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5647     }
5648 }
5649 
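/* Pass the address REF_BASE + REF_OFF in argument slot ARG_SLOT. */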
5650 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5651                          intptr_t ref_off, TCGRegSet *allocated_regs)
5652 {
5653     TCGReg reg;
5654 
5655     if (arg_slot_reg_p(arg_slot)) {
5656         reg = tcg_target_call_iarg_regs[arg_slot];
5657         tcg_reg_free(s, reg, *allocated_regs);
5658         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5659         tcg_regset_set_reg(*allocated_regs, reg);
5660     } else {
5661         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5662                             *allocated_regs, 0, false);
5663         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5664         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5665                    arg_slot_stk_ofs(arg_slot));
5666     }
5667 }
5668 
5669 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5670 {
5671     const int nb_oargs = TCGOP_CALLO(op);
5672     const int nb_iargs = TCGOP_CALLI(op);
5673     const TCGLifeData arg_life = op->life;
5674     const TCGHelperInfo *info = tcg_call_info(op);
5675     TCGRegSet allocated_regs = s->reserved_regs;
5676     int i;
5677 
5678     /*
5679      * Move inputs into place in reverse order,
5680      * so that we place stacked arguments first.
5681      */
5682     for (i = nb_iargs - 1; i >= 0; --i) {
5683         const TCGCallArgumentLoc *loc = &info->in[i];
5684         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5685 
5686         switch (loc->kind) {
5687         case TCG_CALL_ARG_NORMAL:
5688         case TCG_CALL_ARG_EXTEND_U:
5689         case TCG_CALL_ARG_EXTEND_S:
5690             load_arg_normal(s, loc, ts, &allocated_regs);
5691             break;
5692         case TCG_CALL_ARG_BY_REF:
5693             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5694             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5695                          arg_slot_stk_ofs(loc->ref_slot),
5696                          &allocated_regs);
5697             break;
5698         case TCG_CALL_ARG_BY_REF_N:
5699             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5700             break;
5701         default:
5702             g_assert_not_reached();
5703         }
5704     }
5705 
5706     /* Mark dead temporaries and free the associated registers.  */
5707     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5708         if (IS_DEAD_ARG(i)) {
5709             temp_dead(s, arg_temp(op->args[i]));
5710         }
5711     }
5712 
5713     /* Clobber call registers.  */
5714     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5715         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5716             tcg_reg_free(s, i, allocated_regs);
5717         }
5718     }
5719 
5720     /*
5721      * Save globals if they might be written by the helper,
5722      * sync them if they might be read.
5723      */
5724     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5725         /* Nothing to do */
5726     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5727         sync_globals(s, allocated_regs);
5728     } else {
5729         save_globals(s, allocated_regs);
5730     }
5731 
5732     /*
5733      * If the ABI passes a pointer to the returned struct as the first
5734      * argument, load that now.  Pass a pointer to the output home slot.
5735      */
5736     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5737         TCGTemp *ts = arg_temp(op->args[0]);
5738 
5739         if (!ts->mem_allocated) {
5740             temp_allocate_frame(s, ts);
5741         }
5742         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5743     }
5744 
5745     tcg_out_call(s, tcg_call_func(op), info);
5746 
5747     /* Assign output registers and emit moves if needed.  */
5748     switch (info->out_kind) {
5749     case TCG_CALL_RET_NORMAL:
5750         for (i = 0; i < nb_oargs; i++) {
5751             TCGTemp *ts = arg_temp(op->args[i]);
5752             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5753 
5754             /* ENV should not be modified.  */
5755             tcg_debug_assert(!temp_readonly(ts));
5756 
5757             set_temp_val_reg(s, ts, reg);
5758             ts->mem_coherent = 0;
5759         }
5760         break;
5761 
5762     case TCG_CALL_RET_BY_VEC:
5763         {
5764             TCGTemp *ts = arg_temp(op->args[0]);
5765 
5766             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5767             tcg_debug_assert(ts->temp_subindex == 0);
5768             if (!ts->mem_allocated) {
5769                 temp_allocate_frame(s, ts);
5770             }
5771             tcg_out_st(s, TCG_TYPE_V128,
5772                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5773                        ts->mem_base->reg, ts->mem_offset);
5774         }
5775         /* fall through to mark all parts in memory */
5776 
5777     case TCG_CALL_RET_BY_REF:
5778         /* The callee has performed a write through the reference. */
5779         for (i = 0; i < nb_oargs; i++) {
5780             TCGTemp *ts = arg_temp(op->args[i]);
5781             ts->val_type = TEMP_VAL_MEM;
5782         }
5783         break;
5784 
5785     default:
5786         g_assert_not_reached();
5787     }
5788 
5789     /* Flush or discard output registers as needed. */
5790     for (i = 0; i < nb_oargs; i++) {
5791         TCGTemp *ts = arg_temp(op->args[i]);
5792         if (NEED_SYNC_ARG(i)) {
5793             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5794         } else if (IS_DEAD_ARG(i)) {
5795             temp_dead(s, ts);
5796         }
5797     }
5798 }
5799 
5800 /**
5801  * atom_and_align_for_opc:
5802  * @s: tcg context
5803  * @opc: memory operation code
5804  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5805  * @allow_two_ops: true if we are prepared to issue two operations
5806  *
5807  * Return the alignment and atomicity to use for the inline fast path
5808  * for the given memory operation.  The alignment may be larger than
5809  * that specified in @opc, and the correct alignment will be diagnosed
5810  * by the slow path helper.
5811  *
5812  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5813  * and issue two loads or stores for subalignment.
5814  */
5815 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5816                                            MemOp host_atom, bool allow_two_ops)
5817 {
5818     MemOp align = memop_alignment_bits(opc);
5819     MemOp size = opc & MO_SIZE;
5820     MemOp half = size ? size - 1 : 0;
5821     MemOp atom = opc & MO_ATOM_MASK;
5822     MemOp atmax;
5823 
5824     switch (atom) {
5825     case MO_ATOM_NONE:
5826         /* The operation requires no specific atomicity. */
5827         atmax = MO_8;
5828         break;
5829 
5830     case MO_ATOM_IFALIGN:
5831         atmax = size;
5832         break;
5833 
5834     case MO_ATOM_IFALIGN_PAIR:
5835         atmax = half;
5836         break;
5837 
5838     case MO_ATOM_WITHIN16:
5839         atmax = size;
5840         if (size == MO_128) {
5841             /* Misalignment implies !within16, and therefore no atomicity. */
5842         } else if (host_atom != MO_ATOM_WITHIN16) {
5843             /* The host does not implement within16, so require alignment. */
5844             align = MAX(align, size);
5845         }
5846         break;
5847 
5848     case MO_ATOM_WITHIN16_PAIR:
5849         atmax = size;
5850         /*
5851          * Misalignment implies !within16, and therefore half atomicity.
5852          * Any host prepared for two operations can implement this with
5853          * half alignment.
5854          */
5855         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5856             align = MAX(align, half);
5857         }
5858         break;
5859 
5860     case MO_ATOM_SUBALIGN:
5861         atmax = size;
5862         if (host_atom != MO_ATOM_SUBALIGN) {
5863             /* If unaligned but not odd, there are subobjects up to half. */
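            /*
             * E.g. a 4-byte access with addr % 4 == 2 may be done as
             * two atomic 2-byte halves.
             */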
5864             if (allow_two_ops) {
5865                 align = MAX(align, half);
5866             } else {
5867                 align = MAX(align, size);
5868             }
5869         }
5870         break;
5871 
5872     default:
5873         g_assert_not_reached();
5874     }
5875 
5876     return (TCGAtomAlign){ .atom = atmax, .align = align };
5877 }
5878 
5879 /*
5880  * Similarly for qemu_ld/st slow path helpers.
5881  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5882  * using only the provided backend tcg_out_* functions.
5883  */
5884 
5885 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5886 {
5887     int ofs = arg_slot_stk_ofs(slot);
5888 
5889     /*
5890      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5891      * require extension to uint64_t, adjust the address for uint32_t.
5892      */
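    /* E.g. a 32-bit value in an 8-byte big-endian slot sits at ofs + 4. */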
5893     if (HOST_BIG_ENDIAN &&
5894         TCG_TARGET_REG_BITS == 64 &&
5895         type == TCG_TYPE_I32) {
5896         ofs += 4;
5897     }
5898     return ofs;
5899 }
5900 
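/*
 * Emit the NMOV moves/extensions described by MOV, placing each value
 * into its argument register or outgoing stack slot while avoiding
 * clobbering sources that have not yet been moved.
 */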
5901 static void tcg_out_helper_load_slots(TCGContext *s,
5902                                       unsigned nmov, TCGMovExtend *mov,
5903                                       const TCGLdstHelperParam *parm)
5904 {
5905     unsigned i;
5906     TCGReg dst3;
5907 
5908     /*
5909      * Start from the end, storing to the stack first.
5910      * This frees those registers, so we need not consider overlap.
5911      */
5912     for (i = nmov; i-- > 0; ) {
5913         unsigned slot = mov[i].dst;
5914 
5915         if (arg_slot_reg_p(slot)) {
5916             goto found_reg;
5917         }
5918 
5919         TCGReg src = mov[i].src;
5920         TCGType dst_type = mov[i].dst_type;
5921         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5922 
5923         /* The argument is going onto the stack; extend into scratch. */
5924         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5925             tcg_debug_assert(parm->ntmp != 0);
5926             mov[i].dst = src = parm->tmp[0];
5927             tcg_out_movext1(s, &mov[i]);
5928         }
5929 
5930         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5931                    tcg_out_helper_stk_ofs(dst_type, slot));
5932     }
5933     return;
5934 
5935  found_reg:
5936     /*
5937      * The remaining arguments are in registers.
5938      * Convert slot numbers to argument registers.
5939      */
5940     nmov = i + 1;
5941     for (i = 0; i < nmov; ++i) {
5942         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5943     }
5944 
5945     switch (nmov) {
5946     case 4:
5947         /* The backend must have provided enough temps for the worst case. */
5948         tcg_debug_assert(parm->ntmp >= 2);
5949 
5950         dst3 = mov[3].dst;
5951         for (unsigned j = 0; j < 3; ++j) {
5952             if (dst3 == mov[j].src) {
5953                 /*
5954                  * Conflict. Copy the source to a temporary, perform the
5955                  * remaining moves, then the extension from our scratch
5956                  * on the way out.
5957                  */
5958                 TCGReg scratch = parm->tmp[1];
5959 
5960                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5961                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5962                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5963                 return;
5964             }
5965         }
5966 
5967         /* No conflicts: perform this move and continue. */
5968         tcg_out_movext1(s, &mov[3]);
5969         /* fall through */
5970 
5971     case 3:
5972         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5973                         parm->ntmp ? parm->tmp[0] : -1);
5974         break;
5975     case 2:
5976         tcg_out_movext2(s, mov, mov + 1,
5977                         parm->ntmp ? parm->tmp[0] : -1);
5978         break;
5979     case 1:
5980         tcg_out_movext1(s, mov);
5981         break;
5982     default:
5983         g_assert_not_reached();
5984     }
5985 }
5986 
5987 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5988                                     TCGType type, tcg_target_long imm,
5989                                     const TCGLdstHelperParam *parm)
5990 {
5991     if (arg_slot_reg_p(slot)) {
5992         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5993     } else {
5994         int ofs = tcg_out_helper_stk_ofs(type, slot);
5995         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5996             tcg_debug_assert(parm->ntmp != 0);
5997             tcg_out_movi(s, type, parm->tmp[0], imm);
5998             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5999         }
6000     }
6001 }
6002 
6003 static void tcg_out_helper_load_common_args(TCGContext *s,
6004                                             const TCGLabelQemuLdst *ldst,
6005                                             const TCGLdstHelperParam *parm,
6006                                             const TCGHelperInfo *info,
6007                                             unsigned next_arg)
6008 {
6009     TCGMovExtend ptr_mov = {
6010         .dst_type = TCG_TYPE_PTR,
6011         .src_type = TCG_TYPE_PTR,
6012         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6013     };
6014     const TCGCallArgumentLoc *loc = &info->in[0];
6015     TCGType type;
6016     unsigned slot;
6017     tcg_target_ulong imm;
6018 
6019     /*
6020      * Handle env, which is always first.
6021      */
6022     ptr_mov.dst = loc->arg_slot;
6023     ptr_mov.src = TCG_AREG0;
6024     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6025 
6026     /*
6027      * Handle oi.
6028      */
6029     imm = ldst->oi;
6030     loc = &info->in[next_arg];
6031     type = TCG_TYPE_I32;
6032     switch (loc->kind) {
6033     case TCG_CALL_ARG_NORMAL:
6034         break;
6035     case TCG_CALL_ARG_EXTEND_U:
6036     case TCG_CALL_ARG_EXTEND_S:
6037         /* No extension required for MemOpIdx. */
6038         tcg_debug_assert(imm <= INT32_MAX);
6039         type = TCG_TYPE_REG;
6040         break;
6041     default:
6042         g_assert_not_reached();
6043     }
6044     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6045     next_arg++;
6046 
6047     /*
6048      * Handle ra.
6049      */
6050     loc = &info->in[next_arg];
6051     slot = loc->arg_slot;
6052     if (parm->ra_gen) {
6053         int arg_reg = -1;
6054         TCGReg ra_reg;
6055 
6056         if (arg_slot_reg_p(slot)) {
6057             arg_reg = tcg_target_call_iarg_regs[slot];
6058         }
6059         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6060 
6061         ptr_mov.dst = slot;
6062         ptr_mov.src = ra_reg;
6063         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6064     } else {
6065         imm = (uintptr_t)ldst->raddr;
6066         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6067     }
6068 }
6069 
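/*
 * Fill in one or two TCGMovExtend records moving LO (plus HI, when the
 * value occupies two host registers) into the slots described by LOC;
 * returns the number of records used.
 */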
6070 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6071                                        const TCGCallArgumentLoc *loc,
6072                                        TCGType dst_type, TCGType src_type,
6073                                        TCGReg lo, TCGReg hi)
6074 {
6075     MemOp reg_mo;
6076 
6077     if (dst_type <= TCG_TYPE_REG) {
6078         MemOp src_ext;
6079 
6080         switch (loc->kind) {
6081         case TCG_CALL_ARG_NORMAL:
6082             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6083             break;
6084         case TCG_CALL_ARG_EXTEND_U:
6085             dst_type = TCG_TYPE_REG;
6086             src_ext = MO_UL;
6087             break;
6088         case TCG_CALL_ARG_EXTEND_S:
6089             dst_type = TCG_TYPE_REG;
6090             src_ext = MO_SL;
6091             break;
6092         default:
6093             g_assert_not_reached();
6094         }
6095 
6096         mov[0].dst = loc->arg_slot;
6097         mov[0].dst_type = dst_type;
6098         mov[0].src = lo;
6099         mov[0].src_type = src_type;
6100         mov[0].src_ext = src_ext;
6101         return 1;
6102     }
6103 
6104     if (TCG_TARGET_REG_BITS == 32) {
6105         assert(dst_type == TCG_TYPE_I64);
6106         reg_mo = MO_32;
6107     } else {
6108         assert(dst_type == TCG_TYPE_I128);
6109         reg_mo = MO_64;
6110     }
6111 
6112     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6113     mov[0].src = lo;
6114     mov[0].dst_type = TCG_TYPE_REG;
6115     mov[0].src_type = TCG_TYPE_REG;
6116     mov[0].src_ext = reg_mo;
6117 
6118     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6119     mov[1].src = hi;
6120     mov[1].dst_type = TCG_TYPE_REG;
6121     mov[1].src_type = TCG_TYPE_REG;
6122     mov[1].src_ext = reg_mo;
6123 
6124     return 2;
6125 }
6126 
6127 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6128                                    const TCGLdstHelperParam *parm)
6129 {
6130     const TCGHelperInfo *info;
6131     const TCGCallArgumentLoc *loc;
6132     TCGMovExtend mov[2];
6133     unsigned next_arg, nmov;
6134     MemOp mop = get_memop(ldst->oi);
6135 
6136     switch (mop & MO_SIZE) {
6137     case MO_8:
6138     case MO_16:
6139     case MO_32:
6140         info = &info_helper_ld32_mmu;
6141         break;
6142     case MO_64:
6143         info = &info_helper_ld64_mmu;
6144         break;
6145     case MO_128:
6146         info = &info_helper_ld128_mmu;
6147         break;
6148     default:
6149         g_assert_not_reached();
6150     }
6151 
6152     /* Defer env argument. */
6153     next_arg = 1;
6154 
6155     loc = &info->in[next_arg];
6156     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6157         /*
6158          * 32-bit host with 32-bit guest: zero-extend the guest address
6159          * to 64 bits for the helper by storing the low part, then
6160          * load a zero for the high part.
6161          */
6162         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6163                                TCG_TYPE_I32, TCG_TYPE_I32,
6164                                ldst->addr_reg, -1);
6165         tcg_out_helper_load_slots(s, 1, mov, parm);
6166 
6167         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6168                                 TCG_TYPE_I32, 0, parm);
6169         next_arg += 2;
6170     } else {
6171         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6172                                       ldst->addr_reg, -1);
6173         tcg_out_helper_load_slots(s, nmov, mov, parm);
6174         next_arg += nmov;
6175     }
6176 
6177     switch (info->out_kind) {
6178     case TCG_CALL_RET_NORMAL:
6179     case TCG_CALL_RET_BY_VEC:
6180         break;
6181     case TCG_CALL_RET_BY_REF:
6182         /*
6183          * The return reference is in the first argument slot.
6184          * We need memory in which to return: re-use the top of stack.
6185          */
6186         {
6187             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6188 
6189             if (arg_slot_reg_p(0)) {
6190                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6191                                  TCG_REG_CALL_STACK, ofs_slot0);
6192             } else {
6193                 tcg_debug_assert(parm->ntmp != 0);
6194                 tcg_out_addi_ptr(s, parm->tmp[0],
6195                                  TCG_REG_CALL_STACK, ofs_slot0);
6196                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6197                            TCG_REG_CALL_STACK, ofs_slot0);
6198             }
6199         }
6200         break;
6201     default:
6202         g_assert_not_reached();
6203     }
6204 
6205     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6206 }
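
/*
 * For orientation (editorial note; the signature is paraphrased from
 * the tcg-ldst helper declarations rather than quoted): the slow-path
 * load helpers take the guest address as a uint64_t regardless of the
 * guest width, roughly
 *
 *     uint64_t helper_ld64_mmu(CPUArchState *env, uint64_t addr,
 *                              MemOpIdx oi, uintptr_t retaddr);
 *
 * which is why a 32-bit host must spend two argument slots on the
 * address, with an explicit zero loaded for the high half above.
 */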
6207 
6208 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6209                                   bool load_sign,
6210                                   const TCGLdstHelperParam *parm)
6211 {
6212     MemOp mop = get_memop(ldst->oi);
6213     TCGMovExtend mov[2];
6214     int ofs_slot0;
6215 
6216     switch (ldst->type) {
6217     case TCG_TYPE_I64:
6218         if (TCG_TARGET_REG_BITS == 32) {
6219             break;
6220         }
6221         /* fall through */
6222 
6223     case TCG_TYPE_I32:
6224         mov[0].dst = ldst->datalo_reg;
6225         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6226         mov[0].dst_type = ldst->type;
6227         mov[0].src_type = TCG_TYPE_REG;
6228 
6229         /*
6230          * If load_sign, then we allowed the helper to perform the
6231          * appropriate sign extension to tcg_target_ulong, and all
6232          * we need now is a plain move.
6233          *
6234          * If not, then we expect the relevant extension
6235          * instruction to be no more expensive than a move, and
6236          * we thus save icache space, etc., by only using one of
6237          * two helper functions.
6238          */
6239         if (load_sign || !(mop & MO_SIGN)) {
6240             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6241                 mov[0].src_ext = MO_32;
6242             } else {
6243                 mov[0].src_ext = MO_64;
6244             }
6245         } else {
6246             mov[0].src_ext = mop & MO_SSIZE;
6247         }
6248         tcg_out_movext1(s, mov);
6249         return;
6250 
6251     case TCG_TYPE_I128:
6252         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6253         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6254         switch (TCG_TARGET_CALL_RET_I128) {
6255         case TCG_CALL_RET_NORMAL:
6256             break;
6257         case TCG_CALL_RET_BY_VEC:
6258             tcg_out_st(s, TCG_TYPE_V128,
6259                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6260                        TCG_REG_CALL_STACK, ofs_slot0);
6261             /* fall through */
6262         case TCG_CALL_RET_BY_REF:
6263             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6264                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6265             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6266                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6267             return;
6268         default:
6269             g_assert_not_reached();
6270         }
6271         break;
6272 
6273     default:
6274         g_assert_not_reached();
6275     }
6276 
6277     mov[0].dst = ldst->datalo_reg;
6278     mov[0].src =
6279         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6280     mov[0].dst_type = TCG_TYPE_REG;
6281     mov[0].src_type = TCG_TYPE_REG;
6282     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6283 
6284     mov[1].dst = ldst->datahi_reg;
6285     mov[1].src =
6286         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6287     mov[1].dst_type = TCG_TYPE_REG;
6288     mov[1].src_type = TCG_TYPE_REG;
6289     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6290 
6291     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6292 }
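
/*
 * Editorial sketch: for a 64-bit load on a 32-bit host, the tail of
 * this function pairs the two halves of the return value with the
 * ABI's two output registers, again endian-indexed, conceptually
 *
 *     datalo_reg <- oarg_reg[HOST_BIG_ENDIAN];
 *     datahi_reg <- oarg_reg[!HOST_BIG_ENDIAN];
 *
 * tcg_out_movext2() then orders the two moves (using parm->tmp[0]
 * when available) so that neither move clobbers the other's source.
 */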
6293 
6294 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6295                                    const TCGLdstHelperParam *parm)
6296 {
6297     const TCGHelperInfo *info;
6298     const TCGCallArgumentLoc *loc;
6299     TCGMovExtend mov[4];
6300     TCGType data_type;
6301     unsigned next_arg, nmov, n;
6302     MemOp mop = get_memop(ldst->oi);
6303 
6304     switch (mop & MO_SIZE) {
6305     case MO_8:
6306     case MO_16:
6307     case MO_32:
6308         info = &info_helper_st32_mmu;
6309         data_type = TCG_TYPE_I32;
6310         break;
6311     case MO_64:
6312         info = &info_helper_st64_mmu;
6313         data_type = TCG_TYPE_I64;
6314         break;
6315     case MO_128:
6316         info = &info_helper_st128_mmu;
6317         data_type = TCG_TYPE_I128;
6318         break;
6319     default:
6320         g_assert_not_reached();
6321     }
6322 
6323     /* Defer env argument. */
6324     next_arg = 1;
6325     nmov = 0;
6326 
6327     /* Handle addr argument. */
6328     loc = &info->in[next_arg];
6329     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6330     if (TCG_TARGET_REG_BITS == 32) {
6331         /*
6332          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6333          * to 64 bits for the helper by storing the low part.  Later,
6334          * after we have processed the register inputs, we will load a
6335          * zero for the high part.
6336          */
6337         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6338                                TCG_TYPE_I32, TCG_TYPE_I32,
6339                                ldst->addr_reg, -1);
6340         next_arg += 2;
6341         nmov += 1;
6342     } else {
6343         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6344                                    ldst->addr_reg, -1);
6345         next_arg += n;
6346         nmov += n;
6347     }
6348 
6349     /* Handle data argument. */
6350     loc = &info->in[next_arg];
6351     switch (loc->kind) {
6352     case TCG_CALL_ARG_NORMAL:
6353     case TCG_CALL_ARG_EXTEND_U:
6354     case TCG_CALL_ARG_EXTEND_S:
6355         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6356                                    ldst->datalo_reg, ldst->datahi_reg);
6357         next_arg += n;
6358         nmov += n;
6359         tcg_out_helper_load_slots(s, nmov, mov, parm);
6360         break;
6361 
6362     case TCG_CALL_ARG_BY_REF:
6363         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6364         tcg_debug_assert(data_type == TCG_TYPE_I128);
6365         tcg_out_st(s, TCG_TYPE_I64,
6366                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6367                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6368         tcg_out_st(s, TCG_TYPE_I64,
6369                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6370                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6371 
6372         tcg_out_helper_load_slots(s, nmov, mov, parm);
6373 
6374         if (arg_slot_reg_p(loc->arg_slot)) {
6375             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6376                              TCG_REG_CALL_STACK,
6377                              arg_slot_stk_ofs(loc->ref_slot));
6378         } else {
6379             tcg_debug_assert(parm->ntmp != 0);
6380             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6381                              arg_slot_stk_ofs(loc->ref_slot));
6382             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6383                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6384         }
6385         next_arg += 2;
6386         break;
6387 
6388     default:
6389         g_assert_not_reached();
6390     }
6391 
6392     if (TCG_TARGET_REG_BITS == 32) {
6393         /* Zero extend the address by loading a zero for the high part. */
6394         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6395         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6396     }
6397 
6398     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6399 }
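
/*
 * Editorial counterpart to the load note above: the store helpers
 * return nothing and take the data after the address, roughly
 *
 *     void helper_st64_mmu(CPUArchState *env, uint64_t addr,
 *                          uint64_t val, MemOpIdx oi, uintptr_t retaddr);
 *
 * so tcg_out_st_helper_args() may need to marshal up to four register
 * moves: two for the address and two for the data on a 32-bit host.
 */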
6400 
6401 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6402 {
6403     int i, start_words, num_insns;
6404     TCGOp *op;
6405 
6406     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6407                  && qemu_log_in_addr_range(pc_start))) {
6408         FILE *logfile = qemu_log_trylock();
6409         if (logfile) {
6410             fprintf(logfile, "OP:\n");
6411             tcg_dump_ops(s, logfile, false);
6412             fprintf(logfile, "\n");
6413             qemu_log_unlock(logfile);
6414         }
6415     }
6416 
6417 #ifdef CONFIG_DEBUG_TCG
6418     /* Ensure all labels referenced have been emitted.  */
6419     {
6420         TCGLabel *l;
6421         bool error = false;
6422 
6423         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6424             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6425                 qemu_log_mask(CPU_LOG_TB_OP,
6426                               "$L%d referenced but not present.\n", l->id);
6427                 error = true;
6428             }
6429         }
6430         assert(!error);
6431     }
6432 #endif
6433 
6434     /* Do not reuse any EBB that may be allocated within the TB. */
6435     tcg_temp_ebb_reset_freed(s);
6436 
6437     tcg_optimize(s);
6438 
6439     reachable_code_pass(s);
6440     liveness_pass_0(s);
6441     liveness_pass_1(s);
6442 
6443     if (s->nb_indirects > 0) {
6444         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6445                      && qemu_log_in_addr_range(pc_start))) {
6446             FILE *logfile = qemu_log_trylock();
6447             if (logfile) {
6448                 fprintf(logfile, "OP before indirect lowering:\n");
6449                 tcg_dump_ops(s, logfile, false);
6450                 fprintf(logfile, "\n");
6451                 qemu_log_unlock(logfile);
6452             }
6453         }
6454 
6455         /* Replace indirect temps with direct temps.  */
6456         if (liveness_pass_2(s)) {
6457             /* If changes were made, re-run liveness.  */
6458             liveness_pass_1(s);
6459         }
6460     }
6461 
6462     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6463                  && qemu_log_in_addr_range(pc_start))) {
6464         FILE *logfile = qemu_log_trylock();
6465         if (logfile) {
6466             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6467             tcg_dump_ops(s, logfile, true);
6468             fprintf(logfile, "\n");
6469             qemu_log_unlock(logfile);
6470         }
6471     }
6472 
6473     /* Initialize goto_tb jump offsets. */
6474     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6475     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6476     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6477     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6478 
6479     tcg_reg_alloc_start(s);
6480 
6481     /*
6482      * Reset the buffer pointers when restarting after overflow.
6483      * TODO: Move this into translate-all.c with the rest of the
6484      * buffer management.  Having only this done here is confusing.
6485      */
6486     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6487     s->code_ptr = s->code_buf;
6488     s->data_gen_ptr = NULL;
6489 
6490     QSIMPLEQ_INIT(&s->ldst_labels);
6491     s->pool_labels = NULL;
6492 
6493     start_words = s->insn_start_words;
6494     s->gen_insn_data =
6495         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6496 
6497     tcg_out_tb_start(s);
6498 
6499     num_insns = -1;
6500     QTAILQ_FOREACH(op, &s->ops, link) {
6501         TCGOpcode opc = op->opc;
6502 
6503         switch (opc) {
6504         case INDEX_op_mov:
6505         case INDEX_op_mov_vec:
6506             tcg_reg_alloc_mov(s, op);
6507             break;
6508         case INDEX_op_dup_vec:
6509             tcg_reg_alloc_dup(s, op);
6510             break;
6511         case INDEX_op_insn_start:
6512             if (num_insns >= 0) {
6513                 size_t off = tcg_current_code_size(s);
6514                 s->gen_insn_end_off[num_insns] = off;
6515                 /* Assert that we do not overflow our stored offset.  */
6516                 assert(s->gen_insn_end_off[num_insns] == off);
6517             }
6518             num_insns++;
6519             for (i = 0; i < start_words; ++i) {
6520                 s->gen_insn_data[num_insns * start_words + i] =
6521                     tcg_get_insn_start_param(op, i);
6522             }
6523             break;
6524         case INDEX_op_discard:
6525             temp_dead(s, arg_temp(op->args[0]));
6526             break;
6527         case INDEX_op_set_label:
6528             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6529             tcg_out_label(s, arg_label(op->args[0]));
6530             break;
6531         case INDEX_op_call:
6532             tcg_reg_alloc_call(s, op);
6533             break;
6534         case INDEX_op_exit_tb:
6535             tcg_out_exit_tb(s, op->args[0]);
6536             break;
6537         case INDEX_op_goto_tb:
6538             tcg_out_goto_tb(s, op->args[0]);
6539             break;
6540         case INDEX_op_dup2_vec:
6541             if (tcg_reg_alloc_dup2(s, op)) {
6542                 break;
6543             }
6544             /* fall through */
6545         default:
6546             /* Sanity check that we've not introduced any unhandled opcodes. */
6547             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6548                                               TCGOP_FLAGS(op)));
6549             /* Note: it would be much faster to have specialized
6550                register allocator functions for some common argument
6551                patterns.  */
6552             tcg_reg_alloc_op(s, op);
6553             break;
6554         }
6555         /* Test for (pending) buffer overflow.  The assumption is that any
6556            one operation beginning below the high water mark cannot overrun
6557            the buffer completely.  Thus we can test for overflow after
6558            generating code without having to check during generation.  */
6559         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6560             return -1;
6561         }
6562         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6563         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6564             return -2;
6565         }
6566     }
6567     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6568     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6569 
6570     /* Generate TB finalization at the end of block */
6571     i = tcg_out_ldst_finalize(s);
6572     if (i < 0) {
6573         return i;
6574     }
6575     i = tcg_out_pool_finalize(s);
6576     if (i < 0) {
6577         return i;
6578     }
6579     if (!tcg_resolve_relocs(s)) {
6580         return -2;
6581     }
6582 
6583 #ifndef CONFIG_TCG_INTERPRETER
6584     /* flush instruction cache */
6585     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6586                         (uintptr_t)s->code_buf,
6587                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6588 #endif
6589 
6590     return tcg_current_code_size(s);
6591 }
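
/*
 * Editorial note on the negative return values: the caller is
 * expected to treat them as restart requests, conceptually
 *
 *     n = tcg_gen_code(s, tb, pc_start);
 *     if (n < 0) {
 *         // -1: code_gen_buffer high water mark passed -> new buffer
 *         // -2: offsets no longer fit in gen_insn_end_off (uint16_t),
 *         //     or relocations failed -> retry with a smaller TB
 *     }
 */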
6592 
6593 #ifdef ELF_HOST_MACHINE
6594 /* In order to use this feature, the backend needs to do three things:
6595 
6596    (1) Define ELF_HOST_MACHINE, which both supplies the value placed
6597        in the ELF image and signals support for the feature.
6598 
6599    (2) Define tcg_register_jit.  This should create a buffer containing
6600        the contents of a .debug_frame section that describes the post-
6601        prologue unwind info for the tcg machine.
6602 
6603    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6604 */
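
/*
 * A minimal sketch of steps (2) and (3) for a hypothetical backend
 * (the DebugFrame contents here are invented for illustration):
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie = { ... CIE describing the prologue's frame ... },
 *         .fde = { ... func_start/func_len patched at runtime ... },
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */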
6605 
6606 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6607 typedef enum {
6608     JIT_NOACTION = 0,
6609     JIT_REGISTER_FN,
6610     JIT_UNREGISTER_FN
6611 } jit_actions_t;
6612 
6613 struct jit_code_entry {
6614     struct jit_code_entry *next_entry;
6615     struct jit_code_entry *prev_entry;
6616     const void *symfile_addr;
6617     uint64_t symfile_size;
6618 };
6619 
6620 struct jit_descriptor {
6621     uint32_t version;
6622     uint32_t action_flag;
6623     struct jit_code_entry *relevant_entry;
6624     struct jit_code_entry *first_entry;
6625 };
6626 
6627 void __jit_debug_register_code(void) __attribute__((noinline));
6628 void __jit_debug_register_code(void)
6629 {
6630     asm("");
6631 }
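
/*
 * GDB sets a breakpoint on __jit_debug_register_code; the empty asm,
 * together with the noinline attribute, keeps the compiler from
 * optimizing the call away, so each call traps into GDB, which then
 * re-reads __jit_debug_descriptor.
 */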
6632 
6633 /* Must statically initialize the version, because GDB may check
6634    the version before we can set it.  */
6635 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6636 
6637 /* End GDB interface.  */
6638 
6639 static int find_string(const char *strtab, const char *str)
6640 {
6641     const char *p = strtab + 1;
6642 
6643     while (1) {
6644         if (strcmp(p, str) == 0) {
6645             return p - strtab;
6646         }
6647         p += strlen(p) + 1;
6648     }
6649 }
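
/*
 * Editorial note: find_string() has no termination check, so it must
 * only be called with strings known to be in the table.  For example,
 * with strtab = "\0" ".text\0" ".strtab",
 * find_string(strtab, ".strtab") returns 7, the offset of its first
 * character.  All lookups below hit entries of the static img->str.
 */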
6650 
6651 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6652                                  const void *debug_frame,
6653                                  size_t debug_frame_size)
6654 {
6655     struct __attribute__((packed)) DebugInfo {
6656         uint32_t  len;
6657         uint16_t  version;
6658         uint32_t  abbrev;
6659         uint8_t   ptr_size;
6660         uint8_t   cu_die;
6661         uint16_t  cu_lang;
6662         uintptr_t cu_low_pc;
6663         uintptr_t cu_high_pc;
6664         uint8_t   fn_die;
6665         char      fn_name[16];
6666         uintptr_t fn_low_pc;
6667         uintptr_t fn_high_pc;
6668         uint8_t   cu_eoc;
6669     };
6670 
6671     struct ElfImage {
6672         ElfW(Ehdr) ehdr;
6673         ElfW(Phdr) phdr;
6674         ElfW(Shdr) shdr[7];
6675         ElfW(Sym)  sym[2];
6676         struct DebugInfo di;
6677         uint8_t    da[24];
6678         char       str[80];
6679     };
6680 
6681     struct ElfImage *img;
6682 
6683     static const struct ElfImage img_template = {
6684         .ehdr = {
6685             .e_ident[EI_MAG0] = ELFMAG0,
6686             .e_ident[EI_MAG1] = ELFMAG1,
6687             .e_ident[EI_MAG2] = ELFMAG2,
6688             .e_ident[EI_MAG3] = ELFMAG3,
6689             .e_ident[EI_CLASS] = ELF_CLASS,
6690             .e_ident[EI_DATA] = ELF_DATA,
6691             .e_ident[EI_VERSION] = EV_CURRENT,
6692             .e_type = ET_EXEC,
6693             .e_machine = ELF_HOST_MACHINE,
6694             .e_version = EV_CURRENT,
6695             .e_phoff = offsetof(struct ElfImage, phdr),
6696             .e_shoff = offsetof(struct ElfImage, shdr),
6697             .e_ehsize = sizeof(ElfW(Ehdr)),
6698             .e_phentsize = sizeof(ElfW(Phdr)),
6699             .e_phnum = 1,
6700             .e_shentsize = sizeof(ElfW(Shdr)),
6701             .e_shnum = ARRAY_SIZE(img->shdr),
6702             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6703 #ifdef ELF_HOST_FLAGS
6704             .e_flags = ELF_HOST_FLAGS,
6705 #endif
6706 #ifdef ELF_OSABI
6707             .e_ident[EI_OSABI] = ELF_OSABI,
6708 #endif
6709         },
6710         .phdr = {
6711             .p_type = PT_LOAD,
6712             .p_flags = PF_X,
6713         },
6714         .shdr = {
6715             [0] = { .sh_type = SHT_NULL },
6716             /* Trick: The contents of code_gen_buffer are not present in
6717                this fake ELF file; that got allocated elsewhere.  Therefore
6718                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6719                will not look for contents.  We can record any address.  */
6720             [1] = { /* .text */
6721                 .sh_type = SHT_NOBITS,
6722                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6723             },
6724             [2] = { /* .debug_info */
6725                 .sh_type = SHT_PROGBITS,
6726                 .sh_offset = offsetof(struct ElfImage, di),
6727                 .sh_size = sizeof(struct DebugInfo),
6728             },
6729             [3] = { /* .debug_abbrev */
6730                 .sh_type = SHT_PROGBITS,
6731                 .sh_offset = offsetof(struct ElfImage, da),
6732                 .sh_size = sizeof(img->da),
6733             },
6734             [4] = { /* .debug_frame */
6735                 .sh_type = SHT_PROGBITS,
6736                 .sh_offset = sizeof(struct ElfImage),
6737             },
6738             [5] = { /* .symtab */
6739                 .sh_type = SHT_SYMTAB,
6740                 .sh_offset = offsetof(struct ElfImage, sym),
6741                 .sh_size = sizeof(img->sym),
6742                 .sh_info = 1,
6743                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6744                 .sh_entsize = sizeof(ElfW(Sym)),
6745             },
6746             [6] = { /* .strtab */
6747                 .sh_type = SHT_STRTAB,
6748                 .sh_offset = offsetof(struct ElfImage, str),
6749                 .sh_size = sizeof(img->str),
6750             }
6751         },
6752         .sym = {
6753             [1] = { /* code_gen_buffer */
6754                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6755                 .st_shndx = 1,
6756             }
6757         },
6758         .di = {
6759             .len = sizeof(struct DebugInfo) - 4,
6760             .version = 2,
6761             .ptr_size = sizeof(void *),
6762             .cu_die = 1,
6763             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6764             .fn_die = 2,
6765             .fn_name = "code_gen_buffer"
6766         },
6767         .da = {
6768             1,          /* abbrev number (the cu) */
6769             0x11, 1,    /* DW_TAG_compile_unit, has children */
6770             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6771             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6772             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6773             0, 0,       /* end of abbrev */
6774             2,          /* abbrev number (the fn) */
6775             0x2e, 0,    /* DW_TAG_subprogram, no children */
6776             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6777             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6778             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6779             0, 0,       /* end of abbrev */
6780             0           /* no more abbrev */
6781         },
6782         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6783                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6784     };
6785 
6786     /* We only need a single jit entry; statically allocate it.  */
6787     static struct jit_code_entry one_entry;
6788 
6789     uintptr_t buf = (uintptr_t)buf_ptr;
6790     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6791     DebugFrameHeader *dfh;
6792 
6793     img = g_malloc(img_size);
6794     *img = img_template;
6795 
6796     img->phdr.p_vaddr = buf;
6797     img->phdr.p_paddr = buf;
6798     img->phdr.p_memsz = buf_size;
6799 
6800     img->shdr[1].sh_name = find_string(img->str, ".text");
6801     img->shdr[1].sh_addr = buf;
6802     img->shdr[1].sh_size = buf_size;
6803 
6804     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6805     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6806 
6807     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6808     img->shdr[4].sh_size = debug_frame_size;
6809 
6810     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6811     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6812 
6813     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6814     img->sym[1].st_value = buf;
6815     img->sym[1].st_size = buf_size;
6816 
6817     img->di.cu_low_pc = buf;
6818     img->di.cu_high_pc = buf + buf_size;
6819     img->di.fn_low_pc = buf;
6820     img->di.fn_high_pc = buf + buf_size;
6821 
6822     dfh = (DebugFrameHeader *)(img + 1);
6823     memcpy(dfh, debug_frame, debug_frame_size);
6824     dfh->fde.func_start = buf;
6825     dfh->fde.func_len = buf_size;
6826 
6827 #ifdef DEBUG_JIT
6828     /* Enable this block to debug the creation of the ELF image file.
6829        The result can be inspected with readelf, objdump, etc.  */
6830     {
6831         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6832         FILE *f = fopen(jit, "w+b");
6833         if (f) {
6834             if (fwrite(img, img_size, 1, f) != 1) {
6835                 /* One img_size-byte item; ignore any write failure.  */
6836             }
6837             fclose(f);
6838         }
6839     }
6840 #endif
6841 
6842     one_entry.symfile_addr = img;
6843     one_entry.symfile_size = img_size;
6844 
6845     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6846     __jit_debug_descriptor.relevant_entry = &one_entry;
6847     __jit_debug_descriptor.first_entry = &one_entry;
6848     __jit_debug_register_code();
6849 }
6850 #else
6851 /* No support for the feature.  Provide the entry point expected by exec.c,
6852    and implement the internal function we declared earlier.  */
6853 
6854 static void tcg_register_jit_int(const void *buf, size_t size,
6855                                  const void *debug_frame,
6856                                  size_t debug_frame_size)
6857 {
6858 }
6859 
6860 void tcg_register_jit(const void *buf, size_t buf_size)
6861 {
6862 }
6863 #endif /* ELF_HOST_MACHINE */
6864 
6865 #if !TCG_TARGET_MAYBE_vec
6866 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6867 {
6868     g_assert_not_reached();
6869 }
6870 #endif
6871