/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
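/*
 * For example (illustrative reading of the fields): atom = MO_32 with
 * align = MO_16 describes an access that must be performed in (at
 * least) atomic 4-byte pieces, on an address that is 2-byte aligned.
 */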

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
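/*
 * For example, on a host where TCG_TARGET_INSN_UNIT_SIZE == 1 (such as
 * x86), tcg_out32() emits the value as four single-byte insn units via
 * memcpy and advances code_ptr by four; on a host with 4-byte units it
 * stores the value directly as one unit.
 */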

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
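/*
 * Typical usage for a forward branch: the branch is emitted first and
 * records a relocation against the label with tcg_out_reloc(); when the
 * label is eventually placed, tcg_out_label() binds its value, and
 * tcg_resolve_relocs() patches every recorded site through the
 * backend's patch_reloc().
 */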

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
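/*
 * For example, tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SB, r)
 * sign-extends the low 8 bits of r into the 64-bit register d via the
 * backend's tcg_out_ext8s().
 */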

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch register or two xchg operations.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
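/*
 * For example, in the "clockwise" cycle each destination is the next
 * move's source register.  Two xchgs rotate the three values into their
 * destinations, leaving only the extensions to perform; if the backend
 * has no xchg, src1 is parked in the scratch register while the other
 * two moves are done in a safe order.
 */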

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
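/*
 * Note the resulting order: entries are sorted by descending nlong, and
 * within equal nlong by descending data, so that duplicate constants
 * become adjacent and can be merged in tcg_out_pool_finalize().
 */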

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
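/*
 * For example, an entry C_O1_I2(r, r, ri) in tcg-target-con-set.h
 * expands here to the enumerator c_o1_i2_r_r_ri.
 */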

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
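/*
 * Under this second expansion, the same C_O1_I2(r, r, ri) entry becomes
 * { 1, 2, { "r", "r", "ri" } }: one output, two inputs, with the
 * constraint letters preserved as strings for later parsing.
 */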

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;
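/*
 * A backend might provide, for example (sketch only; tgen_add and
 * tgen_addi stand in for hypothetical backend emitters):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 */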

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
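/*
 * Each typemask packs one 3-bit typecode per slot: bits [2:0] hold the
 * return type and bits [3n+2:3n] hold the type of argument n.  E.g. the
 * info_helper_ld64_mmu mask above decodes as an i64 return, then env,
 * i64 addr, i32 oi and ptr ra arguments.
 */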

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
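/*
 * For example, with 6 integer argument registers (as on an x86-64-like
 * host), slots 0-5 map to those registers and slot 6 is the first stack
 * slot, at byte offset TCG_TARGET_CALL_STACK_OFFSET.
 */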

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
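/*
 * E.g. with the TCG_CALL_ARG_EVEN convention (32-bit hosts that pass
 * 64-bit values in aligned register pairs), an argument about to start
 * in odd slot 1 is pushed to slot 2, leaving slot 1 unused.
 */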
1304 
1305 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1306                          TCGCallArgumentKind kind)
1307 {
1308     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1309 
1310     *loc = (TCGCallArgumentLoc){
1311         .kind = kind,
1312         .arg_idx = cum->arg_idx,
1313         .arg_slot = cum->arg_slot,
1314     };
1315     cum->info_in_idx++;
1316     cum->arg_slot++;
1317 }
1318 
1319 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1320                                 TCGHelperInfo *info, int n)
1321 {
1322     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1323 
1324     for (int i = 0; i < n; ++i) {
1325         /* Layout all using the same arg_idx, adjusting the subindex. */
1326         loc[i] = (TCGCallArgumentLoc){
1327             .kind = TCG_CALL_ARG_NORMAL,
1328             .arg_idx = cum->arg_idx,
1329             .tmp_subindex = i,
1330             .arg_slot = cum->arg_slot + i,
1331         };
1332     }
1333     cum->info_in_idx += n;
1334     cum->arg_slot += n;
1335 }
1336 
1337 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1338 {
1339     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1340     int n = 128 / TCG_TARGET_REG_BITS;
1341 
1342     /* The first subindex carries the pointer. */
1343     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1344 
1345     /*
1346      * The callee is allowed to clobber memory associated with
1347      * structure pass by-reference.  Therefore we must make copies.
1348      * Allocate space from "ref_slot", which will be adjusted to
1349      * follow the parameters on the stack.
1350      */
1351     loc[0].ref_slot = cum->ref_slot;
1352 
1353     /*
1354      * Subsequent words also go into the reference slot, but
1355      * do not accumulate into the regular arguments.
1356      */
1357     for (int i = 1; i < n; ++i) {
1358         loc[i] = (TCGCallArgumentLoc){
1359             .kind = TCG_CALL_ARG_BY_REF_N,
1360             .arg_idx = cum->arg_idx,
1361             .tmp_subindex = i,
1362             .ref_slot = cum->ref_slot + i,
1363         };
1364     }
1365     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1366     cum->ref_slot += n;
1367 }
1368 
1369 static void init_call_layout(TCGHelperInfo *info)
1370 {
1371     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1372     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1373     unsigned typemask = info->typemask;
1374     unsigned typecode;
1375     TCGCumulativeArgs cum = { };
1376 
1377     /*
1378      * Parse and place any function return value.
1379      */
1380     typecode = typemask & 7;
1381     switch (typecode) {
1382     case dh_typecode_void:
1383         info->nr_out = 0;
1384         break;
1385     case dh_typecode_i32:
1386     case dh_typecode_s32:
1387     case dh_typecode_ptr:
1388         info->nr_out = 1;
1389         info->out_kind = TCG_CALL_RET_NORMAL;
1390         break;
1391     case dh_typecode_i64:
1392     case dh_typecode_s64:
1393         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1394         info->out_kind = TCG_CALL_RET_NORMAL;
1395         /* Query the last register now to trigger any assert early. */
1396         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1397         break;
1398     case dh_typecode_i128:
1399         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1400         info->out_kind = TCG_TARGET_CALL_RET_I128;
1401         switch (TCG_TARGET_CALL_RET_I128) {
1402         case TCG_CALL_RET_NORMAL:
1403             /* Query the last register now to trigger any assert early. */
1404             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1405             break;
1406         case TCG_CALL_RET_BY_VEC:
1407             /* Query the single register now to trigger any assert early. */
1408             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1409             break;
1410         case TCG_CALL_RET_BY_REF:
1411             /*
1412              * Allocate the first argument to the output.
1413              * We don't need to store this anywhere, just make it
1414              * unavailable for use in the input loop below.
1415              */
1416             cum.arg_slot = 1;
1417             break;
1418         default:
1419             qemu_build_not_reached();
1420         }
1421         break;
1422     default:
1423         g_assert_not_reached();
1424     }
1425 
1426     /*
1427      * Parse and place function arguments.
1428      */
1429     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1430         TCGCallArgumentKind kind;
1431         TCGType type;
1432 
1433         typecode = typemask & 7;
1434         switch (typecode) {
1435         case dh_typecode_i32:
1436         case dh_typecode_s32:
1437             type = TCG_TYPE_I32;
1438             break;
1439         case dh_typecode_i64:
1440         case dh_typecode_s64:
1441             type = TCG_TYPE_I64;
1442             break;
1443         case dh_typecode_ptr:
1444             type = TCG_TYPE_PTR;
1445             break;
1446         case dh_typecode_i128:
1447             type = TCG_TYPE_I128;
1448             break;
1449         default:
1450             g_assert_not_reached();
1451         }
1452 
1453         switch (type) {
1454         case TCG_TYPE_I32:
1455             switch (TCG_TARGET_CALL_ARG_I32) {
1456             case TCG_CALL_ARG_EVEN:
1457                 layout_arg_even(&cum);
1458                 /* fall through */
1459             case TCG_CALL_ARG_NORMAL:
1460                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1461                 break;
1462             case TCG_CALL_ARG_EXTEND:
1463                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1464                 layout_arg_1(&cum, info, kind);
1465                 break;
1466             default:
1467                 qemu_build_not_reached();
1468             }
1469             break;
1470 
1471         case TCG_TYPE_I64:
1472             switch (TCG_TARGET_CALL_ARG_I64) {
1473             case TCG_CALL_ARG_EVEN:
1474                 layout_arg_even(&cum);
1475                 /* fall through */
1476             case TCG_CALL_ARG_NORMAL:
1477                 if (TCG_TARGET_REG_BITS == 32) {
1478                     layout_arg_normal_n(&cum, info, 2);
1479                 } else {
1480                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1481                 }
1482                 break;
1483             default:
1484                 qemu_build_not_reached();
1485             }
1486             break;
1487 
1488         case TCG_TYPE_I128:
1489             switch (TCG_TARGET_CALL_ARG_I128) {
1490             case TCG_CALL_ARG_EVEN:
1491                 layout_arg_even(&cum);
1492                 /* fall through */
1493             case TCG_CALL_ARG_NORMAL:
1494                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1495                 break;
1496             case TCG_CALL_ARG_BY_REF:
1497                 layout_arg_by_ref(&cum, info);
1498                 break;
1499             default:
1500                 qemu_build_not_reached();
1501             }
1502             break;
1503 
1504         default:
1505             g_assert_not_reached();
1506         }
1507     }
1508     info->nr_in = cum.info_in_idx;
1509 
1510     /* Validate that we didn't overrun the input array. */
1511     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1512     /* Validate the backend has enough argument space. */
1513     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1514 
1515     /*
1516      * Relocate the "ref_slot" area to the end of the parameters.
1517      * Minimizing this stack offset helps code size for x86,
1518      * which has a signed 8-bit offset encoding.
1519      */
1520     if (cum.ref_slot != 0) {
1521         int ref_base = 0;
1522 
1523         if (cum.arg_slot > max_reg_slots) {
1524             int align = __alignof(Int128) / sizeof(tcg_target_long);
1525 
1526             ref_base = cum.arg_slot - max_reg_slots;
1527             if (align > 1) {
1528                 ref_base = ROUND_UP(ref_base, align);
1529             }
1530         }
1531         assert(ref_base + cum.ref_slot <= max_stk_slots);
1532         ref_base += max_reg_slots;
1533 
1534         if (ref_base != 0) {
1535             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1536                 TCGCallArgumentLoc *loc = &info->in[i];
1537                 switch (loc->kind) {
1538                 case TCG_CALL_ARG_BY_REF:
1539                 case TCG_CALL_ARG_BY_REF_N:
1540                     loc->ref_slot += ref_base;
1541                     break;
1542                 default:
1543                     break;
1544                 }
1545             }
1546         }
1547     }
1548 }
1549 
1550 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1551 static void process_constraint_sets(void);
1552 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1553                                             TCGReg reg, const char *name);
1554 
1555 static void tcg_context_init(unsigned max_threads)
1556 {
1557     TCGContext *s = &tcg_init_ctx;
1558     int n, i;
1559     TCGTemp *ts;
1560 
1561     memset(s, 0, sizeof(*s));
1562     s->nb_globals = 0;
1563 
1564     init_call_layout(&info_helper_ld32_mmu);
1565     init_call_layout(&info_helper_ld64_mmu);
1566     init_call_layout(&info_helper_ld128_mmu);
1567     init_call_layout(&info_helper_st32_mmu);
1568     init_call_layout(&info_helper_st64_mmu);
1569     init_call_layout(&info_helper_st128_mmu);
1570 
1571     tcg_target_init(s);
1572     process_constraint_sets();
1573 
1574     /* Reverse the order of the saved registers, assuming they're all at
1575        the start of tcg_target_reg_alloc_order.  */
1576     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1577         int r = tcg_target_reg_alloc_order[n];
1578         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1579             break;
1580         }
1581     }
1582     for (i = 0; i < n; ++i) {
1583         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1584     }
1585     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1586         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1587     }
1588 
1589     tcg_ctx = s;
1590     /*
1591      * In user-mode we simply share the init context among threads, since we
1592      * use a single region. See the documentation of tcg_region_init() for the
1593      * reasoning behind this.
1594      * In system-mode we will have at most max_threads TCG threads.
1595      */
1596 #ifdef CONFIG_USER_ONLY
1597     tcg_ctxs = &tcg_ctx;
1598     tcg_cur_ctxs = 1;
1599     tcg_max_ctxs = 1;
1600 #else
1601     tcg_max_ctxs = max_threads;
1602     tcg_ctxs = g_new0(TCGContext *, max_threads);
1603 #endif
1604 
1605     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1606     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1607     tcg_env = temp_tcgv_ptr(ts);
1608 }
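
/*
 * Illustrative sketch of the reversal above (register names are
 * hypothetical): if tcg_target_reg_alloc_order were
 * { r4, r5, r6, r0, r1 } with call-saved r4-r6 at the front, the
 * first loop finds n = 3 (r0 is the first call-clobbered entry) and
 * the result is indirect_reg_alloc_order = { r6, r5, r4, r0, r1 },
 * so indirect bases prefer the saved register least favoured for
 * ordinary allocation.
 */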
1609 
1610 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1611 {
1612     tcg_context_init(max_threads);
1613     tcg_region_init(tb_size, splitwx, max_threads);
1614 }
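
/*
 * A minimal startup sketch (caller and argument values are assumptions,
 * not taken from this file): an accelerator would call this once,
 * before any translation, e.g.
 *
 *     tcg_init(32 * 1024 * 1024, -1, smp_cpus);
 *
 * requesting a 32 MiB translation buffer, the default split-wx
 * behaviour, and one context per vCPU thread.
 */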
1615 
1616 /*
1617  * Allocate TBs right before their corresponding translated code, making
1618  * sure that TBs and code are on different cache lines.
1619  */
1620 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1621 {
1622     uintptr_t align = qemu_icache_linesize;
1623     TranslationBlock *tb;
1624     void *next;
1625 
1626  retry:
1627     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1628     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1629 
1630     if (unlikely(next > s->code_gen_highwater)) {
1631         if (tcg_region_alloc(s)) {
1632             return NULL;
1633         }
1634         goto retry;
1635     }
1636     qatomic_set(&s->code_gen_ptr, next);
1637     return tb;
1638 }
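
/*
 * Worked example of the alignment above (all values hypothetical):
 * with a 64-byte icache line and code_gen_ptr == 0x1010, the TB is
 * placed at ROUND_UP(0x1010, 64) == 0x1040; for a 0xc0-byte
 * TranslationBlock, next becomes ROUND_UP(0x1100, 64) == 0x1100,
 * so the TB descriptor and its translated code never share a line.
 */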
1639 
1640 void tcg_prologue_init(void)
1641 {
1642     TCGContext *s = tcg_ctx;
1643     size_t prologue_size;
1644 
1645     s->code_ptr = s->code_gen_ptr;
1646     s->code_buf = s->code_gen_ptr;
1647     s->data_gen_ptr = NULL;
1648 
1649 #ifndef CONFIG_TCG_INTERPRETER
1650     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1651 #endif
1652 
1653     s->pool_labels = NULL;
1654 
1655     qemu_thread_jit_write();
1656     /* Generate the prologue.  */
1657     tcg_target_qemu_prologue(s);
1658 
1659     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1660     {
1661         int result = tcg_out_pool_finalize(s);
1662         tcg_debug_assert(result == 0);
1663     }
1664 
1665     prologue_size = tcg_current_code_size(s);
1666     perf_report_prologue(s->code_gen_ptr, prologue_size);
1667 
1668 #ifndef CONFIG_TCG_INTERPRETER
1669     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1670                         (uintptr_t)s->code_buf, prologue_size);
1671 #endif
1672 
1673     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1674         FILE *logfile = qemu_log_trylock();
1675         if (logfile) {
1676             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1677             if (s->data_gen_ptr) {
1678                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1679                 size_t data_size = prologue_size - code_size;
1680                 size_t i;
1681 
1682                 disas(logfile, s->code_gen_ptr, code_size);
1683 
1684                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1685                     if (sizeof(tcg_target_ulong) == 8) {
1686                         fprintf(logfile,
1687                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1688                                 (uintptr_t)s->data_gen_ptr + i,
1689                                 *(uint64_t *)(s->data_gen_ptr + i));
1690                     } else {
1691                         fprintf(logfile,
1692                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1693                                 (uintptr_t)s->data_gen_ptr + i,
1694                                 *(uint32_t *)(s->data_gen_ptr + i));
1695                     }
1696                 }
1697             } else {
1698                 disas(logfile, s->code_gen_ptr, prologue_size);
1699             }
1700             fprintf(logfile, "\n");
1701             qemu_log_unlock(logfile);
1702         }
1703     }
1704 
1705 #ifndef CONFIG_TCG_INTERPRETER
1706     /*
1707      * Assert that goto_ptr is implemented completely, i.e. that the
1707      * backend has set an epilogue.
1708      * For tci, we use NULL as the signal to return from the interpreter,
1709      * so skip this check.
1710      */
1711     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1712 #endif
1713 
1714     tcg_region_prologue_set(s);
1715 }
1716 
1717 void tcg_func_start(TCGContext *s)
1718 {
1719     tcg_pool_reset(s);
1720     s->nb_temps = s->nb_globals;
1721 
1722     /* No temps have been previously allocated for size or locality.  */
1723     tcg_temp_ebb_reset_freed(s);
1724 
1725     /* No constant temps have been previously allocated. */
1726     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1727         if (s->const_table[i]) {
1728             g_hash_table_remove_all(s->const_table[i]);
1729         }
1730     }
1731 
1732     s->nb_ops = 0;
1733     s->nb_labels = 0;
1734     s->current_frame_offset = s->frame_start;
1735 
1736 #ifdef CONFIG_DEBUG_TCG
1737     s->goto_tb_issue_mask = 0;
1738 #endif
1739 
1740     QTAILQ_INIT(&s->ops);
1741     QTAILQ_INIT(&s->free_ops);
1742     s->emit_before_op = NULL;
1743     QSIMPLEQ_INIT(&s->labels);
1744 
1745     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1746     tcg_debug_assert(s->insn_start_words > 0);
1747 }
1748 
1749 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1750 {
1751     int n = s->nb_temps++;
1752 
1753     if (n >= TCG_MAX_TEMPS) {
1754         tcg_raise_tb_overflow(s);
1755     }
1756     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1757 }
1758 
1759 static TCGTemp *tcg_global_alloc(TCGContext *s)
1760 {
1761     TCGTemp *ts;
1762 
1763     tcg_debug_assert(s->nb_globals == s->nb_temps);
1764     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1765     s->nb_globals++;
1766     ts = tcg_temp_alloc(s);
1767     ts->kind = TEMP_GLOBAL;
1768 
1769     return ts;
1770 }
1771 
1772 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1773                                             TCGReg reg, const char *name)
1774 {
1775     TCGTemp *ts;
1776 
1777     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1778 
1779     ts = tcg_global_alloc(s);
1780     ts->base_type = type;
1781     ts->type = type;
1782     ts->kind = TEMP_FIXED;
1783     ts->reg = reg;
1784     ts->name = name;
1785     tcg_regset_set_reg(s->reserved_regs, reg);
1786 
1787     return ts;
1788 }
1789 
1790 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1791 {
1792     s->frame_start = start;
1793     s->frame_end = start + size;
1794     s->frame_temp
1795         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1796 }
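
/*
 * Typical use, sketched from a backend prologue (the constants are
 * per-target assumptions): after reserving its stack frame,
 * tcg_target_qemu_prologue() registers the spill area with e.g.
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 */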
1797 
1798 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1799                                             const char *name, TCGType type)
1800 {
1801     TCGContext *s = tcg_ctx;
1802     TCGTemp *base_ts = tcgv_ptr_temp(base);
1803     TCGTemp *ts = tcg_global_alloc(s);
1804     int indirect_reg = 0;
1805 
1806     switch (base_ts->kind) {
1807     case TEMP_FIXED:
1808         break;
1809     case TEMP_GLOBAL:
1810         /* We do not support double-indirect registers.  */
1811         tcg_debug_assert(!base_ts->indirect_reg);
1812         base_ts->indirect_base = 1;
1813         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1814                             ? 2 : 1);
1815         indirect_reg = 1;
1816         break;
1817     default:
1818         g_assert_not_reached();
1819     }
1820 
1821     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1822         TCGTemp *ts2 = tcg_global_alloc(s);
1823         char buf[64];
1824 
1825         ts->base_type = TCG_TYPE_I64;
1826         ts->type = TCG_TYPE_I32;
1827         ts->indirect_reg = indirect_reg;
1828         ts->mem_allocated = 1;
1829         ts->mem_base = base_ts;
1830         ts->mem_offset = offset;
1831         pstrcpy(buf, sizeof(buf), name);
1832         pstrcat(buf, sizeof(buf), "_0");
1833         ts->name = strdup(buf);
1834 
1835         tcg_debug_assert(ts2 == ts + 1);
1836         ts2->base_type = TCG_TYPE_I64;
1837         ts2->type = TCG_TYPE_I32;
1838         ts2->indirect_reg = indirect_reg;
1839         ts2->mem_allocated = 1;
1840         ts2->mem_base = base_ts;
1841         ts2->mem_offset = offset + 4;
1842         ts2->temp_subindex = 1;
1843         pstrcpy(buf, sizeof(buf), name);
1844         pstrcat(buf, sizeof(buf), "_1");
1845         ts2->name = strdup(buf);
1846     } else {
1847         ts->base_type = type;
1848         ts->type = type;
1849         ts->indirect_reg = indirect_reg;
1850         ts->mem_allocated = 1;
1851         ts->mem_base = base_ts;
1852         ts->mem_offset = offset;
1853         ts->name = name;
1854     }
1855     return ts;
1856 }
1857 
1858 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1859 {
1860     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1861     return temp_tcgv_i32(ts);
1862 }
1863 
1864 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1865 {
1866     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1867     return temp_tcgv_i64(ts);
1868 }
1869 
1870 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1871 {
1872     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1873     return temp_tcgv_ptr(ts);
1874 }
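
/*
 * A minimal usage sketch (CPUFooState and cpu_pc are hypothetical
 * front-end names): a target translator creates its globals once at
 * init time, each backed by a field of env:
 *
 *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                     offsetof(CPUFooState, pc), "pc");
 */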
1875 
1876 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1877 {
1878     TCGContext *s = tcg_ctx;
1879     TCGTemp *ts;
1880     int n;
1881 
1882     if (kind == TEMP_EBB) {
1883         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1884 
1885         if (idx < TCG_MAX_TEMPS) {
1886             /* There is already an available temp with the right type.  */
1887             clear_bit(idx, s->free_temps[type].l);
1888 
1889             ts = &s->temps[idx];
1890             ts->temp_allocated = 1;
1891             tcg_debug_assert(ts->base_type == type);
1892             tcg_debug_assert(ts->kind == kind);
1893             return ts;
1894         }
1895     } else {
1896         tcg_debug_assert(kind == TEMP_TB);
1897     }
1898 
1899     switch (type) {
1900     case TCG_TYPE_I32:
1901     case TCG_TYPE_V64:
1902     case TCG_TYPE_V128:
1903     case TCG_TYPE_V256:
1904         n = 1;
1905         break;
1906     case TCG_TYPE_I64:
1907         n = 64 / TCG_TARGET_REG_BITS;
1908         break;
1909     case TCG_TYPE_I128:
1910         n = 128 / TCG_TARGET_REG_BITS;
1911         break;
1912     default:
1913         g_assert_not_reached();
1914     }
1915 
1916     ts = tcg_temp_alloc(s);
1917     ts->base_type = type;
1918     ts->temp_allocated = 1;
1919     ts->kind = kind;
1920 
1921     if (n == 1) {
1922         ts->type = type;
1923     } else {
1924         ts->type = TCG_TYPE_REG;
1925 
1926         for (int i = 1; i < n; ++i) {
1927             TCGTemp *ts2 = tcg_temp_alloc(s);
1928 
1929             tcg_debug_assert(ts2 == ts + i);
1930             ts2->base_type = type;
1931             ts2->type = TCG_TYPE_REG;
1932             ts2->temp_allocated = 1;
1933             ts2->temp_subindex = i;
1934             ts2->kind = kind;
1935         }
1936     }
1937     return ts;
1938 }
1939 
1940 TCGv_i32 tcg_temp_new_i32(void)
1941 {
1942     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1943 }
1944 
1945 TCGv_i32 tcg_temp_ebb_new_i32(void)
1946 {
1947     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1948 }
1949 
1950 TCGv_i64 tcg_temp_new_i64(void)
1951 {
1952     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1953 }
1954 
1955 TCGv_i64 tcg_temp_ebb_new_i64(void)
1956 {
1957     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1958 }
1959 
1960 TCGv_ptr tcg_temp_new_ptr(void)
1961 {
1962     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1963 }
1964 
1965 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1966 {
1967     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1968 }
1969 
1970 TCGv_i128 tcg_temp_new_i128(void)
1971 {
1972     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1973 }
1974 
1975 TCGv_i128 tcg_temp_ebb_new_i128(void)
1976 {
1977     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1978 }
1979 
1980 TCGv_vec tcg_temp_new_vec(TCGType type)
1981 {
1982     TCGTemp *t;
1983 
1984 #ifdef CONFIG_DEBUG_TCG
1985     switch (type) {
1986     case TCG_TYPE_V64:
1987         assert(TCG_TARGET_HAS_v64);
1988         break;
1989     case TCG_TYPE_V128:
1990         assert(TCG_TARGET_HAS_v128);
1991         break;
1992     case TCG_TYPE_V256:
1993         assert(TCG_TARGET_HAS_v256);
1994         break;
1995     default:
1996         g_assert_not_reached();
1997     }
1998 #endif
1999 
2000     t = tcg_temp_new_internal(type, TEMP_EBB);
2001     return temp_tcgv_vec(t);
2002 }
2003 
2004 /* Create a new temp of the same type as an existing temp.  */
2005 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2006 {
2007     TCGTemp *t = tcgv_vec_temp(match);
2008 
2009     tcg_debug_assert(t->temp_allocated != 0);
2010 
2011     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2012     return temp_tcgv_vec(t);
2013 }
2014 
2015 void tcg_temp_free_internal(TCGTemp *ts)
2016 {
2017     TCGContext *s = tcg_ctx;
2018 
2019     switch (ts->kind) {
2020     case TEMP_CONST:
2021     case TEMP_TB:
2022         /* Silently ignore free. */
2023         break;
2024     case TEMP_EBB:
2025         tcg_debug_assert(ts->temp_allocated != 0);
2026         ts->temp_allocated = 0;
2027         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2028         break;
2029     default:
2030         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2031         g_assert_not_reached();
2032     }
2033 }
2034 
2035 void tcg_temp_free_i32(TCGv_i32 arg)
2036 {
2037     tcg_temp_free_internal(tcgv_i32_temp(arg));
2038 }
2039 
2040 void tcg_temp_free_i64(TCGv_i64 arg)
2041 {
2042     tcg_temp_free_internal(tcgv_i64_temp(arg));
2043 }
2044 
2045 void tcg_temp_free_i128(TCGv_i128 arg)
2046 {
2047     tcg_temp_free_internal(tcgv_i128_temp(arg));
2048 }
2049 
2050 void tcg_temp_free_ptr(TCGv_ptr arg)
2051 {
2052     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2053 }
2054 
2055 void tcg_temp_free_vec(TCGv_vec arg)
2056 {
2057     tcg_temp_free_internal(tcgv_vec_temp(arg));
2058 }
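
/*
 * Usage sketch (illustrative): only TEMP_EBB frees are recorded for
 * reuse; freeing a TEMP_TB temp is silently ignored above.
 *
 *     TCGv_i32 t = tcg_temp_ebb_new_i32();
 *     tcg_gen_movi_i32(t, 0);
 *     tcg_temp_free_i32(t);
 *
 * The final free returns the slot to free_temps for the next
 * tcg_temp_ebb_new_i32() in this translation.
 */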
2059 
2060 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2061 {
2062     TCGContext *s = tcg_ctx;
2063     GHashTable *h = s->const_table[type];
2064     TCGTemp *ts;
2065 
2066     if (h == NULL) {
2067         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2068         s->const_table[type] = h;
2069     }
2070 
2071     ts = g_hash_table_lookup(h, &val);
2072     if (ts == NULL) {
2073         int64_t *val_ptr;
2074 
2075         ts = tcg_temp_alloc(s);
2076 
2077         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2078             TCGTemp *ts2 = tcg_temp_alloc(s);
2079 
2080             tcg_debug_assert(ts2 == ts + 1);
2081 
2082             ts->base_type = TCG_TYPE_I64;
2083             ts->type = TCG_TYPE_I32;
2084             ts->kind = TEMP_CONST;
2085             ts->temp_allocated = 1;
2086 
2087             ts2->base_type = TCG_TYPE_I64;
2088             ts2->type = TCG_TYPE_I32;
2089             ts2->kind = TEMP_CONST;
2090             ts2->temp_allocated = 1;
2091             ts2->temp_subindex = 1;
2092 
2093             /*
2094              * Retain the full value of the 64-bit constant in the low
2095              * part, so that the hash table works.  Actual uses will
2096              * truncate the value to the low part.
2097              */
2098             ts[HOST_BIG_ENDIAN].val = val;
2099             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2100             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2101         } else {
2102             ts->base_type = type;
2103             ts->type = type;
2104             ts->kind = TEMP_CONST;
2105             ts->temp_allocated = 1;
2106             ts->val = val;
2107             val_ptr = &ts->val;
2108         }
2109         g_hash_table_insert(h, val_ptr, ts);
2110     }
2111 
2112     return ts;
2113 }
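
/*
 * Illustrative consequence of the interning above: repeated requests
 * for the same (type, value) pair yield the same temp, so
 *
 *     tcg_constant_i32(5) == tcg_constant_i32(5)
 *
 * and constants are never freed explicitly (see TEMP_CONST in
 * tcg_temp_free_internal above).
 */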
2114 
2115 TCGv_i32 tcg_constant_i32(int32_t val)
2116 {
2117     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2118 }
2119 
2120 TCGv_i64 tcg_constant_i64(int64_t val)
2121 {
2122     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2123 }
2124 
2125 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2126 {
2127     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2128 }
2129 
2130 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2131 {
2132     val = dup_const(vece, val);
2133     return temp_tcgv_vec(tcg_constant_internal(type, val));
2134 }
2135 
2136 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2137 {
2138     TCGTemp *t = tcgv_vec_temp(match);
2139 
2140     tcg_debug_assert(t->temp_allocated != 0);
2141     return tcg_constant_vec(t->base_type, vece, val);
2142 }
2143 
2144 #ifdef CONFIG_DEBUG_TCG
2145 size_t temp_idx(TCGTemp *ts)
2146 {
2147     ptrdiff_t n = ts - tcg_ctx->temps;
2148     assert(n >= 0 && n < tcg_ctx->nb_temps);
2149     return n;
2150 }
2151 
2152 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2153 {
2154     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2155 
2156     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2157     assert(o % sizeof(TCGTemp) == 0);
2158 
2159     return (void *)tcg_ctx + (uintptr_t)v;
2160 }
2161 #endif /* CONFIG_DEBUG_TCG */
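
/*
 * The debug checks above rely on the handle encoding: a TCGv_* value
 * is the byte offset of its TCGTemp from the start of TCGContext.
 * E.g. for temps[2], with a hypothetical offsetof(TCGContext, temps)
 * of 0x400 and sizeof(TCGTemp) of 0x38, the handle is the integer
 * 0x400 + 2 * 0x38 == 0x470, and adding it back to tcg_ctx recovers
 * the TCGTemp pointer.
 */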
2162 
2163 /*
2164  * Return true if OP may appear in the opcode stream with TYPE.
2165  * Test the runtime variable that controls each opcode.
2166  */
2167 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2168 {
2169     bool has_type;
2170 
2171     switch (type) {
2172     case TCG_TYPE_I32:
2173         has_type = true;
2174         break;
2175     case TCG_TYPE_I64:
2176         has_type = TCG_TARGET_REG_BITS == 64;
2177         break;
2178     case TCG_TYPE_V64:
2179         has_type = TCG_TARGET_HAS_v64;
2180         break;
2181     case TCG_TYPE_V128:
2182         has_type = TCG_TARGET_HAS_v128;
2183         break;
2184     case TCG_TYPE_V256:
2185         has_type = TCG_TARGET_HAS_v256;
2186         break;
2187     default:
2188         has_type = false;
2189         break;
2190     }
2191 
2192     switch (op) {
2193     case INDEX_op_discard:
2194     case INDEX_op_set_label:
2195     case INDEX_op_call:
2196     case INDEX_op_br:
2197     case INDEX_op_mb:
2198     case INDEX_op_insn_start:
2199     case INDEX_op_exit_tb:
2200     case INDEX_op_goto_tb:
2201     case INDEX_op_goto_ptr:
2202     case INDEX_op_qemu_ld_i32:
2203     case INDEX_op_qemu_st_i32:
2204     case INDEX_op_qemu_ld_i64:
2205     case INDEX_op_qemu_st_i64:
2206         return true;
2207 
2208     case INDEX_op_qemu_st8_i32:
2209         return TCG_TARGET_HAS_qemu_st8_i32;
2210 
2211     case INDEX_op_qemu_ld_i128:
2212     case INDEX_op_qemu_st_i128:
2213         return TCG_TARGET_HAS_qemu_ldst_i128;
2214 
2215     case INDEX_op_add:
2216     case INDEX_op_and:
2217     case INDEX_op_mov:
2218     case INDEX_op_or:
2219     case INDEX_op_xor:
2220         return has_type;
2221 
2222     case INDEX_op_setcond_i32:
2223     case INDEX_op_brcond_i32:
2224     case INDEX_op_movcond_i32:
2225     case INDEX_op_ld8u_i32:
2226     case INDEX_op_ld8s_i32:
2227     case INDEX_op_ld16u_i32:
2228     case INDEX_op_ld16s_i32:
2229     case INDEX_op_ld_i32:
2230     case INDEX_op_st8_i32:
2231     case INDEX_op_st16_i32:
2232     case INDEX_op_st_i32:
2233     case INDEX_op_sub_i32:
2234     case INDEX_op_neg_i32:
2235     case INDEX_op_mul_i32:
2236     case INDEX_op_shl_i32:
2237     case INDEX_op_shr_i32:
2238     case INDEX_op_sar_i32:
2239     case INDEX_op_extract_i32:
2240     case INDEX_op_sextract_i32:
2241     case INDEX_op_deposit_i32:
2242         return true;
2243 
2244     case INDEX_op_negsetcond_i32:
2245         return TCG_TARGET_HAS_negsetcond_i32;
2246     case INDEX_op_div_i32:
2247     case INDEX_op_divu_i32:
2248         return TCG_TARGET_HAS_div_i32;
2249     case INDEX_op_rem_i32:
2250     case INDEX_op_remu_i32:
2251         return TCG_TARGET_HAS_rem_i32;
2252     case INDEX_op_div2_i32:
2253     case INDEX_op_divu2_i32:
2254         return TCG_TARGET_HAS_div2_i32;
2255     case INDEX_op_rotl_i32:
2256     case INDEX_op_rotr_i32:
2257         return TCG_TARGET_HAS_rot_i32;
2258     case INDEX_op_extract2_i32:
2259         return TCG_TARGET_HAS_extract2_i32;
2260     case INDEX_op_add2_i32:
2261         return TCG_TARGET_HAS_add2_i32;
2262     case INDEX_op_sub2_i32:
2263         return TCG_TARGET_HAS_sub2_i32;
2264     case INDEX_op_mulu2_i32:
2265         return TCG_TARGET_HAS_mulu2_i32;
2266     case INDEX_op_muls2_i32:
2267         return TCG_TARGET_HAS_muls2_i32;
2268     case INDEX_op_muluh_i32:
2269         return TCG_TARGET_HAS_muluh_i32;
2270     case INDEX_op_mulsh_i32:
2271         return TCG_TARGET_HAS_mulsh_i32;
2272     case INDEX_op_bswap16_i32:
2273         return TCG_TARGET_HAS_bswap16_i32;
2274     case INDEX_op_bswap32_i32:
2275         return TCG_TARGET_HAS_bswap32_i32;
2276     case INDEX_op_not_i32:
2277         return TCG_TARGET_HAS_not_i32;
2278     case INDEX_op_nor_i32:
2279         return TCG_TARGET_HAS_nor_i32;
2280     case INDEX_op_clz_i32:
2281         return TCG_TARGET_HAS_clz_i32;
2282     case INDEX_op_ctz_i32:
2283         return TCG_TARGET_HAS_ctz_i32;
2284     case INDEX_op_ctpop_i32:
2285         return TCG_TARGET_HAS_ctpop_i32;
2286 
2287     case INDEX_op_brcond2_i32:
2288     case INDEX_op_setcond2_i32:
2289         return TCG_TARGET_REG_BITS == 32;
2290 
2291     case INDEX_op_setcond_i64:
2292     case INDEX_op_brcond_i64:
2293     case INDEX_op_movcond_i64:
2294     case INDEX_op_ld8u_i64:
2295     case INDEX_op_ld8s_i64:
2296     case INDEX_op_ld16u_i64:
2297     case INDEX_op_ld16s_i64:
2298     case INDEX_op_ld32u_i64:
2299     case INDEX_op_ld32s_i64:
2300     case INDEX_op_ld_i64:
2301     case INDEX_op_st8_i64:
2302     case INDEX_op_st16_i64:
2303     case INDEX_op_st32_i64:
2304     case INDEX_op_st_i64:
2305     case INDEX_op_sub_i64:
2306     case INDEX_op_neg_i64:
2307     case INDEX_op_mul_i64:
2308     case INDEX_op_shl_i64:
2309     case INDEX_op_shr_i64:
2310     case INDEX_op_sar_i64:
2311     case INDEX_op_ext_i32_i64:
2312     case INDEX_op_extu_i32_i64:
2313     case INDEX_op_extract_i64:
2314     case INDEX_op_sextract_i64:
2315     case INDEX_op_deposit_i64:
2316         return TCG_TARGET_REG_BITS == 64;
2317 
2318     case INDEX_op_negsetcond_i64:
2319         return TCG_TARGET_HAS_negsetcond_i64;
2320     case INDEX_op_div_i64:
2321     case INDEX_op_divu_i64:
2322         return TCG_TARGET_HAS_div_i64;
2323     case INDEX_op_rem_i64:
2324     case INDEX_op_remu_i64:
2325         return TCG_TARGET_HAS_rem_i64;
2326     case INDEX_op_div2_i64:
2327     case INDEX_op_divu2_i64:
2328         return TCG_TARGET_HAS_div2_i64;
2329     case INDEX_op_rotl_i64:
2330     case INDEX_op_rotr_i64:
2331         return TCG_TARGET_HAS_rot_i64;
2332     case INDEX_op_extract2_i64:
2333         return TCG_TARGET_HAS_extract2_i64;
2334     case INDEX_op_extrl_i64_i32:
2335     case INDEX_op_extrh_i64_i32:
2336         return TCG_TARGET_HAS_extr_i64_i32;
2337     case INDEX_op_bswap16_i64:
2338         return TCG_TARGET_HAS_bswap16_i64;
2339     case INDEX_op_bswap32_i64:
2340         return TCG_TARGET_HAS_bswap32_i64;
2341     case INDEX_op_bswap64_i64:
2342         return TCG_TARGET_HAS_bswap64_i64;
2343     case INDEX_op_not_i64:
2344         return TCG_TARGET_HAS_not_i64;
2345     case INDEX_op_nor_i64:
2346         return TCG_TARGET_HAS_nor_i64;
2347     case INDEX_op_clz_i64:
2348         return TCG_TARGET_HAS_clz_i64;
2349     case INDEX_op_ctz_i64:
2350         return TCG_TARGET_HAS_ctz_i64;
2351     case INDEX_op_ctpop_i64:
2352         return TCG_TARGET_HAS_ctpop_i64;
2353     case INDEX_op_add2_i64:
2354         return TCG_TARGET_HAS_add2_i64;
2355     case INDEX_op_sub2_i64:
2356         return TCG_TARGET_HAS_sub2_i64;
2357     case INDEX_op_mulu2_i64:
2358         return TCG_TARGET_HAS_mulu2_i64;
2359     case INDEX_op_muls2_i64:
2360         return TCG_TARGET_HAS_muls2_i64;
2361     case INDEX_op_muluh_i64:
2362         return TCG_TARGET_HAS_muluh_i64;
2363     case INDEX_op_mulsh_i64:
2364         return TCG_TARGET_HAS_mulsh_i64;
2365 
2366     case INDEX_op_mov_vec:
2367     case INDEX_op_dup_vec:
2368     case INDEX_op_dupm_vec:
2369     case INDEX_op_ld_vec:
2370     case INDEX_op_st_vec:
2371     case INDEX_op_add_vec:
2372     case INDEX_op_sub_vec:
2373     case INDEX_op_and_vec:
2374     case INDEX_op_or_vec:
2375     case INDEX_op_xor_vec:
2376     case INDEX_op_cmp_vec:
2377         return has_type;
2378     case INDEX_op_dup2_vec:
2379         return has_type && TCG_TARGET_REG_BITS == 32;
2380     case INDEX_op_not_vec:
2381         return has_type && TCG_TARGET_HAS_not_vec;
2382     case INDEX_op_neg_vec:
2383         return has_type && TCG_TARGET_HAS_neg_vec;
2384     case INDEX_op_abs_vec:
2385         return has_type && TCG_TARGET_HAS_abs_vec;
2386     case INDEX_op_andc_vec:
2387         return has_type && TCG_TARGET_HAS_andc_vec;
2388     case INDEX_op_orc_vec:
2389         return has_type && TCG_TARGET_HAS_orc_vec;
2390     case INDEX_op_nand_vec:
2391         return has_type && TCG_TARGET_HAS_nand_vec;
2392     case INDEX_op_nor_vec:
2393         return has_type && TCG_TARGET_HAS_nor_vec;
2394     case INDEX_op_eqv_vec:
2395         return has_type && TCG_TARGET_HAS_eqv_vec;
2396     case INDEX_op_mul_vec:
2397         return has_type && TCG_TARGET_HAS_mul_vec;
2398     case INDEX_op_shli_vec:
2399     case INDEX_op_shri_vec:
2400     case INDEX_op_sari_vec:
2401         return has_type && TCG_TARGET_HAS_shi_vec;
2402     case INDEX_op_shls_vec:
2403     case INDEX_op_shrs_vec:
2404     case INDEX_op_sars_vec:
2405         return has_type && TCG_TARGET_HAS_shs_vec;
2406     case INDEX_op_shlv_vec:
2407     case INDEX_op_shrv_vec:
2408     case INDEX_op_sarv_vec:
2409         return has_type && TCG_TARGET_HAS_shv_vec;
2410     case INDEX_op_rotli_vec:
2411         return has_type && TCG_TARGET_HAS_roti_vec;
2412     case INDEX_op_rotls_vec:
2413         return has_type && TCG_TARGET_HAS_rots_vec;
2414     case INDEX_op_rotlv_vec:
2415     case INDEX_op_rotrv_vec:
2416         return has_type && TCG_TARGET_HAS_rotv_vec;
2417     case INDEX_op_ssadd_vec:
2418     case INDEX_op_usadd_vec:
2419     case INDEX_op_sssub_vec:
2420     case INDEX_op_ussub_vec:
2421         return has_type && TCG_TARGET_HAS_sat_vec;
2422     case INDEX_op_smin_vec:
2423     case INDEX_op_umin_vec:
2424     case INDEX_op_smax_vec:
2425     case INDEX_op_umax_vec:
2426         return has_type && TCG_TARGET_HAS_minmax_vec;
2427     case INDEX_op_bitsel_vec:
2428         return has_type && TCG_TARGET_HAS_bitsel_vec;
2429     case INDEX_op_cmpsel_vec:
2430         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2431 
2432     default:
2433         if (op < INDEX_op_last_generic) {
2434             const TCGOutOp *outop;
2435             TCGConstraintSetIndex con_set;
2436 
2437             if (!has_type) {
2438                 return false;
2439             }
2440 
2441             outop = all_outop[op];
2442             tcg_debug_assert(outop != NULL);
2443 
2444             con_set = outop->static_constraint;
2445             if (con_set == C_Dynamic) {
2446                 con_set = outop->dynamic_constraint(type, flags);
2447             }
2448             if (con_set >= 0) {
2449                 return true;
2450             }
2451             tcg_debug_assert(con_set == C_NotImplemented);
2452             return false;
2453         }
2454         tcg_debug_assert(op < NB_OPS);
2455         return true;
2456 
2457     case INDEX_op_last_generic:
2458         g_assert_not_reached();
2459     }
2460 }
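
/*
 * A minimal caller sketch (an assumed call site, not from this file):
 * expansion code tests support before emitting an opcode and falls
 * back otherwise, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         ... emit the ctpop opcode directly ...
 *     } else {
 *         ... expand via helper or bit manipulation ...
 *     }
 */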
2461 
2462 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2463 {
2464     unsigned width;
2465 
2466     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2467     width = (type == TCG_TYPE_I32 ? 32 : 64);
2468 
2469     tcg_debug_assert(ofs < width);
2470     tcg_debug_assert(len > 0);
2471     tcg_debug_assert(len <= width - ofs);
2472 
2473     return TCG_TARGET_deposit_valid(type, ofs, len);
2474 }
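
/*
 * For reference, the deposit operation being validated computes,
 * with mask = (1 << len) - 1:
 *
 *     dest = (arg1 & ~(mask << ofs)) | ((arg2 & mask) << ofs);
 *
 * e.g. ofs = 8, len = 8 replaces bits [15:8] of arg1 with the low
 * byte of arg2.
 */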
2475 
2476 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2477 
2478 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2479                           TCGTemp *ret, TCGTemp **args)
2480 {
2481     TCGv_i64 extend_free[MAX_CALL_IARGS];
2482     int n_extend = 0;
2483     TCGOp *op;
2484     int i, n, pi = 0, total_args;
2485 
2486     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2487         init_call_layout(info);
2488         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2489     }
2490 
2491     total_args = info->nr_out + info->nr_in + 2;
2492     op = tcg_op_alloc(INDEX_op_call, total_args);
2493 
2494 #ifdef CONFIG_PLUGIN
2495     /* Flag helpers that may affect guest state */
2496     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2497         tcg_ctx->plugin_insn->calls_helpers = true;
2498     }
2499 #endif
2500 
2501     TCGOP_CALLO(op) = n = info->nr_out;
2502     switch (n) {
2503     case 0:
2504         tcg_debug_assert(ret == NULL);
2505         break;
2506     case 1:
2507         tcg_debug_assert(ret != NULL);
2508         op->args[pi++] = temp_arg(ret);
2509         break;
2510     case 2:
2511     case 4:
2512         tcg_debug_assert(ret != NULL);
2513         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2514         tcg_debug_assert(ret->temp_subindex == 0);
2515         for (i = 0; i < n; ++i) {
2516             op->args[pi++] = temp_arg(ret + i);
2517         }
2518         break;
2519     default:
2520         g_assert_not_reached();
2521     }
2522 
2523     TCGOP_CALLI(op) = n = info->nr_in;
2524     for (i = 0; i < n; i++) {
2525         const TCGCallArgumentLoc *loc = &info->in[i];
2526         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2527 
2528         switch (loc->kind) {
2529         case TCG_CALL_ARG_NORMAL:
2530         case TCG_CALL_ARG_BY_REF:
2531         case TCG_CALL_ARG_BY_REF_N:
2532             op->args[pi++] = temp_arg(ts);
2533             break;
2534 
2535         case TCG_CALL_ARG_EXTEND_U:
2536         case TCG_CALL_ARG_EXTEND_S:
2537             {
2538                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2539                 TCGv_i32 orig = temp_tcgv_i32(ts);
2540 
2541                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2542                     tcg_gen_ext_i32_i64(temp, orig);
2543                 } else {
2544                     tcg_gen_extu_i32_i64(temp, orig);
2545                 }
2546                 op->args[pi++] = tcgv_i64_arg(temp);
2547                 extend_free[n_extend++] = temp;
2548             }
2549             break;
2550 
2551         default:
2552             g_assert_not_reached();
2553         }
2554     }
2555     op->args[pi++] = (uintptr_t)func;
2556     op->args[pi++] = (uintptr_t)info;
2557     tcg_debug_assert(pi == total_args);
2558 
2559     if (tcg_ctx->emit_before_op) {
2560         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2561     } else {
2562         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2563     }
2564 
2565     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2566     for (i = 0; i < n_extend; ++i) {
2567         tcg_temp_free_i64(extend_free[i]);
2568     }
2569 }
2570 
2571 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2572 {
2573     tcg_gen_callN(func, info, ret, NULL);
2574 }
2575 
2576 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2577 {
2578     tcg_gen_callN(func, info, ret, &t1);
2579 }
2580 
2581 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2582                    TCGTemp *t1, TCGTemp *t2)
2583 {
2584     TCGTemp *args[2] = { t1, t2 };
2585     tcg_gen_callN(func, info, ret, args);
2586 }
2587 
2588 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2589                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2590 {
2591     TCGTemp *args[3] = { t1, t2, t3 };
2592     tcg_gen_callN(func, info, ret, args);
2593 }
2594 
2595 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2596                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2597 {
2598     TCGTemp *args[4] = { t1, t2, t3, t4 };
2599     tcg_gen_callN(func, info, ret, args);
2600 }
2601 
2602 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2603                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2604 {
2605     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2606     tcg_gen_callN(func, info, ret, args);
2607 }
2608 
2609 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2610                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2611                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2612 {
2613     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2614     tcg_gen_callN(func, info, ret, args);
2615 }
2616 
2617 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2618                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2619                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2620 {
2621     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2622     tcg_gen_callN(func, info, ret, args);
2623 }
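
/*
 * These fixed-arity wrappers are normally reached through generated
 * gen_helper_* stubs rather than called directly.  A sketch of the
 * shape of such a stub (helper_foo and helper_info_foo are
 * hypothetical):
 *
 *     static inline void gen_helper_foo(TCGv_i32 ret, TCGv_env env,
 *                                       TCGv_i32 a)
 *     {
 *         tcg_gen_call2(helper_foo, &helper_info_foo,
 *                       tcgv_i32_temp(ret),
 *                       tcgv_ptr_temp(env), tcgv_i32_temp(a));
 *     }
 */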
2624 
2625 static void tcg_reg_alloc_start(TCGContext *s)
2626 {
2627     int i, n;
2628 
2629     for (i = 0, n = s->nb_temps; i < n; i++) {
2630         TCGTemp *ts = &s->temps[i];
2631         TCGTempVal val = TEMP_VAL_MEM;
2632 
2633         switch (ts->kind) {
2634         case TEMP_CONST:
2635             val = TEMP_VAL_CONST;
2636             break;
2637         case TEMP_FIXED:
2638             val = TEMP_VAL_REG;
2639             break;
2640         case TEMP_GLOBAL:
2641             break;
2642         case TEMP_EBB:
2643             val = TEMP_VAL_DEAD;
2644             /* fall through */
2645         case TEMP_TB:
2646             ts->mem_allocated = 0;
2647             break;
2648         default:
2649             g_assert_not_reached();
2650         }
2651         ts->val_type = val;
2652     }
2653 
2654     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2655 }
2656 
2657 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2658                                  TCGTemp *ts)
2659 {
2660     int idx = temp_idx(ts);
2661 
2662     switch (ts->kind) {
2663     case TEMP_FIXED:
2664     case TEMP_GLOBAL:
2665         pstrcpy(buf, buf_size, ts->name);
2666         break;
2667     case TEMP_TB:
2668         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2669         break;
2670     case TEMP_EBB:
2671         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2672         break;
2673     case TEMP_CONST:
2674         switch (ts->type) {
2675         case TCG_TYPE_I32:
2676             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2677             break;
2678 #if TCG_TARGET_REG_BITS > 32
2679         case TCG_TYPE_I64:
2680             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2681             break;
2682 #endif
2683         case TCG_TYPE_V64:
2684         case TCG_TYPE_V128:
2685         case TCG_TYPE_V256:
2686             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2687                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2688             break;
2689         default:
2690             g_assert_not_reached();
2691         }
2692         break;
2693     }
2694     return buf;
2695 }
2696 
2697 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2698                              int buf_size, TCGArg arg)
2699 {
2700     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2701 }
2702 
2703 static const char * const cond_name[] =
2704 {
2705     [TCG_COND_NEVER] = "never",
2706     [TCG_COND_ALWAYS] = "always",
2707     [TCG_COND_EQ] = "eq",
2708     [TCG_COND_NE] = "ne",
2709     [TCG_COND_LT] = "lt",
2710     [TCG_COND_GE] = "ge",
2711     [TCG_COND_LE] = "le",
2712     [TCG_COND_GT] = "gt",
2713     [TCG_COND_LTU] = "ltu",
2714     [TCG_COND_GEU] = "geu",
2715     [TCG_COND_LEU] = "leu",
2716     [TCG_COND_GTU] = "gtu",
2717     [TCG_COND_TSTEQ] = "tsteq",
2718     [TCG_COND_TSTNE] = "tstne",
2719 };
2720 
2721 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2722 {
2723     [MO_UB]   = "ub",
2724     [MO_SB]   = "sb",
2725     [MO_LEUW] = "leuw",
2726     [MO_LESW] = "lesw",
2727     [MO_LEUL] = "leul",
2728     [MO_LESL] = "lesl",
2729     [MO_LEUQ] = "leq",
2730     [MO_BEUW] = "beuw",
2731     [MO_BESW] = "besw",
2732     [MO_BEUL] = "beul",
2733     [MO_BESL] = "besl",
2734     [MO_BEUQ] = "beq",
2735     [MO_128 + MO_BE] = "beo",
2736     [MO_128 + MO_LE] = "leo",
2737 };
2738 
2739 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2740     [MO_UNALN >> MO_ASHIFT]    = "un+",
2741     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2742     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2743     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2744     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2745     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2746     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2747     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2748 };
2749 
2750 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2751     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2752     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2753     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2754     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2755     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2756     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2757 };
2758 
2759 static const char bswap_flag_name[][6] = {
2760     [TCG_BSWAP_IZ] = "iz",
2761     [TCG_BSWAP_OZ] = "oz",
2762     [TCG_BSWAP_OS] = "os",
2763     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2764     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2765 };
2766 
2767 #ifdef CONFIG_PLUGIN
2768 static const char * const plugin_from_name[] = {
2769     "from-tb",
2770     "from-insn",
2771     "after-insn",
2772     "after-tb",
2773 };
2774 #endif
2775 
2776 static inline bool tcg_regset_single(TCGRegSet d)
2777 {
2778     return (d & (d - 1)) == 0;
2779 }
2780 
2781 static inline TCGReg tcg_regset_first(TCGRegSet d)
2782 {
2783     if (TCG_TARGET_NB_REGS <= 32) {
2784         return ctz32(d);
2785     } else {
2786         return ctz64(d);
2787     }
2788 }
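
/*
 * Worked example: for d = 0b0110, tcg_regset_single() is false
 * (d & (d - 1) == 0b0100), while for d = 0b0100 it is true and
 * tcg_regset_first() returns register 2.
 */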
2789 
2790 /* Return only the number of characters output -- no error return. */
2791 #define ne_fprintf(...) \
2792     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2793 
2794 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2795 {
2796     char buf[128];
2797     TCGOp *op;
2798 
2799     QTAILQ_FOREACH(op, &s->ops, link) {
2800         int i, k, nb_oargs, nb_iargs, nb_cargs;
2801         const TCGOpDef *def;
2802         TCGOpcode c;
2803         int col = 0;
2804 
2805         c = op->opc;
2806         def = &tcg_op_defs[c];
2807 
2808         if (c == INDEX_op_insn_start) {
2809             nb_oargs = 0;
2810             col += ne_fprintf(f, "\n ----");
2811 
2812             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2813                 col += ne_fprintf(f, " %016" PRIx64,
2814                                   tcg_get_insn_start_param(op, i));
2815             }
2816         } else if (c == INDEX_op_call) {
2817             const TCGHelperInfo *info = tcg_call_info(op);
2818             void *func = tcg_call_func(op);
2819 
2820             /* variable number of arguments */
2821             nb_oargs = TCGOP_CALLO(op);
2822             nb_iargs = TCGOP_CALLI(op);
2823             nb_cargs = def->nb_cargs;
2824 
2825             col += ne_fprintf(f, " %s ", def->name);
2826 
2827             /*
2828              * Print the function name from TCGHelperInfo, if available.
2829              * Note that plugins have a template function for the info,
2830              * but the actual function pointer comes from the plugin.
2831              */
2832             if (func == info->func) {
2833                 col += ne_fprintf(f, "%s", info->name);
2834             } else {
2835                 col += ne_fprintf(f, "plugin(%p)", func);
2836             }
2837 
2838             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2839             for (i = 0; i < nb_oargs; i++) {
2840                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2841                                                             op->args[i]));
2842             }
2843             for (i = 0; i < nb_iargs; i++) {
2844                 TCGArg arg = op->args[nb_oargs + i];
2845                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2846                 col += ne_fprintf(f, ",%s", t);
2847             }
2848         } else {
2849             if (def->flags & TCG_OPF_INT) {
2850                 col += ne_fprintf(f, " %s_i%d ",
2851                                   def->name,
2852                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2853             } else if (def->flags & TCG_OPF_VECTOR) {
2854                 col += ne_fprintf(f, "%s v%d,e%d,",
2855                                   def->name,
2856                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2857                                   8 << TCGOP_VECE(op));
2858             } else {
2859                 col += ne_fprintf(f, " %s ", def->name);
2860             }
2861 
2862             nb_oargs = def->nb_oargs;
2863             nb_iargs = def->nb_iargs;
2864             nb_cargs = def->nb_cargs;
2865 
2866             k = 0;
2867             for (i = 0; i < nb_oargs; i++) {
2868                 const char *sep = k ? "," : "";
2869                 col += ne_fprintf(f, "%s%s", sep,
2870                                   tcg_get_arg_str(s, buf, sizeof(buf),
2871                                                   op->args[k++]));
2872             }
2873             for (i = 0; i < nb_iargs; i++) {
2874                 const char *sep = k ? "," : "";
2875                 col += ne_fprintf(f, "%s%s", sep,
2876                                   tcg_get_arg_str(s, buf, sizeof(buf),
2877                                                   op->args[k++]));
2878             }
2879             switch (c) {
2880             case INDEX_op_brcond_i32:
2881             case INDEX_op_setcond_i32:
2882             case INDEX_op_negsetcond_i32:
2883             case INDEX_op_movcond_i32:
2884             case INDEX_op_brcond2_i32:
2885             case INDEX_op_setcond2_i32:
2886             case INDEX_op_brcond_i64:
2887             case INDEX_op_setcond_i64:
2888             case INDEX_op_negsetcond_i64:
2889             case INDEX_op_movcond_i64:
2890             case INDEX_op_cmp_vec:
2891             case INDEX_op_cmpsel_vec:
2892                 if (op->args[k] < ARRAY_SIZE(cond_name)
2893                     && cond_name[op->args[k]]) {
2894                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2895                 } else {
2896                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2897                 }
2898                 i = 1;
2899                 break;
2900             case INDEX_op_qemu_ld_i32:
2901             case INDEX_op_qemu_st_i32:
2902             case INDEX_op_qemu_st8_i32:
2903             case INDEX_op_qemu_ld_i64:
2904             case INDEX_op_qemu_st_i64:
2905             case INDEX_op_qemu_ld_i128:
2906             case INDEX_op_qemu_st_i128:
2907                 {
2908                     const char *s_al, *s_op, *s_at;
2909                     MemOpIdx oi = op->args[k++];
2910                     MemOp mop = get_memop(oi);
2911                     unsigned ix = get_mmuidx(oi);
2912 
2913                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2914                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2915                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2916                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2917 
2918                     /* If all fields are accounted for, print symbolically. */
2919                     if (!mop && s_al && s_op && s_at) {
2920                         col += ne_fprintf(f, ",%s%s%s,%u",
2921                                           s_at, s_al, s_op, ix);
2922                     } else {
2923                         mop = get_memop(oi);
2924                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2925                     }
2926                     i = 1;
2927                 }
2928                 break;
2929             case INDEX_op_bswap16_i32:
2930             case INDEX_op_bswap16_i64:
2931             case INDEX_op_bswap32_i32:
2932             case INDEX_op_bswap32_i64:
2933             case INDEX_op_bswap64_i64:
2934                 {
2935                     TCGArg flags = op->args[k];
2936                     const char *name = NULL;
2937 
2938                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2939                         name = bswap_flag_name[flags];
2940                     }
2941                     if (name) {
2942                         col += ne_fprintf(f, ",%s", name);
2943                     } else {
2944                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2945                     }
2946                     i = k = 1;
2947                 }
2948                 break;
2949 #ifdef CONFIG_PLUGIN
2950             case INDEX_op_plugin_cb:
2951                 {
2952                     TCGArg from = op->args[k++];
2953                     const char *name = NULL;
2954 
2955                     if (from < ARRAY_SIZE(plugin_from_name)) {
2956                         name = plugin_from_name[from];
2957                     }
2958                     if (name) {
2959                         col += ne_fprintf(f, "%s", name);
2960                     } else {
2961                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2962                     }
2963                     i = 1;
2964                 }
2965                 break;
2966 #endif
2967             default:
2968                 i = 0;
2969                 break;
2970             }
2971             switch (c) {
2972             case INDEX_op_set_label:
2973             case INDEX_op_br:
2974             case INDEX_op_brcond_i32:
2975             case INDEX_op_brcond_i64:
2976             case INDEX_op_brcond2_i32:
2977                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2978                                   arg_label(op->args[k])->id);
2979                 i++, k++;
2980                 break;
2981             case INDEX_op_mb:
2982                 {
2983                     TCGBar membar = op->args[k];
2984                     const char *b_op, *m_op;
2985 
2986                     switch (membar & TCG_BAR_SC) {
2987                     case 0:
2988                         b_op = "none";
2989                         break;
2990                     case TCG_BAR_LDAQ:
2991                         b_op = "acq";
2992                         break;
2993                     case TCG_BAR_STRL:
2994                         b_op = "rel";
2995                         break;
2996                     case TCG_BAR_SC:
2997                         b_op = "seq";
2998                         break;
2999                     default:
3000                         g_assert_not_reached();
3001                     }
3002 
3003                     switch (membar & TCG_MO_ALL) {
3004                     case 0:
3005                         m_op = "none";
3006                         break;
3007                     case TCG_MO_LD_LD:
3008                         m_op = "rr";
3009                         break;
3010                     case TCG_MO_LD_ST:
3011                         m_op = "rw";
3012                         break;
3013                     case TCG_MO_ST_LD:
3014                         m_op = "wr";
3015                         break;
3016                     case TCG_MO_ST_ST:
3017                         m_op = "ww";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3020                         m_op = "rr+rw";
3021                         break;
3022                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3023                         m_op = "rr+wr";
3024                         break;
3025                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3026                         m_op = "rr+ww";
3027                         break;
3028                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3029                         m_op = "rw+wr";
3030                         break;
3031                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3032                         m_op = "rw+ww";
3033                         break;
3034                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3035                         m_op = "wr+ww";
3036                         break;
3037                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3038                         m_op = "rr+rw+wr";
3039                         break;
3040                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3041                         m_op = "rr+rw+ww";
3042                         break;
3043                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3044                         m_op = "rr+wr+ww";
3045                         break;
3046                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3047                         m_op = "rw+wr+ww";
3048                         break;
3049                     case TCG_MO_ALL:
3050                         m_op = "all";
3051                         break;
3052                     default:
3053                         g_assert_not_reached();
3054                     }
3055 
3056                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3057                     i++, k++;
3058                 }
3059                 break;
3060             default:
3061                 break;
3062             }
3063             for (; i < nb_cargs; i++, k++) {
3064                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3065                                   op->args[k]);
3066             }
3067         }
3068 
3069         if (have_prefs || op->life) {
3070             for (; col < 40; ++col) {
3071                 putc(' ', f);
3072             }
3073         }
3074 
3075         if (op->life) {
3076             unsigned life = op->life;
3077 
3078             if (life & (SYNC_ARG * 3)) {
3079                 ne_fprintf(f, "  sync:");
3080                 for (i = 0; i < 2; ++i) {
3081                     if (life & (SYNC_ARG << i)) {
3082                         ne_fprintf(f, " %d", i);
3083                     }
3084                 }
3085             }
3086             life /= DEAD_ARG;
3087             if (life) {
3088                 ne_fprintf(f, "  dead:");
3089                 for (i = 0; life; ++i, life >>= 1) {
3090                     if (life & 1) {
3091                         ne_fprintf(f, " %d", i);
3092                     }
3093                 }
3094             }
3095         }
3096 
3097         if (have_prefs) {
3098             for (i = 0; i < nb_oargs; ++i) {
3099                 TCGRegSet set = output_pref(op, i);
3100 
3101                 if (i == 0) {
3102                     ne_fprintf(f, "  pref=");
3103                 } else {
3104                     ne_fprintf(f, ",");
3105                 }
3106                 if (set == 0) {
3107                     ne_fprintf(f, "none");
3108                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3109                     ne_fprintf(f, "all");
3110 #ifdef CONFIG_DEBUG_TCG
3111                 } else if (tcg_regset_single(set)) {
3112                     TCGReg reg = tcg_regset_first(set);
3113                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3114 #endif
3115                 } else if (TCG_TARGET_NB_REGS <= 32) {
3116                     ne_fprintf(f, "0x%x", (uint32_t)set);
3117                 } else {
3118                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3119                 }
3120             }
3121         }
3122 
3123         putc('\n', f);
3124     }
3125 }
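
/*
 * Illustrative output shape only -- the opcodes, temps, addresses and
 * annotations below are made up:
 *
 *  ---- 0000000000401000
 *  mov_i32 tmp0,eax
 *  add_i32 tmp1,tmp0,$0x1                  dead: 1  pref=all
 *  brcond_i32 tmp1,$0x0,eq,$L0             dead: 0
 */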
3126 
3127 /* Give more priority to constraints with fewer registers. */
3128 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3129 {
3130     int n;
3131 
3132     arg_ct += k;
3133     n = ctpop64(arg_ct->regs);
3134 
3135     /*
3136      * Sort constraints of a single register first, which includes output
3137      * aliases (which must exactly match the input already allocated).
3138      */
3139     if (n == 1 || arg_ct->oalias) {
3140         return INT_MAX;
3141     }
3142 
3143     /*
3144      * Sort register pairs next, first then second immediately after.
3145      * Arbitrarily sort multiple pairs by the index of the first reg;
3146      * there shouldn't be many pairs.
3147      */
3148     switch (arg_ct->pair) {
3149     case 1:
3150     case 3:
3151         return (k + 1) * 2;
3152     case 2:
3153         return (arg_ct->pair_index + 1) * 2 - 1;
3154     }
3155 
3156     /* Finally, sort by decreasing register count. */
3157     assert(n > 1);
3158     return -n;
3159 }
3160 
3161 /* sort from highest priority to lowest */
3162 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3163 {
3164     int i, j;
3165 
3166     for (i = 0; i < n; i++) {
3167         a[start + i].sort_index = start + i;
3168     }
3169     if (n <= 1) {
3170         return;
3171     }
3172     for (i = 0; i < n - 1; i++) {
3173         for (j = i + 1; j < n; j++) {
3174             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3175             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3176             if (p1 < p2) {
3177                 int tmp = a[start + i].sort_index;
3178                 a[start + i].sort_index = a[start + j].sort_index;
3179                 a[start + j].sort_index = tmp;
3180             }
3181         }
3182     }
3183 }
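
/*
 * Worked example of the resulting order (for a hypothetical constraint
 * set): an output alias ("0") or single-register constraint sorts
 * first (priority INT_MAX), the two members of a register pair come
 * next in first/second order, and the remaining constraints follow
 * with the most restrictive (fewest allowed registers, priority -n)
 * ahead of the least restrictive.
 */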
3184 
3185 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3186 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3187 
3188 static void process_constraint_sets(void)
3189 {
3190     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3191         const TCGConstraintSet *tdefs = &constraint_sets[c];
3192         TCGArgConstraint *args_ct = all_cts[c];
3193         int nb_oargs = tdefs->nb_oargs;
3194         int nb_iargs = tdefs->nb_iargs;
3195         int nb_args = nb_oargs + nb_iargs;
3196         bool saw_alias_pair = false;
3197 
3198         for (int i = 0; i < nb_args; i++) {
3199             const char *ct_str = tdefs->args_ct_str[i];
3200             bool input_p = i >= nb_oargs;
3201             int o;
3202 
3203             switch (*ct_str) {
3204             case '0' ... '9':
3205                 o = *ct_str - '0';
3206                 tcg_debug_assert(input_p);
3207                 tcg_debug_assert(o < nb_oargs);
3208                 tcg_debug_assert(args_ct[o].regs != 0);
3209                 tcg_debug_assert(!args_ct[o].oalias);
3210                 args_ct[i] = args_ct[o];
3211                 /* The output sets oalias.  */
3212                 args_ct[o].oalias = 1;
3213                 args_ct[o].alias_index = i;
3214                 /* The input sets ialias. */
3215                 args_ct[i].ialias = 1;
3216                 args_ct[i].alias_index = o;
3217                 if (args_ct[i].pair) {
3218                     saw_alias_pair = true;
3219                 }
3220                 tcg_debug_assert(ct_str[1] == '\0');
3221                 continue;
3222 
3223             case '&':
3224                 tcg_debug_assert(!input_p);
3225                 args_ct[i].newreg = true;
3226                 ct_str++;
3227                 break;
3228 
3229             case 'p': /* plus */
3230                 /* Allocate to the register after the previous. */
3231                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3232                 o = i - 1;
3233                 tcg_debug_assert(!args_ct[o].pair);
3234                 tcg_debug_assert(!args_ct[o].ct);
3235                 args_ct[i] = (TCGArgConstraint){
3236                     .pair = 2,
3237                     .pair_index = o,
3238                     .regs = args_ct[o].regs << 1,
3239                     .newreg = args_ct[o].newreg,
3240                 };
3241                 args_ct[o].pair = 1;
3242                 args_ct[o].pair_index = i;
3243                 tcg_debug_assert(ct_str[1] == '\0');
3244                 continue;
3245 
3246             case 'm': /* minus */
3247                 /* Allocate to the register before the previous. */
3248                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3249                 o = i - 1;
3250                 tcg_debug_assert(!args_ct[o].pair);
3251                 tcg_debug_assert(!args_ct[o].ct);
3252                 args_ct[i] = (TCGArgConstraint){
3253                     .pair = 1,
3254                     .pair_index = o,
3255                     .regs = args_ct[o].regs >> 1,
3256                     .newreg = args_ct[o].newreg,
3257                 };
3258                 args_ct[o].pair = 2;
3259                 args_ct[o].pair_index = i;
3260                 tcg_debug_assert(ct_str[1] == '\0');
3261                 continue;
3262             }
3263 
3264             do {
3265                 switch (*ct_str) {
3266                 case 'i':
3267                     args_ct[i].ct |= TCG_CT_CONST;
3268                     break;
3269 #ifdef TCG_REG_ZERO
3270                 case 'z':
3271                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3272                     break;
3273 #endif
3274 
3275                 /* Include all of the target-specific constraints. */
3276 
3277 #undef CONST
3278 #define CONST(CASE, MASK) \
3279     case CASE: args_ct[i].ct |= MASK; break;
3280 #define REGS(CASE, MASK) \
3281     case CASE: args_ct[i].regs |= MASK; break;
3282 
3283 #include "tcg-target-con-str.h"
3284 
3285 #undef REGS
3286 #undef CONST
3287                 default:
3288                 case '0' ... '9':
3289                 case '&':
3290                 case 'p':
3291                 case 'm':
3292                     /* Typo in TCGConstraintSet constraint. */
3293                     g_assert_not_reached();
3294                 }
3295             } while (*++ct_str != '\0');
3296         }
3297 
3298         /*
3299          * Fix up output pairs that are aliased with inputs.
3300          * When we created the alias, we copied pair from the output.
3301          * There are three cases:
3302          *    (1a) Pairs of inputs alias pairs of outputs.
3303          *    (1b) One input aliases the first of a pair of outputs.
3304          *    (2)  One input aliases the second of a pair of outputs.
3305          *
3306          * Case 1a is handled by making sure that the pair_index'es are
3307          * properly updated so that they appear the same as a pair of inputs.
3308          *
3309          * Case 1b is handled by setting the pair_index of the input to
3310          * itself, simply so it doesn't point to an unrelated argument.
3311          * Since we don't encounter the "second" during the input allocation
3312          * phase, nothing happens with the second half of the input pair.
3313          *
3314          * Case 2 is handled by setting the second input to pair=3, the
3315          * first output to pair=3, and the pair_index'es to match.
3316          */
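        /*
         * Hedged example for case 2 (hypothetical constraints, not from
         * any real backend): an op with output pair { "r", "p" } and an
         * input "1" aliasing the second output.  Parsing copies pair=2
         * into the input; the fix-up below then sets pair=3 on both the
         * input and the first output, with pair_index linking the two,
         * so the allocator can see that the input shares a register
         * with the second half of the output pair.
         */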
3317         if (saw_alias_pair) {
3318             for (int i = nb_oargs; i < nb_args; i++) {
3319                 int o, o2, i2;
3320 
3321                 /*
3322                  * Since [0-9pm] must be alone in the constraint string,
3323                  * the only way they can both be set is if the pair comes
3324                  * from the output alias.
3325                  */
3326                 if (!args_ct[i].ialias) {
3327                     continue;
3328                 }
3329                 switch (args_ct[i].pair) {
3330                 case 0:
3331                     break;
3332                 case 1:
3333                     o = args_ct[i].alias_index;
3334                     o2 = args_ct[o].pair_index;
3335                     tcg_debug_assert(args_ct[o].pair == 1);
3336                     tcg_debug_assert(args_ct[o2].pair == 2);
3337                     if (args_ct[o2].oalias) {
3338                         /* Case 1a */
3339                         i2 = args_ct[o2].alias_index;
3340                         tcg_debug_assert(args_ct[i2].pair == 2);
3341                         args_ct[i2].pair_index = i;
3342                         args_ct[i].pair_index = i2;
3343                     } else {
3344                         /* Case 1b */
3345                         args_ct[i].pair_index = i;
3346                     }
3347                     break;
3348                 case 2:
3349                     o = args_ct[i].alias_index;
3350                     o2 = args_ct[o].pair_index;
3351                     tcg_debug_assert(args_ct[o].pair == 2);
3352                     tcg_debug_assert(args_ct[o2].pair == 1);
3353                     if (args_ct[o2].oalias) {
3354                         /* Case 1a */
3355                         i2 = args_ct[o2].alias_index;
3356                         tcg_debug_assert(args_ct[i2].pair == 1);
3357                         args_ct[i2].pair_index = i;
3358                         args_ct[i].pair_index = i2;
3359                     } else {
3360                         /* Case 2 */
3361                         args_ct[i].pair = 3;
3362                         args_ct[o2].pair = 3;
3363                         args_ct[i].pair_index = o2;
3364                         args_ct[o2].pair_index = i;
3365                     }
3366                     break;
3367                 default:
3368                     g_assert_not_reached();
3369                 }
3370             }
3371         }
3372 
3373         /* sort the constraints (XXX: this is just a heuristic) */
3374         sort_constraints(args_ct, 0, nb_oargs);
3375         sort_constraints(args_ct, nb_oargs, nb_iargs);
3376     }
3377 }
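
/*
 * Illustrative sketch (hypothetical entry, not from a real backend):
 *
 *     { .nb_oargs = 1, .nb_iargs = 2, .args_ct_str = { "r", "r", "ri" } }
 *
 * After process_constraint_sets(), args_ct[0..2].regs hold the full
 * register mask contributed by the "r" entries in tcg-target-con-str.h,
 * and args_ct[2].ct additionally has TCG_CT_CONST set, so the second
 * input may be either a register or an immediate.
 */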
3378 
3379 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3380 {
3381     TCGOpcode opc = op->opc;
3382     TCGType type = TCGOP_TYPE(op);
3383     unsigned flags = TCGOP_FLAGS(op);
3384     const TCGOpDef *def = &tcg_op_defs[opc];
3385     const TCGOutOp *outop = all_outop[opc];
3386     TCGConstraintSetIndex con_set;
3387 
3388     if (def->flags & TCG_OPF_NOT_PRESENT) {
3389         return empty_cts;
3390     }
3391 
3392     if (outop) {
3393         con_set = outop->static_constraint;
3394         if (con_set == C_Dynamic) {
3395             con_set = outop->dynamic_constraint(type, flags);
3396         }
3397     } else {
3398         con_set = tcg_target_op_def(opc, type, flags);
3399     }
3400     tcg_debug_assert(con_set >= 0);
3401     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3402 
3403     /* The constraint arguments must match TCGOpcode arguments. */
3404     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3405     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3406 
3407     return all_cts[con_set];
3408 }
3409 
3410 static void remove_label_use(TCGOp *op, int idx)
3411 {
3412     TCGLabel *label = arg_label(op->args[idx]);
3413     TCGLabelUse *use;
3414 
3415     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3416         if (use->op == op) {
3417             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3418             return;
3419         }
3420     }
3421     g_assert_not_reached();
3422 }
3423 
3424 void tcg_op_remove(TCGContext *s, TCGOp *op)
3425 {
3426     switch (op->opc) {
3427     case INDEX_op_br:
3428         remove_label_use(op, 0);
3429         break;
3430     case INDEX_op_brcond_i32:
3431     case INDEX_op_brcond_i64:
3432         remove_label_use(op, 3);
3433         break;
3434     case INDEX_op_brcond2_i32:
3435         remove_label_use(op, 5);
3436         break;
3437     default:
3438         break;
3439     }
3440 
3441     QTAILQ_REMOVE(&s->ops, op, link);
3442     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3443     s->nb_ops--;
3444 }
3445 
3446 void tcg_remove_ops_after(TCGOp *op)
3447 {
3448     TCGContext *s = tcg_ctx;
3449 
3450     while (true) {
3451         TCGOp *last = tcg_last_op();
3452         if (last == op) {
3453             return;
3454         }
3455         tcg_op_remove(s, last);
3456     }
3457 }
3458 
3459 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3460 {
3461     TCGContext *s = tcg_ctx;
3462     TCGOp *op = NULL;
3463 
3464     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3465         QTAILQ_FOREACH(op, &s->free_ops, link) {
3466             if (nargs <= op->nargs) {
3467                 QTAILQ_REMOVE(&s->free_ops, op, link);
3468                 nargs = op->nargs;
3469                 goto found;
3470             }
3471         }
3472     }
3473 
3474     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3475     nargs = MAX(4, nargs);
3476     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3477 
3478  found:
3479     memset(op, 0, offsetof(TCGOp, link));
3480     op->opc = opc;
3481     op->nargs = nargs;
3482 
3483     /* Check for bitfield overflow. */
3484     tcg_debug_assert(op->nargs == nargs);
3485 
3486     s->nb_ops++;
3487     return op;
3488 }
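
/*
 * Sketch of the recycling behaviour (illustrative only): ops removed by
 * tcg_op_remove() land on s->free_ops, so a later tcg_op_alloc(opc, 2)
 * first scans that list for an entry with nargs >= 2 and reuses it,
 * falling back to tcg_malloc() with at least 4 argument slots to limit
 * fragmentation.
 */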
3489 
3490 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3491 {
3492     TCGOp *op = tcg_op_alloc(opc, nargs);
3493 
3494     if (tcg_ctx->emit_before_op) {
3495         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3496     } else {
3497         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3498     }
3499     return op;
3500 }
3501 
3502 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3503                             TCGOpcode opc, TCGType type, unsigned nargs)
3504 {
3505     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3506 
3507     TCGOP_TYPE(new_op) = type;
3508     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3509     return new_op;
3510 }
3511 
3512 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3513                            TCGOpcode opc, TCGType type, unsigned nargs)
3514 {
3515     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3516 
3517     TCGOP_TYPE(new_op) = type;
3518     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3519     return new_op;
3520 }
3521 
3522 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3523 {
3524     TCGLabelUse *u;
3525 
3526     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3527         TCGOp *op = u->op;
3528         switch (op->opc) {
3529         case INDEX_op_br:
3530             op->args[0] = label_arg(to);
3531             break;
3532         case INDEX_op_brcond_i32:
3533         case INDEX_op_brcond_i64:
3534             op->args[3] = label_arg(to);
3535             break;
3536         case INDEX_op_brcond2_i32:
3537             op->args[5] = label_arg(to);
3538             break;
3539         default:
3540             g_assert_not_reached();
3541         }
3542     }
3543 
3544     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3545 }
3546 
3547 /* Reachability analysis: remove unreachable code.  */
3548 static void __attribute__((noinline))
3549 reachable_code_pass(TCGContext *s)
3550 {
3551     TCGOp *op, *op_next, *op_prev;
3552     bool dead = false;
3553 
3554     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3555         bool remove = dead;
3556         TCGLabel *label;
3557 
3558         switch (op->opc) {
3559         case INDEX_op_set_label:
3560             label = arg_label(op->args[0]);
3561 
3562             /*
3563              * Note that the first op in the TB is always a load,
3564              * so there is always something before a label.
3565              */
3566             op_prev = QTAILQ_PREV(op, link);
3567 
3568             /*
3569              * If we find two sequential labels, move all branches to
3570              * reference the second label and remove the first label.
3571              * Do this before branch to next optimization, so that the
3572              * middle label is out of the way.
3573              */
3574             if (op_prev->opc == INDEX_op_set_label) {
3575                 move_label_uses(label, arg_label(op_prev->args[0]));
3576                 tcg_op_remove(s, op_prev);
3577                 op_prev = QTAILQ_PREV(op, link);
3578             }
3579 
3580             /*
3581              * Optimization can fold conditional branches to unconditional.
3582              * If we find a label which is preceded by an unconditional
3583              * branch to next, remove the branch.  We couldn't do this when
3584              * processing the branch because any dead code between the branch
3585              * and label had not yet been removed.
3586              */
3587             if (op_prev->opc == INDEX_op_br &&
3588                 label == arg_label(op_prev->args[0])) {
3589                 tcg_op_remove(s, op_prev);
3590                 /* Fall through means insns become live again.  */
3591                 dead = false;
3592             }
3593 
3594             if (QSIMPLEQ_EMPTY(&label->branches)) {
3595                 /*
3596                  * While there is an occasional backward branch, virtually
3597                  * all branches generated by the translators are forward.
3598              * Which means that generally we will have already removed
3599              * all references to the label by the time we reach it, and
3600              * there is little to be gained by iterating.
3601                  */
3602                 remove = true;
3603             } else {
3604                 /* Once we see a label, insns become live again.  */
3605                 dead = false;
3606                 remove = false;
3607             }
3608             break;
3609 
3610         case INDEX_op_br:
3611         case INDEX_op_exit_tb:
3612         case INDEX_op_goto_ptr:
3613             /* Unconditional branches; everything following is dead.  */
3614             dead = true;
3615             break;
3616 
3617         case INDEX_op_call:
3618             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
3619             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3620                 dead = true;
3621             }
3622             break;
3623 
3624         case INDEX_op_insn_start:
3625             /* Never remove -- we need to keep these for unwind.  */
3626             remove = false;
3627             break;
3628 
3629         default:
3630             break;
3631         }
3632 
3633         if (remove) {
3634             tcg_op_remove(s, op);
3635         }
3636     }
3637 }
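
/*
 * Worked example (hypothetical op stream): after optimization folds a
 * conditional branch, the pass above turns
 *
 *     br $L1
 *     mov ...          <- dead, removed
 *     set_label $L1    <- branch-to-next with no other users: both removed
 *
 * into an empty sequence, and two adjacent set_label ops are merged by
 * move_label_uses() before the branch-to-next check runs.
 */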
3638 
3639 #define TS_DEAD  1
3640 #define TS_MEM   2
3641 
3642 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3643 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3644 
3645 /* For liveness_pass_1, the register preferences for a given temp.  */
3646 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3647 {
3648     return ts->state_ptr;
3649 }
3650 
3651 /* For liveness_pass_1, reset the preferences for a given temp to the
3652  * maximal regset for its type.
3653  */
3654 static inline void la_reset_pref(TCGTemp *ts)
3655 {
3656     *la_temp_pref(ts)
3657         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3658 }
3659 
3660 /* liveness analysis: end of function: all temps are dead, and globals
3661    should be in memory. */
3662 static void la_func_end(TCGContext *s, int ng, int nt)
3663 {
3664     int i;
3665 
3666     for (i = 0; i < ng; ++i) {
3667         s->temps[i].state = TS_DEAD | TS_MEM;
3668         la_reset_pref(&s->temps[i]);
3669     }
3670     for (i = ng; i < nt; ++i) {
3671         s->temps[i].state = TS_DEAD;
3672         la_reset_pref(&s->temps[i]);
3673     }
3674 }
3675 
3676 /* liveness analysis: end of basic block: all temps are dead, globals
3677    and local temps should be in memory. */
3678 static void la_bb_end(TCGContext *s, int ng, int nt)
3679 {
3680     int i;
3681 
3682     for (i = 0; i < nt; ++i) {
3683         TCGTemp *ts = &s->temps[i];
3684         int state;
3685 
3686         switch (ts->kind) {
3687         case TEMP_FIXED:
3688         case TEMP_GLOBAL:
3689         case TEMP_TB:
3690             state = TS_DEAD | TS_MEM;
3691             break;
3692         case TEMP_EBB:
3693         case TEMP_CONST:
3694             state = TS_DEAD;
3695             break;
3696         default:
3697             g_assert_not_reached();
3698         }
3699         ts->state = state;
3700         la_reset_pref(ts);
3701     }
3702 }
3703 
3704 /* liveness analysis: sync globals back to memory.  */
3705 static void la_global_sync(TCGContext *s, int ng)
3706 {
3707     int i;
3708 
3709     for (i = 0; i < ng; ++i) {
3710         int state = s->temps[i].state;
3711         s->temps[i].state = state | TS_MEM;
3712         if (state == TS_DEAD) {
3713             /* If the global was previously dead, reset prefs.  */
3714             la_reset_pref(&s->temps[i]);
3715         }
3716     }
3717 }
3718 
3719 /*
3720  * liveness analysis: conditional branch: all temps are dead unless
3721  * explicitly live-across-conditional-branch, globals and local temps
3722  * should be synced.
3723  */
3724 static void la_bb_sync(TCGContext *s, int ng, int nt)
3725 {
3726     la_global_sync(s, ng);
3727 
3728     for (int i = ng; i < nt; ++i) {
3729         TCGTemp *ts = &s->temps[i];
3730         int state;
3731 
3732         switch (ts->kind) {
3733         case TEMP_TB:
3734             state = ts->state;
3735             ts->state = state | TS_MEM;
3736             if (state != TS_DEAD) {
3737                 continue;
3738             }
3739             break;
3740         case TEMP_EBB:
3741         case TEMP_CONST:
3742             continue;
3743         default:
3744             g_assert_not_reached();
3745         }
3746         la_reset_pref(&s->temps[i]);
3747     }
3748 }
3749 
3750 /* liveness analysis: sync globals back to memory and kill.  */
3751 static void la_global_kill(TCGContext *s, int ng)
3752 {
3753     int i;
3754 
3755     for (i = 0; i < ng; i++) {
3756         s->temps[i].state = TS_DEAD | TS_MEM;
3757         la_reset_pref(&s->temps[i]);
3758     }
3759 }
3760 
3761 /* liveness analysis: note live globals crossing calls.  */
3762 static void la_cross_call(TCGContext *s, int nt)
3763 {
3764     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3765     int i;
3766 
3767     for (i = 0; i < nt; i++) {
3768         TCGTemp *ts = &s->temps[i];
3769         if (!(ts->state & TS_DEAD)) {
3770             TCGRegSet *pset = la_temp_pref(ts);
3771             TCGRegSet set = *pset;
3772 
3773             set &= mask;
3774             /* If the combination is not possible, restart.  */
3775             if (set == 0) {
3776                 set = tcg_target_available_regs[ts->type] & mask;
3777             }
3778             *pset = set;
3779         }
3780     }
3781 }
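
/*
 * Illustrative example: if a live temp currently prefers a set that is
 * entirely call-clobbered (say, the argument registers), the masking
 * above would leave an empty set, so the preference is restarted from
 * all call-preserved registers available for the temp's type.
 */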
3782 
3783 /*
3784  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3785  * to TEMP_EBB, if possible.
3786  */
3787 static void __attribute__((noinline))
3788 liveness_pass_0(TCGContext *s)
3789 {
3790     void * const multiple_ebb = (void *)(uintptr_t)-1;
3791     int nb_temps = s->nb_temps;
3792     TCGOp *op, *ebb;
3793 
3794     for (int i = s->nb_globals; i < nb_temps; ++i) {
3795         s->temps[i].state_ptr = NULL;
3796     }
3797 
3798     /*
3799      * Represent each EBB by the op at which it begins.  In the case of
3800      * the first EBB, this is the first op, otherwise it is a label.
3801      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3802      * within a single EBB, else MULTIPLE_EBB.
3803      */
3804     ebb = QTAILQ_FIRST(&s->ops);
3805     QTAILQ_FOREACH(op, &s->ops, link) {
3806         const TCGOpDef *def;
3807         int nb_oargs, nb_iargs;
3808 
3809         switch (op->opc) {
3810         case INDEX_op_set_label:
3811             ebb = op;
3812             continue;
3813         case INDEX_op_discard:
3814             continue;
3815         case INDEX_op_call:
3816             nb_oargs = TCGOP_CALLO(op);
3817             nb_iargs = TCGOP_CALLI(op);
3818             break;
3819         default:
3820             def = &tcg_op_defs[op->opc];
3821             nb_oargs = def->nb_oargs;
3822             nb_iargs = def->nb_iargs;
3823             break;
3824         }
3825 
3826         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3827             TCGTemp *ts = arg_temp(op->args[i]);
3828 
3829             if (ts->kind != TEMP_TB) {
3830                 continue;
3831             }
3832             if (ts->state_ptr == NULL) {
3833                 ts->state_ptr = ebb;
3834             } else if (ts->state_ptr != ebb) {
3835                 ts->state_ptr = multiple_ebb;
3836             }
3837         }
3838     }
3839 
3840     /*
3841      * For TEMP_TB that turned out not to be used beyond one EBB,
3842      * reduce the liveness to TEMP_EBB.
3843      */
3844     for (int i = s->nb_globals; i < nb_temps; ++i) {
3845         TCGTemp *ts = &s->temps[i];
3846         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3847             ts->kind = TEMP_EBB;
3848         }
3849     }
3850 }
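
/*
 * Example of the demotion (hypothetical translator output): a TEMP_TB
 * temp written and read only between one pair of labels records that
 * single EBB in state_ptr and is reduced to TEMP_EBB, which later lets
 * la_bb_end() treat it as dead rather than forcing a sync to memory at
 * the end of every basic block.
 */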
3851 
3852 /* Liveness analysis: update the opc_arg_life array to tell if a
3853    given input argument is dead. Instructions updating dead
3854    temporaries are removed. */
3855 static void __attribute__((noinline))
3856 liveness_pass_1(TCGContext *s)
3857 {
3858     int nb_globals = s->nb_globals;
3859     int nb_temps = s->nb_temps;
3860     TCGOp *op, *op_prev;
3861     TCGRegSet *prefs;
3862     int i;
3863 
3864     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3865     for (i = 0; i < nb_temps; ++i) {
3866         s->temps[i].state_ptr = prefs + i;
3867     }
3868 
3869     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3870     la_func_end(s, nb_globals, nb_temps);
3871 
3872     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3873         int nb_iargs, nb_oargs;
3874         TCGOpcode opc_new, opc_new2;
3875         bool have_opc_new2;
3876         TCGLifeData arg_life = 0;
3877         TCGTemp *ts;
3878         TCGOpcode opc = op->opc;
3879         const TCGOpDef *def = &tcg_op_defs[opc];
3880         const TCGArgConstraint *args_ct;
3881 
3882         switch (opc) {
3883         case INDEX_op_call:
3884             {
3885                 const TCGHelperInfo *info = tcg_call_info(op);
3886                 int call_flags = tcg_call_flags(op);
3887 
3888                 nb_oargs = TCGOP_CALLO(op);
3889                 nb_iargs = TCGOP_CALLI(op);
3890 
3891                 /* pure functions can be removed if their result is unused */
3892                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3893                     for (i = 0; i < nb_oargs; i++) {
3894                         ts = arg_temp(op->args[i]);
3895                         if (ts->state != TS_DEAD) {
3896                             goto do_not_remove_call;
3897                         }
3898                     }
3899                     goto do_remove;
3900                 }
3901             do_not_remove_call:
3902 
3903                 /* Output args are dead.  */
3904                 for (i = 0; i < nb_oargs; i++) {
3905                     ts = arg_temp(op->args[i]);
3906                     if (ts->state & TS_DEAD) {
3907                         arg_life |= DEAD_ARG << i;
3908                     }
3909                     if (ts->state & TS_MEM) {
3910                         arg_life |= SYNC_ARG << i;
3911                     }
3912                     ts->state = TS_DEAD;
3913                     la_reset_pref(ts);
3914                 }
3915 
3916                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3917                 memset(op->output_pref, 0, sizeof(op->output_pref));
3918 
3919                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3920                                     TCG_CALL_NO_READ_GLOBALS))) {
3921                     la_global_kill(s, nb_globals);
3922                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3923                     la_global_sync(s, nb_globals);
3924                 }
3925 
3926                 /* Record arguments that die in this helper.  */
3927                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3928                     ts = arg_temp(op->args[i]);
3929                     if (ts->state & TS_DEAD) {
3930                         arg_life |= DEAD_ARG << i;
3931                     }
3932                 }
3933 
3934                 /* For all live registers, remove call-clobbered prefs.  */
3935                 la_cross_call(s, nb_temps);
3936 
3937                 /*
3938                  * Input arguments are live for preceding opcodes.
3939                  *
3940                  * For those arguments that die, and will be allocated in
3941                  * registers, clear the register set for that arg, to be
3942                  * filled in below.  For args that will be on the stack,
3943                  * reset to any available reg.  Process arguments in reverse
3944                  * order so that if a temp is used more than once, the stack
3945                  * reset to max happens before the register reset to 0.
3946                  */
3947                 for (i = nb_iargs - 1; i >= 0; i--) {
3948                     const TCGCallArgumentLoc *loc = &info->in[i];
3949                     ts = arg_temp(op->args[nb_oargs + i]);
3950 
3951                     if (ts->state & TS_DEAD) {
3952                         switch (loc->kind) {
3953                         case TCG_CALL_ARG_NORMAL:
3954                         case TCG_CALL_ARG_EXTEND_U:
3955                         case TCG_CALL_ARG_EXTEND_S:
3956                             if (arg_slot_reg_p(loc->arg_slot)) {
3957                                 *la_temp_pref(ts) = 0;
3958                                 break;
3959                             }
3960                             /* fall through */
3961                         default:
3962                             *la_temp_pref(ts) =
3963                                 tcg_target_available_regs[ts->type];
3964                             break;
3965                         }
3966                         ts->state &= ~TS_DEAD;
3967                     }
3968                 }
3969 
3970                 /*
3971                  * For each input argument, add its input register to prefs.
3972                  * If a temp is used once, this produces a single set bit;
3973                  * if a temp is used multiple times, this produces a set.
3974                  */
3975                 for (i = 0; i < nb_iargs; i++) {
3976                     const TCGCallArgumentLoc *loc = &info->in[i];
3977                     ts = arg_temp(op->args[nb_oargs + i]);
3978 
3979                     switch (loc->kind) {
3980                     case TCG_CALL_ARG_NORMAL:
3981                     case TCG_CALL_ARG_EXTEND_U:
3982                     case TCG_CALL_ARG_EXTEND_S:
3983                         if (arg_slot_reg_p(loc->arg_slot)) {
3984                             tcg_regset_set_reg(*la_temp_pref(ts),
3985                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3986                         }
3987                         break;
3988                     default:
3989                         break;
3990                     }
3991                 }
3992             }
3993             break;
3994         case INDEX_op_insn_start:
3995             break;
3996         case INDEX_op_discard:
3997             /* mark the temporary as dead */
3998             ts = arg_temp(op->args[0]);
3999             ts->state = TS_DEAD;
4000             la_reset_pref(ts);
4001             break;
4002 
4003         case INDEX_op_add2_i32:
4004         case INDEX_op_add2_i64:
4005             opc_new = INDEX_op_add;
4006             goto do_addsub2;
4007         case INDEX_op_sub2_i32:
4008             opc_new = INDEX_op_sub_i32;
4009             goto do_addsub2;
4010         case INDEX_op_sub2_i64:
4011             opc_new = INDEX_op_sub_i64;
4012         do_addsub2:
4013             nb_iargs = 4;
4014             nb_oargs = 2;
4015             /* Test if the high part of the operation is dead, but not
4016                the low part; the result can then be optimized to a simple
4017                add or sub.  This happens often for an x86_64 guest when the
4018                CPU mode is set to 32-bit (worked example after the function). */
4019             if (arg_temp(op->args[1])->state == TS_DEAD) {
4020                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4021                     goto do_remove;
4022                 }
4023                 /* Replace the opcode and adjust the args in place,
4024                    leaving 3 unused args at the end.  */
4025                 op->opc = opc = opc_new;
4026                 op->args[1] = op->args[2];
4027                 op->args[2] = op->args[4];
4028                 /* Fall through and mark the single-word operation live.  */
4029                 nb_iargs = 2;
4030                 nb_oargs = 1;
4031             }
4032             goto do_not_remove;
4033 
4034         case INDEX_op_mulu2_i32:
4035             opc_new = INDEX_op_mul_i32;
4036             opc_new2 = INDEX_op_muluh_i32;
4037             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4038             goto do_mul2;
4039         case INDEX_op_muls2_i32:
4040             opc_new = INDEX_op_mul_i32;
4041             opc_new2 = INDEX_op_mulsh_i32;
4042             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4043             goto do_mul2;
4044         case INDEX_op_mulu2_i64:
4045             opc_new = INDEX_op_mul_i64;
4046             opc_new2 = INDEX_op_muluh_i64;
4047             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4048             goto do_mul2;
4049         case INDEX_op_muls2_i64:
4050             opc_new = INDEX_op_mul_i64;
4051             opc_new2 = INDEX_op_mulsh_i64;
4052             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4053             goto do_mul2;
4054         do_mul2:
4055             nb_iargs = 2;
4056             nb_oargs = 2;
4057             if (arg_temp(op->args[1])->state == TS_DEAD) {
4058                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4059                     /* Both parts of the operation are dead.  */
4060                     goto do_remove;
4061                 }
4062                 /* The high part of the operation is dead; generate the low. */
4063                 op->opc = opc = opc_new;
4064                 op->args[1] = op->args[2];
4065                 op->args[2] = op->args[3];
4066             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4067                 /* The low part of the operation is dead; generate the high. */
4068                 op->opc = opc = opc_new2;
4069                 op->args[0] = op->args[1];
4070                 op->args[1] = op->args[2];
4071                 op->args[2] = op->args[3];
4072             } else {
4073                 goto do_not_remove;
4074             }
4075             /* Mark the single-word operation live.  */
4076             nb_oargs = 1;
4077             goto do_not_remove;
4078 
4079         default:
4080             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4081             nb_iargs = def->nb_iargs;
4082             nb_oargs = def->nb_oargs;
4083 
4084             /* Test if the operation can be removed because all
4085                its outputs are dead.  We assume that nb_oargs == 0
4086                implies side effects.  */
4087             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4088                 for (i = 0; i < nb_oargs; i++) {
4089                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4090                         goto do_not_remove;
4091                     }
4092                 }
4093                 goto do_remove;
4094             }
4095             goto do_not_remove;
4096 
4097         do_remove:
4098             tcg_op_remove(s, op);
4099             break;
4100 
4101         do_not_remove:
4102             for (i = 0; i < nb_oargs; i++) {
4103                 ts = arg_temp(op->args[i]);
4104 
4105                 /* Remember the preference of the uses that followed.  */
4106                 if (i < ARRAY_SIZE(op->output_pref)) {
4107                     op->output_pref[i] = *la_temp_pref(ts);
4108                 }
4109 
4110                 /* Output args are dead.  */
4111                 if (ts->state & TS_DEAD) {
4112                     arg_life |= DEAD_ARG << i;
4113                 }
4114                 if (ts->state & TS_MEM) {
4115                     arg_life |= SYNC_ARG << i;
4116                 }
4117                 ts->state = TS_DEAD;
4118                 la_reset_pref(ts);
4119             }
4120 
4121             /* If end of basic block, update.  */
4122             if (def->flags & TCG_OPF_BB_EXIT) {
4123                 la_func_end(s, nb_globals, nb_temps);
4124             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4125                 la_bb_sync(s, nb_globals, nb_temps);
4126             } else if (def->flags & TCG_OPF_BB_END) {
4127                 la_bb_end(s, nb_globals, nb_temps);
4128             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4129                 la_global_sync(s, nb_globals);
4130                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4131                     la_cross_call(s, nb_temps);
4132                 }
4133             }
4134 
4135             /* Record arguments that die in this opcode.  */
4136             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4137                 ts = arg_temp(op->args[i]);
4138                 if (ts->state & TS_DEAD) {
4139                     arg_life |= DEAD_ARG << i;
4140                 }
4141             }
4142 
4143             /* Input arguments are live for preceding opcodes.  */
4144             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4145                 ts = arg_temp(op->args[i]);
4146                 if (ts->state & TS_DEAD) {
4147                     /* For operands that were dead, initially allow
4148                        all regs for the type.  */
4149                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4150                     ts->state &= ~TS_DEAD;
4151                 }
4152             }
4153 
4154             /* Incorporate constraints for this operand.  */
4155             switch (opc) {
4156             case INDEX_op_mov:
4157                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4158                    have proper constraints.  That said, special case
4159                    moves to propagate preferences backward.  */
4160                 if (IS_DEAD_ARG(1)) {
4161                     *la_temp_pref(arg_temp(op->args[0]))
4162                         = *la_temp_pref(arg_temp(op->args[1]));
4163                 }
4164                 break;
4165 
4166             default:
4167                 args_ct = opcode_args_ct(op);
4168                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4169                     const TCGArgConstraint *ct = &args_ct[i];
4170                     TCGRegSet set, *pset;
4171 
4172                     ts = arg_temp(op->args[i]);
4173                     pset = la_temp_pref(ts);
4174                     set = *pset;
4175 
4176                     set &= ct->regs;
4177                     if (ct->ialias) {
4178                         set &= output_pref(op, ct->alias_index);
4179                     }
4180                     /* If the combination is not possible, restart.  */
4181                     if (set == 0) {
4182                         set = ct->regs;
4183                     }
4184                     *pset = set;
4185                 }
4186                 break;
4187             }
4188             break;
4189         }
4190         op->life = arg_life;
4191     }
4192 }
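
/*
 * Worked example for the do_addsub2 path above (hypothetical): for
 *
 *     add2_i32 lo, hi, al, ah, bl, bh
 *
 * with hi dead but lo live, the opcode is rewritten in place to
 *
 *     add lo, al, bl
 *
 * by shifting args[2] and args[4] down, so only the low-part addition
 * survives liveness.
 */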
4193 
4194 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4195 static bool __attribute__((noinline))
4196 liveness_pass_2(TCGContext *s)
4197 {
4198     int nb_globals = s->nb_globals;
4199     int nb_temps, i;
4200     bool changes = false;
4201     TCGOp *op, *op_next;
4202 
4203     /* Create a temporary for each indirect global.  */
4204     for (i = 0; i < nb_globals; ++i) {
4205         TCGTemp *its = &s->temps[i];
4206         if (its->indirect_reg) {
4207             TCGTemp *dts = tcg_temp_alloc(s);
4208             dts->type = its->type;
4209             dts->base_type = its->base_type;
4210             dts->temp_subindex = its->temp_subindex;
4211             dts->kind = TEMP_EBB;
4212             its->state_ptr = dts;
4213         } else {
4214             its->state_ptr = NULL;
4215         }
4216         /* All globals begin dead.  */
4217         its->state = TS_DEAD;
4218     }
4219     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4220         TCGTemp *its = &s->temps[i];
4221         its->state_ptr = NULL;
4222         its->state = TS_DEAD;
4223     }
4224 
4225     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4226         TCGOpcode opc = op->opc;
4227         const TCGOpDef *def = &tcg_op_defs[opc];
4228         TCGLifeData arg_life = op->life;
4229         int nb_iargs, nb_oargs, call_flags;
4230         TCGTemp *arg_ts, *dir_ts;
4231 
4232         if (opc == INDEX_op_call) {
4233             nb_oargs = TCGOP_CALLO(op);
4234             nb_iargs = TCGOP_CALLI(op);
4235             call_flags = tcg_call_flags(op);
4236         } else {
4237             nb_iargs = def->nb_iargs;
4238             nb_oargs = def->nb_oargs;
4239 
4240             /* Set flags similar to how calls require.  */
4241             if (def->flags & TCG_OPF_COND_BRANCH) {
4242                 /* Like reading globals: sync_globals */
4243                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4244             } else if (def->flags & TCG_OPF_BB_END) {
4245                 /* Like writing globals: save_globals */
4246                 call_flags = 0;
4247             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4248                 /* Like reading globals: sync_globals */
4249                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4250             } else {
4251                 /* No effect on globals.  */
4252                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4253                               TCG_CALL_NO_WRITE_GLOBALS);
4254             }
4255         }
4256 
4257         /* Make sure that input arguments are available.  */
4258         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4259             arg_ts = arg_temp(op->args[i]);
4260             dir_ts = arg_ts->state_ptr;
4261             if (dir_ts && arg_ts->state == TS_DEAD) {
4262                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4263                                   ? INDEX_op_ld_i32
4264                                   : INDEX_op_ld_i64);
4265                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4266                                                   arg_ts->type, 3);
4267 
4268                 lop->args[0] = temp_arg(dir_ts);
4269                 lop->args[1] = temp_arg(arg_ts->mem_base);
4270                 lop->args[2] = arg_ts->mem_offset;
4271 
4272                 /* Loaded, but synced with memory.  */
4273                 arg_ts->state = TS_MEM;
4274             }
4275         }
4276 
4277         /* Perform input replacement, and mark inputs that became dead.
4278            No action is required except keeping temp_state up to date
4279            so that we reload when needed.  */
4280         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4281             arg_ts = arg_temp(op->args[i]);
4282             dir_ts = arg_ts->state_ptr;
4283             if (dir_ts) {
4284                 op->args[i] = temp_arg(dir_ts);
4285                 changes = true;
4286                 if (IS_DEAD_ARG(i)) {
4287                     arg_ts->state = TS_DEAD;
4288                 }
4289             }
4290         }
4291 
4292         /* Liveness analysis should ensure that the following are
4293            all correct, for call sites and basic block end points.  */
4294         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4295             /* Nothing to do */
4296         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4297             for (i = 0; i < nb_globals; ++i) {
4298                 /* Liveness should see that globals are synced back,
4299                    that is, either TS_DEAD or TS_MEM.  */
4300                 arg_ts = &s->temps[i];
4301                 tcg_debug_assert(arg_ts->state_ptr == 0
4302                                  || arg_ts->state != 0);
4303             }
4304         } else {
4305             for (i = 0; i < nb_globals; ++i) {
4306                 /* Liveness should see that globals are saved back,
4307                    that is, TS_DEAD, waiting to be reloaded.  */
4308                 arg_ts = &s->temps[i];
4309                 tcg_debug_assert(arg_ts->state_ptr == 0
4310                                  || arg_ts->state == TS_DEAD);
4311             }
4312         }
4313 
4314         /* Outputs become available.  */
4315         if (opc == INDEX_op_mov) {
4316             arg_ts = arg_temp(op->args[0]);
4317             dir_ts = arg_ts->state_ptr;
4318             if (dir_ts) {
4319                 op->args[0] = temp_arg(dir_ts);
4320                 changes = true;
4321 
4322                 /* The output is now live and modified.  */
4323                 arg_ts->state = 0;
4324 
4325                 if (NEED_SYNC_ARG(0)) {
4326                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4327                                       ? INDEX_op_st_i32
4328                                       : INDEX_op_st_i64);
4329                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4330                                                      arg_ts->type, 3);
4331                     TCGTemp *out_ts = dir_ts;
4332 
4333                     if (IS_DEAD_ARG(0)) {
4334                         out_ts = arg_temp(op->args[1]);
4335                         arg_ts->state = TS_DEAD;
4336                         tcg_op_remove(s, op);
4337                     } else {
4338                         arg_ts->state = TS_MEM;
4339                     }
4340 
4341                     sop->args[0] = temp_arg(out_ts);
4342                     sop->args[1] = temp_arg(arg_ts->mem_base);
4343                     sop->args[2] = arg_ts->mem_offset;
4344                 } else {
4345                     tcg_debug_assert(!IS_DEAD_ARG(0));
4346                 }
4347             }
4348         } else {
4349             for (i = 0; i < nb_oargs; i++) {
4350                 arg_ts = arg_temp(op->args[i]);
4351                 dir_ts = arg_ts->state_ptr;
4352                 if (!dir_ts) {
4353                     continue;
4354                 }
4355                 op->args[i] = temp_arg(dir_ts);
4356                 changes = true;
4357 
4358                 /* The output is now live and modified.  */
4359                 arg_ts->state = 0;
4360 
4361                 /* Sync outputs upon their last write.  */
4362                 if (NEED_SYNC_ARG(i)) {
4363                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4364                                       ? INDEX_op_st_i32
4365                                       : INDEX_op_st_i64);
4366                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4367                                                      arg_ts->type, 3);
4368 
4369                     sop->args[0] = temp_arg(dir_ts);
4370                     sop->args[1] = temp_arg(arg_ts->mem_base);
4371                     sop->args[2] = arg_ts->mem_offset;
4372 
4373                     arg_ts->state = TS_MEM;
4374                 }
4375                 /* Drop outputs that are dead.  */
4376                 if (IS_DEAD_ARG(i)) {
4377                     arg_ts->state = TS_DEAD;
4378                 }
4379             }
4380         }
4381     }
4382 
4383     return changes;
4384 }
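
/*
 * Sketch of the rewrite (illustrative): for an indirect global G with
 * direct shadow D, a use of G becomes
 *
 *     ld D, G.mem_base, G.mem_offset   <- inserted while G is TS_DEAD
 *     op ... D ...
 *
 * and a write that must be synced gains a matching st after the op,
 * with G's state tracking whether D is loaded, modified, or dead.
 */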
4385 
4386 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4387 {
4388     intptr_t off;
4389     int size, align;
4390 
4391     /* When allocating an object, look at the full type. */
4392     size = tcg_type_size(ts->base_type);
4393     switch (ts->base_type) {
4394     case TCG_TYPE_I32:
4395         align = 4;
4396         break;
4397     case TCG_TYPE_I64:
4398     case TCG_TYPE_V64:
4399         align = 8;
4400         break;
4401     case TCG_TYPE_I128:
4402     case TCG_TYPE_V128:
4403     case TCG_TYPE_V256:
4404         /*
4405          * Note that we do not require aligned storage for V256,
4406          * and that we provide alignment for I128 to match V128,
4407          * even if that's above what the host ABI requires.
4408          */
4409         align = 16;
4410         break;
4411     default:
4412         g_assert_not_reached();
4413     }
4414 
4415     /*
4416      * Assume the stack is sufficiently aligned.
4417      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4418      * and do not require 16 byte vector alignment.  This seems slightly
4419      * easier than fully parameterizing the above switch statement.
4420      */
4421     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4422     off = ROUND_UP(s->current_frame_offset, align);
4423 
4424     /* If we've exhausted the stack frame, restart with a smaller TB. */
4425     if (off + size > s->frame_end) {
4426         tcg_raise_tb_overflow(s);
4427     }
4428     s->current_frame_offset = off + size;
4429 #if defined(__sparc__)
4430     off += TCG_TARGET_STACK_BIAS;
4431 #endif
4432 
4433     /* If the object was subdivided, assign memory to all the parts. */
4434     if (ts->base_type != ts->type) {
4435         int part_size = tcg_type_size(ts->type);
4436         int part_count = size / part_size;
4437 
4438         /*
4439          * Each part is allocated sequentially in tcg_temp_new_internal.
4440          * Jump back to the first part by subtracting the current index.
4441          */
4442         ts -= ts->temp_subindex;
4443         for (int i = 0; i < part_count; ++i) {
4444             ts[i].mem_offset = off + i * part_size;
4445             ts[i].mem_base = s->frame_temp;
4446             ts[i].mem_allocated = 1;
4447         }
4448     } else {
4449         ts->mem_offset = off;
4450         ts->mem_base = s->frame_temp;
4451         ts->mem_allocated = 1;
4452     }
4453 }
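
/*
 * Illustrative allocation (assuming a 16-byte-aligned frame): an I128
 * temp that was subdivided into two I64 parts receives mem_offset
 * off and off + 8 for subindex 0 and 1 respectively, both based on
 * s->frame_temp, after off is rounded up to the 16-byte alignment
 * chosen in the switch above.
 */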
4454 
4455 /* Assign @reg to @ts, and update reg_to_temp[]. */
4456 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4457 {
4458     if (ts->val_type == TEMP_VAL_REG) {
4459         TCGReg old = ts->reg;
4460         tcg_debug_assert(s->reg_to_temp[old] == ts);
4461         if (old == reg) {
4462             return;
4463         }
4464         s->reg_to_temp[old] = NULL;
4465     }
4466     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4467     s->reg_to_temp[reg] = ts;
4468     ts->val_type = TEMP_VAL_REG;
4469     ts->reg = reg;
4470 }
4471 
4472 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4473 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4474 {
4475     tcg_debug_assert(type != TEMP_VAL_REG);
4476     if (ts->val_type == TEMP_VAL_REG) {
4477         TCGReg reg = ts->reg;
4478         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4479         s->reg_to_temp[reg] = NULL;
4480     }
4481     ts->val_type = type;
4482 }
4483 
4484 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4485 
4486 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4487    mark it free; otherwise mark it dead.  */
4488 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4489 {
4490     TCGTempVal new_type;
4491 
4492     switch (ts->kind) {
4493     case TEMP_FIXED:
4494         return;
4495     case TEMP_GLOBAL:
4496     case TEMP_TB:
4497         new_type = TEMP_VAL_MEM;
4498         break;
4499     case TEMP_EBB:
4500         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4501         break;
4502     case TEMP_CONST:
4503         new_type = TEMP_VAL_CONST;
4504         break;
4505     default:
4506         g_assert_not_reached();
4507     }
4508     set_temp_val_nonreg(s, ts, new_type);
4509 }
4510 
4511 /* Mark a temporary as dead.  */
4512 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4513 {
4514     temp_free_or_dead(s, ts, 1);
4515 }
4516 
4517 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4518    register needs to be allocated to store a constant.  If 'free_or_dead'
4519    is non-zero, subsequently release the temporary; if it is positive, the
4520    temp is dead; if it is negative, the temp is free.  */
4521 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4522                       TCGRegSet preferred_regs, int free_or_dead)
4523 {
4524     if (!temp_readonly(ts) && !ts->mem_coherent) {
4525         if (!ts->mem_allocated) {
4526             temp_allocate_frame(s, ts);
4527         }
4528         switch (ts->val_type) {
4529         case TEMP_VAL_CONST:
4530             /* If we're going to free the temp immediately, then we won't
4531                require it later in a register, so attempt to store the
4532                constant to memory directly.  */
4533             if (free_or_dead
4534                 && tcg_out_sti(s, ts->type, ts->val,
4535                                ts->mem_base->reg, ts->mem_offset)) {
4536                 break;
4537             }
4538             temp_load(s, ts, tcg_target_available_regs[ts->type],
4539                       allocated_regs, preferred_regs);
4540             /* fallthrough */
4541 
4542         case TEMP_VAL_REG:
4543             tcg_out_st(s, ts->type, ts->reg,
4544                        ts->mem_base->reg, ts->mem_offset);
4545             break;
4546 
4547         case TEMP_VAL_MEM:
4548             break;
4549 
4550         case TEMP_VAL_DEAD:
4551         default:
4552             g_assert_not_reached();
4553         }
4554         ts->mem_coherent = 1;
4555     }
4556     if (free_or_dead) {
4557         temp_free_or_dead(s, ts, free_or_dead);
4558     }
4559 }
4560 
4561 /* free register 'reg' by spilling the corresponding temporary if necessary */
4562 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4563 {
4564     TCGTemp *ts = s->reg_to_temp[reg];
4565     if (ts != NULL) {
4566         temp_sync(s, ts, allocated_regs, 0, -1);
4567     }
4568 }
4569 
4570 /**
4571  * tcg_reg_alloc:
4572  * @required_regs: Set of registers in which we must allocate.
4573  * @allocated_regs: Set of registers which must be avoided.
4574  * @preferred_regs: Set of registers we should prefer.
4575  * @rev: True if we search the registers in "indirect" order.
4576  *
4577  * The allocated register must be in @required_regs & ~@allocated_regs,
4578  * but if we can put it in @preferred_regs we may save a move later.
4579  */
4580 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4581                             TCGRegSet allocated_regs,
4582                             TCGRegSet preferred_regs, bool rev)
4583 {
4584     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4585     TCGRegSet reg_ct[2];
4586     const int *order;
4587 
4588     reg_ct[1] = required_regs & ~allocated_regs;
4589     tcg_debug_assert(reg_ct[1] != 0);
4590     reg_ct[0] = reg_ct[1] & preferred_regs;
4591 
4592     /* Skip the preferred_regs option if it cannot be satisfied,
4593        or if the preference made no difference.  */
4594     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4595 
4596     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4597 
4598     /* Try free registers, preferences first.  */
4599     for (j = f; j < 2; j++) {
4600         TCGRegSet set = reg_ct[j];
4601 
4602         if (tcg_regset_single(set)) {
4603             /* One register in the set.  */
4604             TCGReg reg = tcg_regset_first(set);
4605             if (s->reg_to_temp[reg] == NULL) {
4606                 return reg;
4607             }
4608         } else {
4609             for (i = 0; i < n; i++) {
4610                 TCGReg reg = order[i];
4611                 if (s->reg_to_temp[reg] == NULL &&
4612                     tcg_regset_test_reg(set, reg)) {
4613                     return reg;
4614                 }
4615             }
4616         }
4617     }
4618 
4619     /* We must spill something.  */
4620     for (j = f; j < 2; j++) {
4621         TCGRegSet set = reg_ct[j];
4622 
4623         if (tcg_regset_single(set)) {
4624             /* One register in the set.  */
4625             TCGReg reg = tcg_regset_first(set);
4626             tcg_reg_free(s, reg, allocated_regs);
4627             return reg;
4628         } else {
4629             for (i = 0; i < n; i++) {
4630                 TCGReg reg = order[i];
4631                 if (tcg_regset_test_reg(set, reg)) {
4632                     tcg_reg_free(s, reg, allocated_regs);
4633                     return reg;
4634                 }
4635             }
4636         }
4637     }
4638 
4639     g_assert_not_reached();
4640 }
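
/*
 * Illustrative trace (hypothetical register state): with
 * required_regs = { r0, r1 }, preferred_regs = { r1 }, and r1 free,
 * the first loop returns r1 without spilling; if every candidate is
 * occupied, the second loop spills the first candidate in allocation
 * order and returns it.
 */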
4641 
4642 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4643                                  TCGRegSet allocated_regs,
4644                                  TCGRegSet preferred_regs, bool rev)
4645 {
4646     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4647     TCGRegSet reg_ct[2];
4648     const int *order;
4649 
4650     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4651     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4652     tcg_debug_assert(reg_ct[1] != 0);
4653     reg_ct[0] = reg_ct[1] & preferred_regs;
4654 
4655     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4656 
4657     /*
4658      * Skip the preferred_regs option if it cannot be satisfied,
4659      * or if the preference made no difference.
4660      */
4661     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4662 
4663     /*
4664      * Minimize the number of flushes by looking for 2 free registers first,
4665      * then a single flush, then two flushes.
4666      */
4667     for (fmin = 2; fmin >= 0; fmin--) {
4668         for (j = k; j < 2; j++) {
4669             TCGRegSet set = reg_ct[j];
4670 
4671             for (i = 0; i < n; i++) {
4672                 TCGReg reg = order[i];
4673 
4674                 if (tcg_regset_test_reg(set, reg)) {
4675                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4676                     if (f >= fmin) {
4677                         tcg_reg_free(s, reg, allocated_regs);
4678                         tcg_reg_free(s, reg + 1, allocated_regs);
4679                         return reg;
4680                     }
4681                 }
4682             }
4683         }
4684     }
4685     g_assert_not_reached();
4686 }
4687 
4688 /* Make sure the temporary is in a register.  If needed, allocate the register
4689    from DESIRED while avoiding ALLOCATED.  */
4690 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4691                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4692 {
4693     TCGReg reg;
4694 
4695     switch (ts->val_type) {
4696     case TEMP_VAL_REG:
4697         return;
4698     case TEMP_VAL_CONST:
4699         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4700                             preferred_regs, ts->indirect_base);
4701         if (ts->type <= TCG_TYPE_I64) {
4702             tcg_out_movi(s, ts->type, reg, ts->val);
4703         } else {
4704             uint64_t val = ts->val;
4705             MemOp vece = MO_64;
4706 
4707             /*
4708              * Find the minimal vector element that matches the constant.
4709              * The targets will, in general, have to do this search anyway,
4710              * so do it generically here.
4711              */
4712             if (val == dup_const(MO_8, val)) {
4713                 vece = MO_8;
4714             } else if (val == dup_const(MO_16, val)) {
4715                 vece = MO_16;
4716             } else if (val == dup_const(MO_32, val)) {
4717                 vece = MO_32;
4718             }
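            /*
             * For example (values hypothetical): 0x6666666666666666
             * matches MO_8, while 0x0001000100010001 matches MO_16
             * but not MO_8.
             */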
4719 
4720             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4721         }
4722         ts->mem_coherent = 0;
4723         break;
4724     case TEMP_VAL_MEM:
4725         if (!ts->mem_allocated) {
4726             temp_allocate_frame(s, ts);
4727         }
4728         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4729                             preferred_regs, ts->indirect_base);
4730         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4731         ts->mem_coherent = 1;
4732         break;
4733     case TEMP_VAL_DEAD:
4734     default:
4735         g_assert_not_reached();
4736     }
4737     set_temp_val_reg(s, ts, reg);
4738 }
4739 
4740 /* Save a temporary to memory. 'allocated_regs' is used in case a
4741    temporary register needs to be allocated to store a constant.  */
4742 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4743 {
4744     /* The liveness analysis already ensures that globals are back
4745        in memory. Keep a tcg_debug_assert for safety. */
4746     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4747 }
4748 
4749 /* save globals to their canonical location and assume they can be
4750    modified by the following code. 'allocated_regs' is used in case a
4751    temporary register needs to be allocated to store a constant. */
4752 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4753 {
4754     int i, n;
4755 
4756     for (i = 0, n = s->nb_globals; i < n; i++) {
4757         temp_save(s, &s->temps[i], allocated_regs);
4758     }
4759 }
4760 
4761 /* sync globals to their canonical location and assume they can be
4762    read by the following code. 'allocated_regs' is used in case a
4763    temporary register needs to be allocated to store a constant. */
4764 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4765 {
4766     int i, n;
4767 
4768     for (i = 0, n = s->nb_globals; i < n; i++) {
4769         TCGTemp *ts = &s->temps[i];
4770         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4771                          || ts->kind == TEMP_FIXED
4772                          || ts->mem_coherent);
4773     }
4774 }
4775 
4776 /* at the end of a basic block, we assume all temporaries are dead and
4777    all globals are stored at their canonical location. */
4778 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4779 {
4780     int i;
4781 
4782     for (i = s->nb_globals; i < s->nb_temps; i++) {
4783         TCGTemp *ts = &s->temps[i];
4784 
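        /*
         * TEMP_TB temps are live across basic blocks within the TB and
         * must reach their memory slot; TEMP_EBB and TEMP_CONST temps
         * are expected to be dead or constant-only by this point, as
         * the asserts below check.
         */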
4785         switch (ts->kind) {
4786         case TEMP_TB:
4787             temp_save(s, ts, allocated_regs);
4788             break;
4789         case TEMP_EBB:
4790             /* The liveness analysis already ensures that temps are dead.
4791                Keep a tcg_debug_assert for safety. */
4792             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4793             break;
4794         case TEMP_CONST:
4795             /* Similarly, we should have freed any allocated register. */
4796             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4797             break;
4798         default:
4799             g_assert_not_reached();
4800         }
4801     }
4802 
4803     save_globals(s, allocated_regs);
4804 }
4805 
4806 /*
4807  * At a conditional branch, we assume all temporaries are dead unless
4808  * explicitly live-across-conditional-branch; all globals and local
4809  * temps are synced to their location.
4810  */
4811 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4812 {
4813     sync_globals(s, allocated_regs);
4814 
4815     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4816         TCGTemp *ts = &s->temps[i];
4817         /*
4818          * The liveness analysis already ensures that temps are dead.
4819          * Keep tcg_debug_asserts for safety.
4820          */
4821         switch (ts->kind) {
4822         case TEMP_TB:
4823             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4824             break;
4825         case TEMP_EBB:
4826         case TEMP_CONST:
4827             break;
4828         default:
4829             g_assert_not_reached();
4830         }
4831     }
4832 }
4833 
4834 /*
4835  * Specialized code generation for INDEX_op_mov_* with a constant.
4836  */
4837 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4838                                   tcg_target_ulong val, TCGLifeData arg_life,
4839                                   TCGRegSet preferred_regs)
4840 {
4841     /* ENV should not be modified.  */
4842     tcg_debug_assert(!temp_readonly(ots));
4843 
4844     /* The movi is not explicitly generated here.  */
4845     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4846     ots->val = val;
4847     ots->mem_coherent = 0;
4848     if (NEED_SYNC_ARG(0)) {
4849         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4850     } else if (IS_DEAD_ARG(0)) {
4851         temp_dead(s, ots);
4852     }
4853 }
4854 
4855 /*
4856  * Specialized code generation for INDEX_op_mov_*.
4857  */
4858 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4859 {
4860     const TCGLifeData arg_life = op->life;
4861     TCGRegSet allocated_regs, preferred_regs;
4862     TCGTemp *ts, *ots;
4863     TCGType otype, itype;
4864     TCGReg oreg, ireg;
4865 
4866     allocated_regs = s->reserved_regs;
4867     preferred_regs = output_pref(op, 0);
4868     ots = arg_temp(op->args[0]);
4869     ts = arg_temp(op->args[1]);
4870 
4871     /* ENV should not be modified.  */
4872     tcg_debug_assert(!temp_readonly(ots));
4873 
4874     /* Note that otype != itype for no-op truncation.  */
4875     otype = ots->type;
4876     itype = ts->type;
4877 
4878     if (ts->val_type == TEMP_VAL_CONST) {
4879         /* propagate constant or generate sti */
4880         tcg_target_ulong val = ts->val;
4881         if (IS_DEAD_ARG(1)) {
4882             temp_dead(s, ts);
4883         }
4884         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4885         return;
4886     }
4887 
4888     /* If the source value is in memory, we're going to be forced
4889        to have it in a register in order to perform the copy.  Copy
4890        the SOURCE value into its own register first; that way we
4891        don't have to reload SOURCE the next time it is used. */
4892     if (ts->val_type == TEMP_VAL_MEM) {
4893         temp_load(s, ts, tcg_target_available_regs[itype],
4894                   allocated_regs, preferred_regs);
4895     }
4896     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4897     ireg = ts->reg;
4898 
4899     if (IS_DEAD_ARG(0)) {
4900         /* mov to a non-saved dead register makes no sense (even with
4901            liveness analysis disabled). */
4902         tcg_debug_assert(NEED_SYNC_ARG(0));
4903         if (!ots->mem_allocated) {
4904             temp_allocate_frame(s, ots);
4905         }
4906         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4907         if (IS_DEAD_ARG(1)) {
4908             temp_dead(s, ts);
4909         }
4910         temp_dead(s, ots);
4911         return;
4912     }
4913 
4914     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4915         /*
4916          * The mov can be suppressed.  Kill input first, so that it
4917          * is unlinked from reg_to_temp, then set the output to the
4918          * reg that we saved from the input.
4919          */
4920         temp_dead(s, ts);
4921         oreg = ireg;
4922     } else {
4923         if (ots->val_type == TEMP_VAL_REG) {
4924             oreg = ots->reg;
4925         } else {
4926             /* Make sure to not spill the input register during allocation. */
4927             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4928                                  allocated_regs | ((TCGRegSet)1 << ireg),
4929                                  preferred_regs, ots->indirect_base);
4930         }
4931         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4932             /*
4933              * Cross register class move not supported.
4934              * Store the source register into the destination slot
4935              * and leave the destination temp as TEMP_VAL_MEM.
4936              */
4937             assert(!temp_readonly(ots));
4938             if (!ots->mem_allocated) {
4939                 temp_allocate_frame(s, ots);
4940             }
4941             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4942             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4943             ots->mem_coherent = 1;
4944             return;
4945         }
4946     }
4947     set_temp_val_reg(s, ots, oreg);
4948     ots->mem_coherent = 0;
4949 
4950     if (NEED_SYNC_ARG(0)) {
4951         temp_sync(s, ots, allocated_regs, 0, 0);
4952     }
4953 }
4954 
4955 /*
4956  * Specialized code generation for INDEX_op_dup_vec.
4957  */
4958 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4959 {
4960     const TCGLifeData arg_life = op->life;
4961     TCGRegSet dup_out_regs, dup_in_regs;
4962     const TCGArgConstraint *dup_args_ct;
4963     TCGTemp *its, *ots;
4964     TCGType itype, vtype;
4965     unsigned vece;
4966     int lowpart_ofs;
4967     bool ok;
4968 
4969     ots = arg_temp(op->args[0]);
4970     its = arg_temp(op->args[1]);
4971 
4972     /* ENV should not be modified.  */
4973     tcg_debug_assert(!temp_readonly(ots));
4974 
4975     itype = its->type;
4976     vece = TCGOP_VECE(op);
4977     vtype = TCGOP_TYPE(op);
4978 
4979     if (its->val_type == TEMP_VAL_CONST) {
4980         /* Propagate constant via movi -> dupi.  */
4981         tcg_target_ulong val = its->val;
4982         if (IS_DEAD_ARG(1)) {
4983             temp_dead(s, its);
4984         }
4985         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4986         return;
4987     }
4988 
4989     dup_args_ct = opcode_args_ct(op);
4990     dup_out_regs = dup_args_ct[0].regs;
4991     dup_in_regs = dup_args_ct[1].regs;
4992 
4993     /* Allocate the output register now.  */
4994     if (ots->val_type != TEMP_VAL_REG) {
4995         TCGRegSet allocated_regs = s->reserved_regs;
4996         TCGReg oreg;
4997 
4998         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4999             /* Make sure to not spill the input register. */
5000             tcg_regset_set_reg(allocated_regs, its->reg);
5001         }
5002         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5003                              output_pref(op, 0), ots->indirect_base);
5004         set_temp_val_reg(s, ots, oreg);
5005     }
5006 
5007     switch (its->val_type) {
5008     case TEMP_VAL_REG:
5009         /*
5010          * The dup constraints must be broad, covering all possible VECE.
5011          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5012          * to fail, indicating that extra moves are required for that case.
5013          */
5014         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5015             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5016                 goto done;
5017             }
5018             /* Try again from memory or a vector input register.  */
5019         }
5020         if (!its->mem_coherent) {
5021             /*
5022              * The input register is not synced, and so an extra store
5023              * would be required to use memory.  Attempt an integer-vector
5024              * register move first.  We do not have a TCGRegSet for this.
5025              */
5026             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5027                 break;
5028             }
5029             /* Sync the temp back to its slot and load from there.  */
5030             temp_sync(s, its, s->reserved_regs, 0, 0);
5031         }
5032         /* fall through */
5033 
5034     case TEMP_VAL_MEM:
5035         lowpart_ofs = 0;
5036         if (HOST_BIG_ENDIAN) {
5037             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5038         }
5039         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5040                              its->mem_offset + lowpart_ofs)) {
5041             goto done;
5042         }
5043         /* Load the input into the destination vector register. */
5044         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5045         break;
5046 
5047     default:
5048         g_assert_not_reached();
5049     }
5050 
5051     /* We now have a vector input register, so dup must succeed. */
5052     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5053     tcg_debug_assert(ok);
5054 
5055  done:
5056     ots->mem_coherent = 0;
5057     if (IS_DEAD_ARG(1)) {
5058         temp_dead(s, its);
5059     }
5060     if (NEED_SYNC_ARG(0)) {
5061         temp_sync(s, ots, s->reserved_regs, 0, 0);
5062     }
5063     if (IS_DEAD_ARG(0)) {
5064         temp_dead(s, ots);
5065     }
5066 }
5067 
5068 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5069 {
5070     const TCGLifeData arg_life = op->life;
5071     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5072     TCGRegSet i_allocated_regs;
5073     TCGRegSet o_allocated_regs;
5074     int i, k, nb_iargs, nb_oargs;
5075     TCGReg reg;
5076     TCGArg arg;
5077     const TCGArgConstraint *args_ct;
5078     const TCGArgConstraint *arg_ct;
5079     TCGTemp *ts;
5080     TCGArg new_args[TCG_MAX_OP_ARGS];
5081     int const_args[TCG_MAX_OP_ARGS];
5082     TCGCond op_cond;
5083 
5084     nb_oargs = def->nb_oargs;
5085     nb_iargs = def->nb_iargs;
5086 
5087     /* copy constants */
5088     memcpy(new_args + nb_oargs + nb_iargs,
5089            op->args + nb_oargs + nb_iargs,
5090            sizeof(TCGArg) * def->nb_cargs);
5091 
5092     i_allocated_regs = s->reserved_regs;
5093     o_allocated_regs = s->reserved_regs;
5094 
5095     switch (op->opc) {
5096     case INDEX_op_brcond_i32:
5097     case INDEX_op_brcond_i64:
5098         op_cond = op->args[2];
5099         break;
5100     case INDEX_op_setcond_i32:
5101     case INDEX_op_setcond_i64:
5102     case INDEX_op_negsetcond_i32:
5103     case INDEX_op_negsetcond_i64:
5104     case INDEX_op_cmp_vec:
5105         op_cond = op->args[3];
5106         break;
5107     case INDEX_op_brcond2_i32:
5108         op_cond = op->args[4];
5109         break;
5110     case INDEX_op_movcond_i32:
5111     case INDEX_op_movcond_i64:
5112     case INDEX_op_setcond2_i32:
5113     case INDEX_op_cmpsel_vec:
5114         op_cond = op->args[5];
5115         break;
5116     default:
5117         /* No condition within opcode. */
5118         op_cond = TCG_COND_ALWAYS;
5119         break;
5120     }
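    /*
     * op_cond is passed to tcg_target_const_match() below so that a
     * backend may accept or reject a constant operand depending on the
     * comparison being performed (a value can be encodable for some
     * conditions and not for others).
     */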
5121 
5122     args_ct = opcode_args_ct(op);
5123 
5124     /* satisfy input constraints */
5125     for (k = 0; k < nb_iargs; k++) {
5126         TCGRegSet i_preferred_regs, i_required_regs;
5127         bool allocate_new_reg, copyto_new_reg;
5128         TCGTemp *ts2;
5129         int i1, i2;
5130 
5131         i = args_ct[nb_oargs + k].sort_index;
5132         arg = op->args[i];
5133         arg_ct = &args_ct[i];
5134         ts = arg_temp(arg);
5135 
5136         if (ts->val_type == TEMP_VAL_CONST) {
5137 #ifdef TCG_REG_ZERO
5138             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5139                 /* Hardware zero register: indicate register via non-const. */
5140                 const_args[i] = 0;
5141                 new_args[i] = TCG_REG_ZERO;
5142                 continue;
5143             }
5144 #endif
5145 
5146             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5147                                        op_cond, TCGOP_VECE(op))) {
5148                 /* constant is OK for instruction */
5149                 const_args[i] = 1;
5150                 new_args[i] = ts->val;
5151                 continue;
5152             }
5153         }
5154 
5155         reg = ts->reg;
5156         i_preferred_regs = 0;
5157         i_required_regs = arg_ct->regs;
5158         allocate_new_reg = false;
5159         copyto_new_reg = false;
5160 
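        /*
         * Pair encodings, as the case labels below describe: 0 is an
         * unpaired operand; 1 is the first of a pair (its mate is the
         * operand at pair_index, possibly an output); 2 is the second
         * of a pair; 3 is an input aliasing the second half of an
         * output pair with no matching first input.
         */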
5161         switch (arg_ct->pair) {
5162         case 0: /* not paired */
5163             if (arg_ct->ialias) {
5164                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5165 
5166                 /*
5167                  * If the input is readonly, then it cannot also be an
5168                  * output and aliased to itself.  If the input is not
5169                  * dead after the instruction, we must allocate a new
5170                  * register and move it.
5171                  */
5172                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5173                     || args_ct[arg_ct->alias_index].newreg) {
5174                     allocate_new_reg = true;
5175                 } else if (ts->val_type == TEMP_VAL_REG) {
5176                     /*
5177                      * Check if the current register has already been
5178                      * allocated for another input.
5179                      */
5180                     allocate_new_reg =
5181                         tcg_regset_test_reg(i_allocated_regs, reg);
5182                 }
5183             }
5184             if (!allocate_new_reg) {
5185                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5186                           i_preferred_regs);
5187                 reg = ts->reg;
5188                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5189             }
5190             if (allocate_new_reg) {
5191                 /*
5192                  * Allocate a new register matching the constraint
5193                  * and move the temporary register into it.
5194                  */
5195                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5196                           i_allocated_regs, 0);
5197                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5198                                     i_preferred_regs, ts->indirect_base);
5199                 copyto_new_reg = true;
5200             }
5201             break;
5202 
5203         case 1:
5204             /* First of an input pair; if i1 == i2, the second is an output. */
5205             i1 = i;
5206             i2 = arg_ct->pair_index;
5207             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5208 
5209             /*
5210              * It is easier to default to allocating a new pair
5211              * and to identify a few cases where it's not required.
5212              */
5213             if (arg_ct->ialias) {
5214                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5215                 if (IS_DEAD_ARG(i1) &&
5216                     IS_DEAD_ARG(i2) &&
5217                     !temp_readonly(ts) &&
5218                     ts->val_type == TEMP_VAL_REG &&
5219                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5220                     tcg_regset_test_reg(i_required_regs, reg) &&
5221                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5222                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5223                     (ts2
5224                      ? ts2->val_type == TEMP_VAL_REG &&
5225                        ts2->reg == reg + 1 &&
5226                        !temp_readonly(ts2)
5227                      : s->reg_to_temp[reg + 1] == NULL)) {
5228                     break;
5229                 }
5230             } else {
5231                 /* Without aliasing, the pair must also be an input. */
5232                 tcg_debug_assert(ts2);
5233                 if (ts->val_type == TEMP_VAL_REG &&
5234                     ts2->val_type == TEMP_VAL_REG &&
5235                     ts2->reg == reg + 1 &&
5236                     tcg_regset_test_reg(i_required_regs, reg)) {
5237                     break;
5238                 }
5239             }
5240             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5241                                      0, ts->indirect_base);
5242             goto do_pair;
5243 
5244         case 2: /* pair second */
5245             reg = new_args[arg_ct->pair_index] + 1;
5246             goto do_pair;
5247 
5248         case 3: /* ialias with second output, no first input */
5249             tcg_debug_assert(arg_ct->ialias);
5250             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5251 
5252             if (IS_DEAD_ARG(i) &&
5253                 !temp_readonly(ts) &&
5254                 ts->val_type == TEMP_VAL_REG &&
5255                 reg > 0 &&
5256                 s->reg_to_temp[reg - 1] == NULL &&
5257                 tcg_regset_test_reg(i_required_regs, reg) &&
5258                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5259                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5260                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5261                 break;
5262             }
5263             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5264                                      i_allocated_regs, 0,
5265                                      ts->indirect_base);
5266             tcg_regset_set_reg(i_allocated_regs, reg);
5267             reg += 1;
5268             goto do_pair;
5269 
5270         do_pair:
5271             /*
5272              * If an aliased input is not dead after the instruction,
5273              * we must allocate a new register and move it.
5274              */
5275             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5276                 TCGRegSet t_allocated_regs = i_allocated_regs;
5277 
5278                 /*
5279                  * Because of the alias, and the continued life, make sure
5280                  * that the temp is somewhere *other* than the reg pair,
5281                  * and we get a copy in reg.
5282                  */
5283                 tcg_regset_set_reg(t_allocated_regs, reg);
5284                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5285                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5286                     /* If ts was already in reg, copy it somewhere else. */
5287                     TCGReg nr;
5288                     bool ok;
5289 
5290                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5291                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5292                                        t_allocated_regs, 0, ts->indirect_base);
5293                     ok = tcg_out_mov(s, ts->type, nr, reg);
5294                     tcg_debug_assert(ok);
5295 
5296                     set_temp_val_reg(s, ts, nr);
5297                 } else {
5298                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5299                               t_allocated_regs, 0);
5300                     copyto_new_reg = true;
5301                 }
5302             } else {
5303                 /* Preferably allocate to reg, otherwise copy. */
5304                 i_required_regs = (TCGRegSet)1 << reg;
5305                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5306                           i_preferred_regs);
5307                 copyto_new_reg = ts->reg != reg;
5308             }
5309             break;
5310 
5311         default:
5312             g_assert_not_reached();
5313         }
5314 
5315         if (copyto_new_reg) {
5316             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5317                 /*
5318                  * Cross register class move not supported.  Sync the
5319                  * temp back to its slot and load from there.
5320                  */
5321                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5322                 tcg_out_ld(s, ts->type, reg,
5323                            ts->mem_base->reg, ts->mem_offset);
5324             }
5325         }
5326         new_args[i] = reg;
5327         const_args[i] = 0;
5328         tcg_regset_set_reg(i_allocated_regs, reg);
5329     }
5330 
5331     /* mark dead temporaries and free the associated registers */
5332     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5333         if (IS_DEAD_ARG(i)) {
5334             temp_dead(s, arg_temp(op->args[i]));
5335         }
5336     }
5337 
5338     if (def->flags & TCG_OPF_COND_BRANCH) {
5339         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5340     } else if (def->flags & TCG_OPF_BB_END) {
5341         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5342     } else {
5343         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5344             /* XXX: permit a generic clobber register list? */
5345             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5346                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5347                     tcg_reg_free(s, i, i_allocated_regs);
5348                 }
5349             }
5350         }
5351         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5352             /* sync globals if the op has side effects and might trigger
5353                an exception. */
5354             sync_globals(s, i_allocated_regs);
5355         }
5356 
5357         /* satisfy the output constraints */
5358         for (k = 0; k < nb_oargs; k++) {
5359             i = args_ct[k].sort_index;
5360             arg = op->args[i];
5361             arg_ct = &args_ct[i];
5362             ts = arg_temp(arg);
5363 
5364             /* ENV should not be modified.  */
5365             tcg_debug_assert(!temp_readonly(ts));
5366 
5367             switch (arg_ct->pair) {
5368             case 0: /* not paired */
5369                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5370                     reg = new_args[arg_ct->alias_index];
5371                 } else if (arg_ct->newreg) {
5372                     reg = tcg_reg_alloc(s, arg_ct->regs,
5373                                         i_allocated_regs | o_allocated_regs,
5374                                         output_pref(op, k), ts->indirect_base);
5375                 } else {
5376                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5377                                         output_pref(op, k), ts->indirect_base);
5378                 }
5379                 break;
5380 
5381             case 1: /* first of pair */
5382                 if (arg_ct->oalias) {
5383                     reg = new_args[arg_ct->alias_index];
5384                 } else if (arg_ct->newreg) {
5385                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5386                                              i_allocated_regs | o_allocated_regs,
5387                                              output_pref(op, k),
5388                                              ts->indirect_base);
5389                 } else {
5390                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5391                                              output_pref(op, k),
5392                                              ts->indirect_base);
5393                 }
5394                 break;
5395 
5396             case 2: /* second of pair */
5397                 if (arg_ct->oalias) {
5398                     reg = new_args[arg_ct->alias_index];
5399                 } else {
5400                     reg = new_args[arg_ct->pair_index] + 1;
5401                 }
5402                 break;
5403 
5404             case 3: /* first of pair, aliasing with a second input */
5405                 tcg_debug_assert(!arg_ct->newreg);
5406                 reg = new_args[arg_ct->pair_index] - 1;
5407                 break;
5408 
5409             default:
5410                 g_assert_not_reached();
5411             }
5412             tcg_regset_set_reg(o_allocated_regs, reg);
5413             set_temp_val_reg(s, ts, reg);
5414             ts->mem_coherent = 0;
5415             new_args[i] = reg;
5416         }
5417     }
5418 
5419     /* emit instruction */
5420     TCGType type = TCGOP_TYPE(op);
5421     switch (op->opc) {
5422     case INDEX_op_ext_i32_i64:
5423         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5424         break;
5425     case INDEX_op_extu_i32_i64:
5426         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5427         break;
5428     case INDEX_op_extrl_i64_i32:
5429         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5430         break;
5431 
5432     case INDEX_op_add:
5433     case INDEX_op_and:
5434     case INDEX_op_andc:
5435     case INDEX_op_eqv:
5436     case INDEX_op_nand:
5437     case INDEX_op_or:
5438     case INDEX_op_orc:
5439     case INDEX_op_xor:
5440         {
5441             const TCGOutOpBinary *out =
5442                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5443 
5444             /* Constants should never appear in the first source operand. */
5445             tcg_debug_assert(!const_args[1]);
5446             if (const_args[2]) {
5447                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5448             } else {
5449                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5450             }
5451         }
5452         break;
5453 
5454     default:
5455         if (def->flags & TCG_OPF_VECTOR) {
5456             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5457                            TCGOP_VECE(op), new_args, const_args);
5458         } else {
5459             tcg_out_op(s, op->opc, type, new_args, const_args);
5460         }
5461         break;
5462     }
5463 
5464     /* move the outputs into the correct register if needed */
5465     for (i = 0; i < nb_oargs; i++) {
5466         ts = arg_temp(op->args[i]);
5467 
5468         /* ENV should not be modified.  */
5469         tcg_debug_assert(!temp_readonly(ts));
5470 
5471         if (NEED_SYNC_ARG(i)) {
5472             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5473         } else if (IS_DEAD_ARG(i)) {
5474             temp_dead(s, ts);
5475         }
5476     }
5477 }
5478 
5479 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5480 {
5481     const TCGLifeData arg_life = op->life;
5482     TCGTemp *ots, *itsl, *itsh;
5483     TCGType vtype = TCGOP_TYPE(op);
5484 
5485     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5486     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5487     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5488 
5489     ots = arg_temp(op->args[0]);
5490     itsl = arg_temp(op->args[1]);
5491     itsh = arg_temp(op->args[2]);
5492 
5493     /* ENV should not be modified.  */
5494     tcg_debug_assert(!temp_readonly(ots));
5495 
5496     /* Allocate the output register now.  */
5497     if (ots->val_type != TEMP_VAL_REG) {
5498         TCGRegSet allocated_regs = s->reserved_regs;
5499         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5500         TCGReg oreg;
5501 
5502         /* Make sure to not spill the input registers. */
5503         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5504             tcg_regset_set_reg(allocated_regs, itsl->reg);
5505         }
5506         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5507             tcg_regset_set_reg(allocated_regs, itsh->reg);
5508         }
5509 
5510         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5511                              output_pref(op, 0), ots->indirect_base);
5512         set_temp_val_reg(s, ots, oreg);
5513     }
5514 
5515     /* Promote dup2 of immediates to dupi_vec. */
5516     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5517         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5518         MemOp vece = MO_64;
5519 
5520         if (val == dup_const(MO_8, val)) {
5521             vece = MO_8;
5522         } else if (val == dup_const(MO_16, val)) {
5523             vece = MO_16;
5524         } else if (val == dup_const(MO_32, val)) {
5525             vece = MO_32;
5526         }
5527 
5528         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5529         goto done;
5530     }
5531 
5532     /* If the two inputs form one 64-bit value, try dupm_vec. */
5533     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5534         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5535         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5536         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5537 
5538         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5539         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5540 
5541         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5542                              its->mem_base->reg, its->mem_offset)) {
5543             goto done;
5544         }
5545     }
5546 
5547     /* Fall back to generic expansion. */
5548     return false;
5549 
5550  done:
5551     ots->mem_coherent = 0;
5552     if (IS_DEAD_ARG(1)) {
5553         temp_dead(s, itsl);
5554     }
5555     if (IS_DEAD_ARG(2)) {
5556         temp_dead(s, itsh);
5557     }
5558     if (NEED_SYNC_ARG(0)) {
5559         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5560     } else if (IS_DEAD_ARG(0)) {
5561         temp_dead(s, ots);
5562     }
5563     return true;
5564 }
5565 
5566 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5567                          TCGRegSet allocated_regs)
5568 {
5569     if (ts->val_type == TEMP_VAL_REG) {
5570         if (ts->reg != reg) {
5571             tcg_reg_free(s, reg, allocated_regs);
5572             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5573                 /*
5574                  * Cross register class move not supported.  Sync the
5575                  * temp back to its slot and load from there.
5576                  */
5577                 temp_sync(s, ts, allocated_regs, 0, 0);
5578                 tcg_out_ld(s, ts->type, reg,
5579                            ts->mem_base->reg, ts->mem_offset);
5580             }
5581         }
5582     } else {
5583         TCGRegSet arg_set = 0;
5584 
5585         tcg_reg_free(s, reg, allocated_regs);
5586         tcg_regset_set_reg(arg_set, reg);
5587         temp_load(s, ts, arg_set, allocated_regs, 0);
5588     }
5589 }
5590 
5591 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5592                          TCGRegSet allocated_regs)
5593 {
5594     /*
5595      * When the destination is on the stack, load up the temp and store.
5596      * If there are many call-saved registers, the temp might live to
5597      * see another use; otherwise it'll be discarded.
5598      */
5599     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5600     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5601                arg_slot_stk_ofs(arg_slot));
5602 }
5603 
5604 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5605                             TCGTemp *ts, TCGRegSet *allocated_regs)
5606 {
5607     if (arg_slot_reg_p(l->arg_slot)) {
5608         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5609         load_arg_reg(s, reg, ts, *allocated_regs);
5610         tcg_regset_set_reg(*allocated_regs, reg);
5611     } else {
5612         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5613     }
5614 }
5615 
5616 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5617                          intptr_t ref_off, TCGRegSet *allocated_regs)
5618 {
5619     TCGReg reg;
5620 
5621     if (arg_slot_reg_p(arg_slot)) {
5622         reg = tcg_target_call_iarg_regs[arg_slot];
5623         tcg_reg_free(s, reg, *allocated_regs);
5624         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5625         tcg_regset_set_reg(*allocated_regs, reg);
5626     } else {
5627         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5628                             *allocated_regs, 0, false);
5629         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5630         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5631                    arg_slot_stk_ofs(arg_slot));
5632     }
5633 }
5634 
5635 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5636 {
5637     const int nb_oargs = TCGOP_CALLO(op);
5638     const int nb_iargs = TCGOP_CALLI(op);
5639     const TCGLifeData arg_life = op->life;
5640     const TCGHelperInfo *info = tcg_call_info(op);
5641     TCGRegSet allocated_regs = s->reserved_regs;
5642     int i;
5643 
5644     /*
5645      * Move inputs into place in reverse order,
5646      * so that we place stacked arguments first.
5647      */
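    /*
     * Stacked arguments are stored while no argument registers have yet
     * been pinned in allocated_regs; the register arguments that follow
     * then face fewer conflicts.
     */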
5648     for (i = nb_iargs - 1; i >= 0; --i) {
5649         const TCGCallArgumentLoc *loc = &info->in[i];
5650         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5651 
5652         switch (loc->kind) {
5653         case TCG_CALL_ARG_NORMAL:
5654         case TCG_CALL_ARG_EXTEND_U:
5655         case TCG_CALL_ARG_EXTEND_S:
5656             load_arg_normal(s, loc, ts, &allocated_regs);
5657             break;
5658         case TCG_CALL_ARG_BY_REF:
5659             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5660             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5661                          arg_slot_stk_ofs(loc->ref_slot),
5662                          &allocated_regs);
5663             break;
5664         case TCG_CALL_ARG_BY_REF_N:
5665             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5666             break;
5667         default:
5668             g_assert_not_reached();
5669         }
5670     }
5671 
5672     /* Mark dead temporaries and free the associated registers.  */
5673     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5674         if (IS_DEAD_ARG(i)) {
5675             temp_dead(s, arg_temp(op->args[i]));
5676         }
5677     }
5678 
5679     /* Clobber call registers.  */
5680     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5681         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5682             tcg_reg_free(s, i, allocated_regs);
5683         }
5684     }
5685 
5686     /*
5687      * Save globals if they might be written by the helper,
5688      * sync them if they might be read.
5689      */
5690     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5691         /* Nothing to do */
5692     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5693         sync_globals(s, allocated_regs);
5694     } else {
5695         save_globals(s, allocated_regs);
5696     }
5697 
5698     /*
5699      * If the ABI passes a pointer to the returned struct as the first
5700      * argument, load that now.  Pass a pointer to the output home slot.
5701      */
5702     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5703         TCGTemp *ts = arg_temp(op->args[0]);
5704 
5705         if (!ts->mem_allocated) {
5706             temp_allocate_frame(s, ts);
5707         }
5708         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5709     }
5710 
5711     tcg_out_call(s, tcg_call_func(op), info);
5712 
5713     /* Assign output registers and emit moves if needed.  */
5714     switch (info->out_kind) {
5715     case TCG_CALL_RET_NORMAL:
5716         for (i = 0; i < nb_oargs; i++) {
5717             TCGTemp *ts = arg_temp(op->args[i]);
5718             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5719 
5720             /* ENV should not be modified.  */
5721             tcg_debug_assert(!temp_readonly(ts));
5722 
5723             set_temp_val_reg(s, ts, reg);
5724             ts->mem_coherent = 0;
5725         }
5726         break;
5727 
5728     case TCG_CALL_RET_BY_VEC:
5729         {
5730             TCGTemp *ts = arg_temp(op->args[0]);
5731 
5732             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5733             tcg_debug_assert(ts->temp_subindex == 0);
5734             if (!ts->mem_allocated) {
5735                 temp_allocate_frame(s, ts);
5736             }
5737             tcg_out_st(s, TCG_TYPE_V128,
5738                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5739                        ts->mem_base->reg, ts->mem_offset);
5740         }
5741         /* fall through to mark all parts in memory */
5742 
5743     case TCG_CALL_RET_BY_REF:
5744         /* The callee has performed a write through the reference. */
5745         for (i = 0; i < nb_oargs; i++) {
5746             TCGTemp *ts = arg_temp(op->args[i]);
5747             ts->val_type = TEMP_VAL_MEM;
5748         }
5749         break;
5750 
5751     default:
5752         g_assert_not_reached();
5753     }
5754 
5755     /* Flush or discard output registers as needed. */
5756     for (i = 0; i < nb_oargs; i++) {
5757         TCGTemp *ts = arg_temp(op->args[i]);
5758         if (NEED_SYNC_ARG(i)) {
5759             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5760         } else if (IS_DEAD_ARG(i)) {
5761             temp_dead(s, ts);
5762         }
5763     }
5764 }
5765 
5766 /**
5767  * atom_and_align_for_opc:
5768  * @s: tcg context
5769  * @opc: memory operation code
5770  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5771  * @allow_two_ops: true if we are prepared to issue two operations
5772  *
5773  * Return the alignment and atomicity to use for the inline fast path
5774  * for the given memory operation.  The alignment may be larger than
5775  * that specified in @opc, and the correct alignment will be diagnosed
5776  * by the slow path helper.
5777  *
5778  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5779  * and issue two loads or stores for subalignment.
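 *
 * For example (illustrative): a MO_128 load with MO_ATOM_IFALIGN_PAIR
 * yields atom = MO_64, meaning each 8-byte half must be atomic while
 * the 16-byte whole need not be.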
5780  */
5781 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5782                                            MemOp host_atom, bool allow_two_ops)
5783 {
5784     MemOp align = memop_alignment_bits(opc);
5785     MemOp size = opc & MO_SIZE;
5786     MemOp half = size ? size - 1 : 0;
5787     MemOp atom = opc & MO_ATOM_MASK;
5788     MemOp atmax;
5789 
5790     switch (atom) {
5791     case MO_ATOM_NONE:
5792         /* The operation requires no specific atomicity. */
5793         atmax = MO_8;
5794         break;
5795 
5796     case MO_ATOM_IFALIGN:
5797         atmax = size;
5798         break;
5799 
5800     case MO_ATOM_IFALIGN_PAIR:
5801         atmax = half;
5802         break;
5803 
5804     case MO_ATOM_WITHIN16:
5805         atmax = size;
5806         if (size == MO_128) {
5807             /* Misalignment implies !within16, and therefore no atomicity. */
5808         } else if (host_atom != MO_ATOM_WITHIN16) {
5809             /* The host does not implement within16, so require alignment. */
5810             align = MAX(align, size);
5811         }
5812         break;
5813 
5814     case MO_ATOM_WITHIN16_PAIR:
5815         atmax = size;
5816         /*
5817          * Misalignment implies !within16, and therefore half atomicity.
5818          * Any host prepared for two operations can implement this with
5819          * half alignment.
5820          */
5821         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5822             align = MAX(align, half);
5823         }
5824         break;
5825 
5826     case MO_ATOM_SUBALIGN:
5827         atmax = size;
5828         if (host_atom != MO_ATOM_SUBALIGN) {
5829             /* If unaligned but not odd, there are subobjects up to half. */
5830             if (allow_two_ops) {
5831                 align = MAX(align, half);
5832             } else {
5833                 align = MAX(align, size);
5834             }
5835         }
5836         break;
5837 
5838     default:
5839         g_assert_not_reached();
5840     }
5841 
5842     return (TCGAtomAlign){ .atom = atmax, .align = align };
5843 }
5844 
5845 /*
5846  * Similarly for qemu_ld/st slow path helpers.
5847  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5848  * using only the provided backend tcg_out_* functions.
5849  */
5850 
5851 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5852 {
5853     int ofs = arg_slot_stk_ofs(slot);
5854 
5855     /*
5856      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5857      * require extension to uint64_t, adjust the address for uint32_t.
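     * E.g. on a big-endian 64-bit host, a TCG_TYPE_I32 argument lives
     * in the high-addressed 4 bytes of its 8-byte slot, hence the +4.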
5858      */
5859     if (HOST_BIG_ENDIAN &&
5860         TCG_TARGET_REG_BITS == 64 &&
5861         type == TCG_TYPE_I32) {
5862         ofs += 4;
5863     }
5864     return ofs;
5865 }
5866 
5867 static void tcg_out_helper_load_slots(TCGContext *s,
5868                                       unsigned nmov, TCGMovExtend *mov,
5869                                       const TCGLdstHelperParam *parm)
5870 {
5871     unsigned i;
5872     TCGReg dst3;
5873 
5874     /*
5875      * Start from the end, storing to the stack first.
5876      * This frees those registers, so we need not consider overlap.
5877      */
5878     for (i = nmov; i-- > 0; ) {
5879         unsigned slot = mov[i].dst;
5880 
5881         if (arg_slot_reg_p(slot)) {
5882             goto found_reg;
5883         }
5884 
5885         TCGReg src = mov[i].src;
5886         TCGType dst_type = mov[i].dst_type;
5887         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5888 
5889         /* The argument is going onto the stack; extend into scratch. */
5890         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5891             tcg_debug_assert(parm->ntmp != 0);
5892             mov[i].dst = src = parm->tmp[0];
5893             tcg_out_movext1(s, &mov[i]);
5894         }
5895 
5896         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5897                    tcg_out_helper_stk_ofs(dst_type, slot));
5898     }
5899     return;
5900 
5901  found_reg:
5902     /*
5903      * The remaining arguments are in registers.
5904      * Convert slot numbers to argument registers.
5905      */
5906     nmov = i + 1;
5907     for (i = 0; i < nmov; ++i) {
5908         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5909     }
5910 
5911     switch (nmov) {
5912     case 4:
5913         /* The backend must have provided enough temps for the worst case. */
5914         tcg_debug_assert(parm->ntmp >= 2);
5915 
5916         dst3 = mov[3].dst;
5917         for (unsigned j = 0; j < 3; ++j) {
5918             if (dst3 == mov[j].src) {
5919                 /*
5920                  * Conflict. Copy the source to a temporary, perform the
5921                  * remaining moves, then the extension from our scratch
5922                  * on the way out.
5923                  */
5924                 TCGReg scratch = parm->tmp[1];
5925 
5926                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5927                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5928                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5929                 return;
5930             }
5931         }
5932 
5933         /* No conflicts: perform this move and continue. */
5934         tcg_out_movext1(s, &mov[3]);
5935         /* fall through */
5936 
5937     case 3:
5938         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5939                         parm->ntmp ? parm->tmp[0] : -1);
5940         break;
5941     case 2:
5942         tcg_out_movext2(s, mov, mov + 1,
5943                         parm->ntmp ? parm->tmp[0] : -1);
5944         break;
5945     case 1:
5946         tcg_out_movext1(s, mov);
5947         break;
5948     default:
5949         g_assert_not_reached();
5950     }
5951 }
5952 
5953 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5954                                     TCGType type, tcg_target_long imm,
5955                                     const TCGLdstHelperParam *parm)
5956 {
5957     if (arg_slot_reg_p(slot)) {
5958         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5959     } else {
5960         int ofs = tcg_out_helper_stk_ofs(type, slot);
5961         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5962             tcg_debug_assert(parm->ntmp != 0);
5963             tcg_out_movi(s, type, parm->tmp[0], imm);
5964             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5965         }
5966     }
5967 }
5968 
5969 static void tcg_out_helper_load_common_args(TCGContext *s,
5970                                             const TCGLabelQemuLdst *ldst,
5971                                             const TCGLdstHelperParam *parm,
5972                                             const TCGHelperInfo *info,
5973                                             unsigned next_arg)
5974 {
5975     TCGMovExtend ptr_mov = {
5976         .dst_type = TCG_TYPE_PTR,
5977         .src_type = TCG_TYPE_PTR,
5978         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5979     };
5980     const TCGCallArgumentLoc *loc = &info->in[0];
5981     TCGType type;
5982     unsigned slot;
5983     tcg_target_ulong imm;
5984 
5985     /*
5986      * Handle env, which is always first.
5987      */
5988     ptr_mov.dst = loc->arg_slot;
5989     ptr_mov.src = TCG_AREG0;
5990     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5991 
5992     /*
5993      * Handle oi.
5994      */
5995     imm = ldst->oi;
5996     loc = &info->in[next_arg];
5997     type = TCG_TYPE_I32;
5998     switch (loc->kind) {
5999     case TCG_CALL_ARG_NORMAL:
6000         break;
6001     case TCG_CALL_ARG_EXTEND_U:
6002     case TCG_CALL_ARG_EXTEND_S:
6003         /* No extension required for MemOpIdx. */
6004         tcg_debug_assert(imm <= INT32_MAX);
6005         type = TCG_TYPE_REG;
6006         break;
6007     default:
6008         g_assert_not_reached();
6009     }
6010     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6011     next_arg++;
6012 
6013     /*
6014      * Handle ra.
6015      */
6016     loc = &info->in[next_arg];
6017     slot = loc->arg_slot;
6018     if (parm->ra_gen) {
6019         int arg_reg = -1;
6020         TCGReg ra_reg;
6021 
6022         if (arg_slot_reg_p(slot)) {
6023             arg_reg = tcg_target_call_iarg_regs[slot];
6024         }
6025         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6026 
6027         ptr_mov.dst = slot;
6028         ptr_mov.src = ra_reg;
6029         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6030     } else {
6031         imm = (uintptr_t)ldst->raddr;
6032         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6033     }
6034 }
6035 
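/*
 * Prepare one or two TCGMovExtend records placing LO (and HI, when the
 * value is split) into the argument slots described by LOC, returning
 * the number of records written: 1 for a register-sized value, 2 for
 * I64 on a 32-bit host or I128 on a 64-bit host.
 */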
6036 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6037                                        const TCGCallArgumentLoc *loc,
6038                                        TCGType dst_type, TCGType src_type,
6039                                        TCGReg lo, TCGReg hi)
6040 {
6041     MemOp reg_mo;
6042 
6043     if (dst_type <= TCG_TYPE_REG) {
6044         MemOp src_ext;
6045 
6046         switch (loc->kind) {
6047         case TCG_CALL_ARG_NORMAL:
6048             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6049             break;
6050         case TCG_CALL_ARG_EXTEND_U:
6051             dst_type = TCG_TYPE_REG;
6052             src_ext = MO_UL;
6053             break;
6054         case TCG_CALL_ARG_EXTEND_S:
6055             dst_type = TCG_TYPE_REG;
6056             src_ext = MO_SL;
6057             break;
6058         default:
6059             g_assert_not_reached();
6060         }
6061 
6062         mov[0].dst = loc->arg_slot;
6063         mov[0].dst_type = dst_type;
6064         mov[0].src = lo;
6065         mov[0].src_type = src_type;
6066         mov[0].src_ext = src_ext;
6067         return 1;
6068     }
6069 
6070     if (TCG_TARGET_REG_BITS == 32) {
6071         assert(dst_type == TCG_TYPE_I64);
6072         reg_mo = MO_32;
6073     } else {
6074         assert(dst_type == TCG_TYPE_I128);
6075         reg_mo = MO_64;
6076     }
6077 
6078     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6079     mov[0].src = lo;
6080     mov[0].dst_type = TCG_TYPE_REG;
6081     mov[0].src_type = TCG_TYPE_REG;
6082     mov[0].src_ext = reg_mo;
6083 
6084     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6085     mov[1].src = hi;
6086     mov[1].dst_type = TCG_TYPE_REG;
6087     mov[1].src_type = TCG_TYPE_REG;
6088     mov[1].src_ext = reg_mo;
6089 
6090     return 2;
6091 }
6092 
6093 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6094                                    const TCGLdstHelperParam *parm)
6095 {
6096     const TCGHelperInfo *info;
6097     const TCGCallArgumentLoc *loc;
6098     TCGMovExtend mov[2];
6099     unsigned next_arg, nmov;
6100     MemOp mop = get_memop(ldst->oi);
6101 
6102     switch (mop & MO_SIZE) {
6103     case MO_8:
6104     case MO_16:
6105     case MO_32:
6106         info = &info_helper_ld32_mmu;
6107         break;
6108     case MO_64:
6109         info = &info_helper_ld64_mmu;
6110         break;
6111     case MO_128:
6112         info = &info_helper_ld128_mmu;
6113         break;
6114     default:
6115         g_assert_not_reached();
6116     }
6117 
6118     /* Defer env argument. */
6119     next_arg = 1;
6120 
6121     loc = &info->in[next_arg];
6122     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6123         /*
6124          * 32-bit host with 32-bit guest: zero-extend the guest address
6125          * to 64 bits for the helper by storing the low part, then
6126          * load a zero for the high part.
6127          */
6128         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6129                                TCG_TYPE_I32, TCG_TYPE_I32,
6130                                ldst->addr_reg, -1);
6131         tcg_out_helper_load_slots(s, 1, mov, parm);
6132 
6133         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6134                                 TCG_TYPE_I32, 0, parm);
6135         next_arg += 2;
6136     } else {
6137         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6138                                       ldst->addr_reg, -1);
6139         tcg_out_helper_load_slots(s, nmov, mov, parm);
6140         next_arg += nmov;
6141     }
6142 
6143     switch (info->out_kind) {
6144     case TCG_CALL_RET_NORMAL:
6145     case TCG_CALL_RET_BY_VEC:
6146         break;
6147     case TCG_CALL_RET_BY_REF:
6148         /*
6149          * The return reference is in the first argument slot.
6150          * We need memory in which to return: re-use the top of stack.
6151          */
6152         {
6153             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6154 
6155             if (arg_slot_reg_p(0)) {
6156                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6157                                  TCG_REG_CALL_STACK, ofs_slot0);
6158             } else {
6159                 tcg_debug_assert(parm->ntmp != 0);
6160                 tcg_out_addi_ptr(s, parm->tmp[0],
6161                                  TCG_REG_CALL_STACK, ofs_slot0);
6162                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6163                            TCG_REG_CALL_STACK, ofs_slot0);
6164             }
6165         }
6166         break;
6167     default:
6168         g_assert_not_reached();
6169     }
6170 
6171     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6172 }
6173 
6174 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6175                                   bool load_sign,
6176                                   const TCGLdstHelperParam *parm)
6177 {
6178     MemOp mop = get_memop(ldst->oi);
6179     TCGMovExtend mov[2];
6180     int ofs_slot0;
6181 
6182     switch (ldst->type) {
6183     case TCG_TYPE_I64:
6184         if (TCG_TARGET_REG_BITS == 32) {
6185             break;
6186         }
6187         /* fall through */
6188 
6189     case TCG_TYPE_I32:
6190         mov[0].dst = ldst->datalo_reg;
6191         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6192         mov[0].dst_type = ldst->type;
6193         mov[0].src_type = TCG_TYPE_REG;
6194 
6195         /*
6196          * If load_sign, then we allowed the helper to perform the
6197          * appropriate sign extension to tcg_target_ulong, and all
6198          * we need now is a plain move.
6199          *
6200          * If not, then we expect the relevant extension instruction
6201          * to be no more expensive than a move, and we thus save
6202          * icache footprint by using only one of the two helper
6203          * functions.
6204          */
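        /*
         * For example, an MO_SB load serviced by the zero-extending
         * helper (load_sign false) takes the else branch: src_ext
         * becomes MO_SB and tcg_out_movext1 emits a sign-extension
         * from 8 bits rather than a plain move.
         */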
6205         if (load_sign || !(mop & MO_SIGN)) {
6206             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6207                 mov[0].src_ext = MO_32;
6208             } else {
6209                 mov[0].src_ext = MO_64;
6210             }
6211         } else {
6212             mov[0].src_ext = mop & MO_SSIZE;
6213         }
6214         tcg_out_movext1(s, mov);
6215         return;
6216 
6217     case TCG_TYPE_I128:
6218         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6219         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6220         switch (TCG_TARGET_CALL_RET_I128) {
6221         case TCG_CALL_RET_NORMAL:
6222             break;
6223         case TCG_CALL_RET_BY_VEC:
6224             tcg_out_st(s, TCG_TYPE_V128,
6225                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6226                        TCG_REG_CALL_STACK, ofs_slot0);
6227             /* fall through */
6228         case TCG_CALL_RET_BY_REF:
6229             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6230                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6231             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6232                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6233             return;
6234         default:
6235             g_assert_not_reached();
6236         }
6237         break;
6238 
6239     default:
6240         g_assert_not_reached();
6241     }
6242 
6243     mov[0].dst = ldst->datalo_reg;
6244     mov[0].src =
6245         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6246     mov[0].dst_type = TCG_TYPE_REG;
6247     mov[0].src_type = TCG_TYPE_REG;
6248     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6249 
6250     mov[1].dst = ldst->datahi_reg;
6251     mov[1].src =
6252         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6253     mov[1].dst_type = TCG_TYPE_REG;
6254     mov[1].src_type = TCG_TYPE_REG;
6255     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6256 
6257     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6258 }
6259 
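/*
 * A backend's qemu_ld slow path typically ties the two helpers above
 * together, roughly as in this sketch.  (Hedged: reloc_pc_here,
 * tcg_out_call_int, tcg_out_goto and the qemu_ld_helpers table stand
 * in for backend-specific names; ldst_helper_param is the backend's
 * TCGLdstHelperParam constant.)
 *
 *     static bool tcg_out_qemu_ld_slow_path(TCGContext *s,
 *                                           TCGLabelQemuLdst *l)
 *     {
 *         MemOp opc = get_memop(l->oi);
 *
 *         // Resolve the forward branch that enters this slow path.
 *         if (!reloc_pc_here(s, l->label_ptr[0])) {
 *             return false;
 *         }
 *         // Marshal env, addr, oi and raddr into argument slots.
 *         tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 *         tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
 *         // load_sign == false: extend the helper's return here.
 *         tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 *         tcg_out_goto(s, l->raddr);
 *         return true;
 *     }
 */
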
6260 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6261                                    const TCGLdstHelperParam *parm)
6262 {
6263     const TCGHelperInfo *info;
6264     const TCGCallArgumentLoc *loc;
6265     TCGMovExtend mov[4];
6266     TCGType data_type;
6267     unsigned next_arg, nmov, n;
6268     MemOp mop = get_memop(ldst->oi);
6269 
6270     switch (mop & MO_SIZE) {
6271     case MO_8:
6272     case MO_16:
6273     case MO_32:
6274         info = &info_helper_st32_mmu;
6275         data_type = TCG_TYPE_I32;
6276         break;
6277     case MO_64:
6278         info = &info_helper_st64_mmu;
6279         data_type = TCG_TYPE_I64;
6280         break;
6281     case MO_128:
6282         info = &info_helper_st128_mmu;
6283         data_type = TCG_TYPE_I128;
6284         break;
6285     default:
6286         g_assert_not_reached();
6287     }
6288 
6289     /* Defer env argument. */
6290     next_arg = 1;
6291     nmov = 0;
6292 
6293     /* Handle addr argument. */
6294     loc = &info->in[next_arg];
6295     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6296     if (TCG_TARGET_REG_BITS == 32) {
6297         /*
6298          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6299          * to 64 bits for the helper by storing the low part.  Later,
6300          * after we have processed the register inputs, we will load a
6301          * zero for the high part.
6302          */
6303         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6304                                TCG_TYPE_I32, TCG_TYPE_I32,
6305                                ldst->addr_reg, -1);
6306         next_arg += 2;
6307         nmov += 1;
6308     } else {
6309         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6310                                    ldst->addr_reg, -1);
6311         next_arg += n;
6312         nmov += n;
6313     }
6314 
6315     /* Handle data argument. */
6316     loc = &info->in[next_arg];
6317     switch (loc->kind) {
6318     case TCG_CALL_ARG_NORMAL:
6319     case TCG_CALL_ARG_EXTEND_U:
6320     case TCG_CALL_ARG_EXTEND_S:
6321         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6322                                    ldst->datalo_reg, ldst->datahi_reg);
6323         next_arg += n;
6324         nmov += n;
6325         tcg_out_helper_load_slots(s, nmov, mov, parm);
6326         break;
6327 
6328     case TCG_CALL_ARG_BY_REF:
6329         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6330         tcg_debug_assert(data_type == TCG_TYPE_I128);
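        /*
         * The i128 value is passed by reference: store both halves to
         * the stack slots reserved for it, then pass the address of
         * those slots as the argument proper below.
         */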
6331         tcg_out_st(s, TCG_TYPE_I64,
6332                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6333                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6334         tcg_out_st(s, TCG_TYPE_I64,
6335                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6336                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6337 
6338         tcg_out_helper_load_slots(s, nmov, mov, parm);
6339 
6340         if (arg_slot_reg_p(loc->arg_slot)) {
6341             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6342                              TCG_REG_CALL_STACK,
6343                              arg_slot_stk_ofs(loc->ref_slot));
6344         } else {
6345             tcg_debug_assert(parm->ntmp != 0);
6346             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6347                              arg_slot_stk_ofs(loc->ref_slot));
6348             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6349                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6350         }
6351         next_arg += 2;
6352         break;
6353 
6354     default:
6355         g_assert_not_reached();
6356     }
6357 
6358     if (TCG_TARGET_REG_BITS == 32) {
6359         /* Zero extend the address by loading a zero for the high part. */
6360         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6361         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6362     }
6363 
6364     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6365 }
6366 
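/*
 * Generate host code for the ops queued in s->ops.  Returns the size
 * of the generated code on success, or a negative value on failure:
 * -1 when the code buffer has filled, -2 when the TB has grown too
 * large for the insn-offset search data.  The caller is expected to
 * retry with a fresh buffer or a smaller TB.
 */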
6367 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6368 {
6369     int i, start_words, num_insns;
6370     TCGOp *op;
6371 
6372     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6373                  && qemu_log_in_addr_range(pc_start))) {
6374         FILE *logfile = qemu_log_trylock();
6375         if (logfile) {
6376             fprintf(logfile, "OP:\n");
6377             tcg_dump_ops(s, logfile, false);
6378             fprintf(logfile, "\n");
6379             qemu_log_unlock(logfile);
6380         }
6381     }
6382 
6383 #ifdef CONFIG_DEBUG_TCG
6384     /* Ensure all labels referenced have been emitted.  */
6385     {
6386         TCGLabel *l;
6387         bool error = false;
6388 
6389         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6390             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6391                 qemu_log_mask(CPU_LOG_TB_OP,
6392                               "$L%d referenced but not present.\n", l->id);
6393                 error = true;
6394             }
6395         }
6396         assert(!error);
6397     }
6398 #endif
6399 
6400     /* Do not reuse any EBB that may be allocated within the TB. */
6401     tcg_temp_ebb_reset_freed(s);
6402 
6403     tcg_optimize(s);
6404 
6405     reachable_code_pass(s);
6406     liveness_pass_0(s);
6407     liveness_pass_1(s);
6408 
6409     if (s->nb_indirects > 0) {
6410         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6411                      && qemu_log_in_addr_range(pc_start))) {
6412             FILE *logfile = qemu_log_trylock();
6413             if (logfile) {
6414                 fprintf(logfile, "OP before indirect lowering:\n");
6415                 tcg_dump_ops(s, logfile, false);
6416                 fprintf(logfile, "\n");
6417                 qemu_log_unlock(logfile);
6418             }
6419         }
6420 
6421         /* Replace indirect temps with direct temps.  */
6422         if (liveness_pass_2(s)) {
6423             /* If changes were made, re-run liveness.  */
6424             liveness_pass_1(s);
6425         }
6426     }
6427 
6428     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6429                  && qemu_log_in_addr_range(pc_start))) {
6430         FILE *logfile = qemu_log_trylock();
6431         if (logfile) {
6432             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6433             tcg_dump_ops(s, logfile, true);
6434             fprintf(logfile, "\n");
6435             qemu_log_unlock(logfile);
6436         }
6437     }
6438 
6439     /* Initialize goto_tb jump offsets. */
6440     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6441     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6442     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6443     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6444 
6445     tcg_reg_alloc_start(s);
6446 
6447     /*
6448      * Reset the buffer pointers when restarting after overflow.
6449      * TODO: Move this into translate-all.c with the rest of the
6450      * buffer management.  Having only this done here is confusing.
6451      */
6452     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6453     s->code_ptr = s->code_buf;
6454     s->data_gen_ptr = NULL;
6455 
6456     QSIMPLEQ_INIT(&s->ldst_labels);
6457     s->pool_labels = NULL;
6458 
6459     start_words = s->insn_start_words;
6460     s->gen_insn_data =
6461         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6462 
6463     tcg_out_tb_start(s);
6464 
6465     num_insns = -1;
6466     QTAILQ_FOREACH(op, &s->ops, link) {
6467         TCGOpcode opc = op->opc;
6468 
6469         switch (opc) {
6470         case INDEX_op_mov:
6471         case INDEX_op_mov_vec:
6472             tcg_reg_alloc_mov(s, op);
6473             break;
6474         case INDEX_op_dup_vec:
6475             tcg_reg_alloc_dup(s, op);
6476             break;
6477         case INDEX_op_insn_start:
6478             if (num_insns >= 0) {
6479                 size_t off = tcg_current_code_size(s);
6480                 s->gen_insn_end_off[num_insns] = off;
6481                 /* Assert that we do not overflow our stored offset.  */
6482                 assert(s->gen_insn_end_off[num_insns] == off);
6483             }
6484             num_insns++;
6485             for (i = 0; i < start_words; ++i) {
6486                 s->gen_insn_data[num_insns * start_words + i] =
6487                     tcg_get_insn_start_param(op, i);
6488             }
6489             break;
6490         case INDEX_op_discard:
6491             temp_dead(s, arg_temp(op->args[0]));
6492             break;
6493         case INDEX_op_set_label:
6494             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6495             tcg_out_label(s, arg_label(op->args[0]));
6496             break;
6497         case INDEX_op_call:
6498             tcg_reg_alloc_call(s, op);
6499             break;
6500         case INDEX_op_exit_tb:
6501             tcg_out_exit_tb(s, op->args[0]);
6502             break;
6503         case INDEX_op_goto_tb:
6504             tcg_out_goto_tb(s, op->args[0]);
6505             break;
6506         case INDEX_op_dup2_vec:
6507             if (tcg_reg_alloc_dup2(s, op)) {
6508                 break;
6509             }
6510             /* fall through */
6511         default:
6512             /* Sanity check that we've not introduced any unhandled opcodes. */
6513             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6514                                               TCGOP_FLAGS(op)));
6515             /* Note: it would be faster to have specialized register
6516                allocator functions for some common argument
6517                patterns.  */
6518             tcg_reg_alloc_op(s, op);
6519             break;
6520         }
6521         /* Test for (pending) buffer overflow.  The assumption is that any
6522            one operation beginning below the high water mark cannot overrun
6523            the buffer completely.  Thus we can test for overflow after
6524            generating code without having to check during generation.  */
6525         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6526             return -1;
6527         }
6528         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6529         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6530             return -2;
6531         }
6532     }
6533     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6534     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6535 
6536     /* Generate TB finalization at the end of block */
6537     i = tcg_out_ldst_finalize(s);
6538     if (i < 0) {
6539         return i;
6540     }
6541     i = tcg_out_pool_finalize(s);
6542     if (i < 0) {
6543         return i;
6544     }
6545     if (!tcg_resolve_relocs(s)) {
6546         return -2;
6547     }
6548 
6549 #ifndef CONFIG_TCG_INTERPRETER
6550     /* flush instruction cache */
6551     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6552                         (uintptr_t)s->code_buf,
6553                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6554 #endif
6555 
6556     return tcg_current_code_size(s);
6557 }
6558 
6559 #ifdef ELF_HOST_MACHINE
6560 /* In order to use this feature, the backend needs to do three things:
6561 
6562    (1) Define ELF_HOST_MACHINE to indicate both the value to put
6563        into the ELF image and support for the feature.
6564 
6565    (2) Define tcg_register_jit.  This should create a buffer containing
6566        the contents of a .debug_frame section that describes the post-
6567        prologue unwind info for the tcg machine.
6568 
6569    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6570 */
6571 
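/*
 * A minimal sketch of steps (2) and (3) for a hypothetical backend;
 * the register numbers and unwind ops are placeholders, not a real
 * host description.  Note that tcg_register_jit_int overwrites
 * fde.func_start/func_len below, so the backend need not set them.
 *
 *     typedef struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_def_cfa[4];
 *     } DebugFrame;
 *
 *     static const DebugFrame debug_frame = {
 *         .h.cie.len = sizeof(DebugFrameCIE) - 4,  // length after .len
 *         .h.cie.id = -1,
 *         .h.cie.version = 1,
 *         .h.cie.code_align = 1,
 *         .h.cie.data_align = 0x78,                // sleb128 -8
 *         .h.cie.return_column = 30,               // placeholder reg
 *         .h.fde.len = sizeof(DebugFrame)
 *                    - offsetof(DebugFrame, h.fde.cie_offset),
 *         .fde_def_cfa = {
 *             12, 31, 0x80, 0x02,   // DW_CFA_def_cfa reg31, ofs 256
 *         },
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */
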
6572 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6573 typedef enum {
6574     JIT_NOACTION = 0,
6575     JIT_REGISTER_FN,
6576     JIT_UNREGISTER_FN
6577 } jit_actions_t;
6578 
6579 struct jit_code_entry {
6580     struct jit_code_entry *next_entry;
6581     struct jit_code_entry *prev_entry;
6582     const void *symfile_addr;
6583     uint64_t symfile_size;
6584 };
6585 
6586 struct jit_descriptor {
6587     uint32_t version;
6588     uint32_t action_flag;
6589     struct jit_code_entry *relevant_entry;
6590     struct jit_code_entry *first_entry;
6591 };
6592 
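/*
 * GDB sets a breakpoint on this function; the noinline attribute and
 * the empty asm prevent the compiler from optimizing away the calls
 * that notify the debugger of each new jit_code_entry.
 */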
6593 void __jit_debug_register_code(void) __attribute__((noinline));
6594 void __jit_debug_register_code(void)
6595 {
6596     asm("");
6597 }
6598 
6599 /* Must statically initialize the version, because GDB may check
6600    the version before we can set it.  */
6601 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6602 
6603 /* End GDB interface.  */
6604 
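/* Return the offset of str within strtab.  The string must be
   present; there is no bounds check.  */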
6605 static int find_string(const char *strtab, const char *str)
6606 {
6607     const char *p = strtab + 1;
6608 
6609     while (1) {
6610         if (strcmp(p, str) == 0) {
6611             return p - strtab;
6612         }
6613         p += strlen(p) + 1;
6614     }
6615 }
6616 
6617 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6618                                  const void *debug_frame,
6619                                  size_t debug_frame_size)
6620 {
6621     struct __attribute__((packed)) DebugInfo {
6622         uint32_t  len;
6623         uint16_t  version;
6624         uint32_t  abbrev;
6625         uint8_t   ptr_size;
6626         uint8_t   cu_die;
6627         uint16_t  cu_lang;
6628         uintptr_t cu_low_pc;
6629         uintptr_t cu_high_pc;
6630         uint8_t   fn_die;
6631         char      fn_name[16];
6632         uintptr_t fn_low_pc;
6633         uintptr_t fn_high_pc;
6634         uint8_t   cu_eoc;
6635     };
6636 
6637     struct ElfImage {
6638         ElfW(Ehdr) ehdr;
6639         ElfW(Phdr) phdr;
6640         ElfW(Shdr) shdr[7];
6641         ElfW(Sym)  sym[2];
6642         struct DebugInfo di;
6643         uint8_t    da[24];
6644         char       str[80];
6645     };
6646 
6647     struct ElfImage *img;
6648 
6649     static const struct ElfImage img_template = {
6650         .ehdr = {
6651             .e_ident[EI_MAG0] = ELFMAG0,
6652             .e_ident[EI_MAG1] = ELFMAG1,
6653             .e_ident[EI_MAG2] = ELFMAG2,
6654             .e_ident[EI_MAG3] = ELFMAG3,
6655             .e_ident[EI_CLASS] = ELF_CLASS,
6656             .e_ident[EI_DATA] = ELF_DATA,
6657             .e_ident[EI_VERSION] = EV_CURRENT,
6658             .e_type = ET_EXEC,
6659             .e_machine = ELF_HOST_MACHINE,
6660             .e_version = EV_CURRENT,
6661             .e_phoff = offsetof(struct ElfImage, phdr),
6662             .e_shoff = offsetof(struct ElfImage, shdr),
6663             .e_ehsize = sizeof(ElfW(Ehdr)),
6664             .e_phentsize = sizeof(ElfW(Phdr)),
6665             .e_phnum = 1,
6666             .e_shentsize = sizeof(ElfW(Shdr)),
6667             .e_shnum = ARRAY_SIZE(img->shdr),
6668             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6669 #ifdef ELF_HOST_FLAGS
6670             .e_flags = ELF_HOST_FLAGS,
6671 #endif
6672 #ifdef ELF_OSABI
6673             .e_ident[EI_OSABI] = ELF_OSABI,
6674 #endif
6675         },
6676         .phdr = {
6677             .p_type = PT_LOAD,
6678             .p_flags = PF_X,
6679         },
6680         .shdr = {
6681             [0] = { .sh_type = SHT_NULL },
6682             /* Trick: The contents of code_gen_buffer are not present in
6683                this fake ELF file; that got allocated elsewhere.  Therefore
6684                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6685                will not look for contents.  We can record any address.  */
6686             [1] = { /* .text */
6687                 .sh_type = SHT_NOBITS,
6688                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6689             },
6690             [2] = { /* .debug_info */
6691                 .sh_type = SHT_PROGBITS,
6692                 .sh_offset = offsetof(struct ElfImage, di),
6693                 .sh_size = sizeof(struct DebugInfo),
6694             },
6695             [3] = { /* .debug_abbrev */
6696                 .sh_type = SHT_PROGBITS,
6697                 .sh_offset = offsetof(struct ElfImage, da),
6698                 .sh_size = sizeof(img->da),
6699             },
6700             [4] = { /* .debug_frame */
6701                 .sh_type = SHT_PROGBITS,
6702                 .sh_offset = sizeof(struct ElfImage),
6703             },
6704             [5] = { /* .symtab */
6705                 .sh_type = SHT_SYMTAB,
6706                 .sh_offset = offsetof(struct ElfImage, sym),
6707                 .sh_size = sizeof(img->sym),
6708                 .sh_info = 1,
6709                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6710                 .sh_entsize = sizeof(ElfW(Sym)),
6711             },
6712             [6] = { /* .strtab */
6713                 .sh_type = SHT_STRTAB,
6714                 .sh_offset = offsetof(struct ElfImage, str),
6715                 .sh_size = sizeof(img->str),
6716             }
6717         },
6718         .sym = {
6719             [1] = { /* code_gen_buffer */
6720                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6721                 .st_shndx = 1,
6722             }
6723         },
6724         .di = {
6725             .len = sizeof(struct DebugInfo) - 4,
6726             .version = 2,
6727             .ptr_size = sizeof(void *),
6728             .cu_die = 1,
6729             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6730             .fn_die = 2,
6731             .fn_name = "code_gen_buffer"
6732         },
6733         .da = {
6734             1,          /* abbrev number (the cu) */
6735             0x11, 1,    /* DW_TAG_compile_unit, has children */
6736             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6737             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6738             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6739             0, 0,       /* end of abbrev */
6740             2,          /* abbrev number (the fn) */
6741             0x2e, 0,    /* DW_TAG_subprogram, no children */
6742             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6743             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6744             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6745             0, 0,       /* end of abbrev */
6746             0           /* no more abbrev */
6747         },
6748         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6749                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6750     };
6751 
6752     /* We only need a single jit entry; statically allocate it.  */
6753     static struct jit_code_entry one_entry;
6754 
6755     uintptr_t buf = (uintptr_t)buf_ptr;
6756     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6757     DebugFrameHeader *dfh;
6758 
6759     img = g_malloc(img_size);
6760     *img = img_template;
6761 
6762     img->phdr.p_vaddr = buf;
6763     img->phdr.p_paddr = buf;
6764     img->phdr.p_memsz = buf_size;
6765 
6766     img->shdr[1].sh_name = find_string(img->str, ".text");
6767     img->shdr[1].sh_addr = buf;
6768     img->shdr[1].sh_size = buf_size;
6769 
6770     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6771     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6772 
6773     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6774     img->shdr[4].sh_size = debug_frame_size;
6775 
6776     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6777     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6778 
6779     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6780     img->sym[1].st_value = buf;
6781     img->sym[1].st_size = buf_size;
6782 
6783     img->di.cu_low_pc = buf;
6784     img->di.cu_high_pc = buf + buf_size;
6785     img->di.fn_low_pc = buf;
6786     img->di.fn_high_pc = buf + buf_size;
6787 
6788     dfh = (DebugFrameHeader *)(img + 1);
6789     memcpy(dfh, debug_frame, debug_frame_size);
6790     dfh->fde.func_start = buf;
6791     dfh->fde.func_len = buf_size;
6792 
6793 #ifdef DEBUG_JIT
6794     /* Enable this block to debug creation of the ELF image file.
6795        One can inspect the result with readelf, objdump, etc.  */
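    /* e.g. "readelf --debug-dump=frames /tmp/qemu.jit", assuming the
       default temporary directory.  */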
6796     {
6797         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6798         FILE *f = fopen(jit, "w+b");
6799         if (f) {
6800             if (fwrite(img, img_size, 1, f) != 1) {
6801                 /* Avoid stupid unused return value warning for fwrite.  */
6802             }
6803             fclose(f);
6804         }
6805     }
6806 #endif
6807 
6808     one_entry.symfile_addr = img;
6809     one_entry.symfile_size = img_size;
6810 
6811     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6812     __jit_debug_descriptor.relevant_entry = &one_entry;
6813     __jit_debug_descriptor.first_entry = &one_entry;
6814     __jit_debug_register_code();
6815 }
6816 #else
6817 /* No support for the feature.  Provide the entry point expected by exec.c,
6818    and implement the internal function we declared earlier.  */
6819 
6820 static void tcg_register_jit_int(const void *buf, size_t size,
6821                                  const void *debug_frame,
6822                                  size_t debug_frame_size)
6823 {
6824 }
6825 
6826 void tcg_register_jit(const void *buf, size_t buf_size)
6827 {
6828 }
6829 #endif /* ELF_HOST_MACHINE */
6830 
6831 #if !TCG_TARGET_MAYBE_vec
6832 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6833 {
6834     g_assert_not_reached();
6835 }
6836 #endif
6837