/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr of the insn following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
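
/*
 * Example (illustrative, not part of the original file): a 4-byte
 * access that must be 4-byte aligned and single-copy atomic would be
 * described as { .atom = MO_32, .align = MO_32 }, i.e. lg2 = 2 for
 * both fields, while a simple byte access needs only { MO_8, MO_8 }.
 */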

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
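
/*
 * Flow summary (editorial note): a branch to a not-yet-bound label
 * records a TCGRelocation via tcg_out_reloc(); tcg_out_label() later
 * binds the label to the current code pointer; once generation
 * completes, tcg_resolve_relocs() walks every label and patches each
 * recorded site with patch_reloc().
 */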

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
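
/*
 * Usage sketch (illustrative, not part of the original file): to
 * sign-extend a 32-bit value into a 64-bit register one would call
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * which reaches the MO_UL/MO_SL case above and, since src_ext has
 * MO_SIGN set and src_type is TCG_TYPE_I32, dispatches to
 * tcg_out_exts_i32_i64(s, dst, src).
 */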

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
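
/*
 * Overlap example (illustrative): if i1->dst == i2->src and
 * i2->dst == i1->src, emitting @i1 first would clobber @i2's input;
 * tcg_out_movext2() then either swaps the registers via tcg_out_xchg()
 * or, failing that, copies i1->src into @scratch before emitting the
 * two extensions.
 */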

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
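
/*
 * Note: the insertion sort above orders entries by descending nlong
 * and, within equal nlong, by descending data.  Identical constants
 * therefore end up adjacent, which lets tcg_out_pool_finalize() below
 * emit each distinct value only once.
 */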

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of the block.
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
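
/*
 * Expansion example (illustrative): a line such as C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h expands to the enumerator c_o1_i2_r_r_ri in
 * TCGConstraintSetIndex above, and to the entry
 * { 1, 2, { "r", "r", "ri" } } in constraint_sets[] here: one output,
 * two inputs, and their constraint strings.
 */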

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
};

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
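
/*
 * Editorial note: tcg_malloc() memory is a per-context arena.  All
 * allocations are transient, valid only while the current translation
 * is being generated: tcg_pool_reset() invalidates everything, freeing
 * the oversized "large" chunks and recycling the fixed-size chunks.
 */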

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
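    /*
     * Worked example (editorial): each argument occupies a 3-bit
     * typecode above the 3-bit return type.  With three arguments the
     * last field spans bits 6..8 of (typemask >> 3); if e.g. bit 8 is
     * set, clz32 returns 23, nargs starts as 32 - 23 = 9, and
     * DIV_ROUND_UP(9, 3) yields the expected 3.
     */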
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
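
/*
 * Illustrative mapping (hypothetical host): with six integer argument
 * registers and a TCG_TARGET_CALL_STACK_OFFSET of 0, arg_slot 7 is the
 * second stack slot and arg_slot_stk_ofs(7) returns
 * 1 * sizeof(tcg_target_long).
 */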

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
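
/*
 * Example (illustrative): layout_arg_even() rounds an odd slot index
 * up, e.g. slot 3 becomes slot 4, so that on an ABI requiring
 * TCG_CALL_ARG_EVEN a 64-bit argument starts in an even-numbered
 * register or stack slot.
 */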

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference. Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
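
/*
 * Layout sketch (hypothetical 32-bit host using TCG_CALL_ARG_NORMAL
 * for both i32 and i64; illustrative only): for info_helper_st64_mmu,
 * i.e. (env, i64 addr, i64 data, i32 oi, ptr ra), the loop above
 * assigns slots env = 0, addr = 1..2, data = 3..4, oi = 5, ra = 6;
 * arg_slot_reg_p() and arg_slot_stk_ofs() then map each slot to a
 * register or stack offset.
 */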

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
1567      * use a single region. See the documentation of tcg_region_init()
1568      * for the reasoning behind this.
1569      * In system-mode we will have at most max_threads TCG threads.
1570      */
1571 #ifdef CONFIG_USER_ONLY
1572     tcg_ctxs = &tcg_ctx;
1573     tcg_cur_ctxs = 1;
1574     tcg_max_ctxs = 1;
1575 #else
1576     tcg_max_ctxs = max_threads;
1577     tcg_ctxs = g_new0(TCGContext *, max_threads);
1578 #endif
1579 
1580     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1581     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1582     tcg_env = temp_tcgv_ptr(ts);
1583 }
1584 
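/* Initialize TCG at startup: the shared context, then the code regions. */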
1585 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1586 {
1587     tcg_context_init(max_threads);
1588     tcg_region_init(tb_size, splitwx, max_threads);
1589 }
1590 
1591 /*
1592  * Allocate TBs right before their corresponding translated code, making
1593  * sure that TBs and code are on different cache lines.
1594  */
1595 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1596 {
1597     uintptr_t align = qemu_icache_linesize;
1598     TranslationBlock *tb;
1599     void *next;
1600 
1601  retry:
1602     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1603     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1604 
1605     if (unlikely(next > s->code_gen_highwater)) {
1606         if (tcg_region_alloc(s)) {
1607             return NULL;
1608         }
1609         goto retry;
1610     }
1611     qatomic_set(&s->code_gen_ptr, next);
1612     return tb;
1613 }
1614 
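/*
 * Generate the host prologue and epilogue in the code buffer.  Every
 * translated block is entered through this prologue, so it must exist
 * before the first TB is generated; the result is flushed to the
 * instruction cache and optionally dumped to the -d out_asm log.
 */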
1615 void tcg_prologue_init(void)
1616 {
1617     TCGContext *s = tcg_ctx;
1618     size_t prologue_size;
1619 
1620     s->code_ptr = s->code_gen_ptr;
1621     s->code_buf = s->code_gen_ptr;
1622     s->data_gen_ptr = NULL;
1623 
1624 #ifndef CONFIG_TCG_INTERPRETER
1625     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1626 #endif
1627 
1628     s->pool_labels = NULL;
1629 
1630     qemu_thread_jit_write();
1631     /* Generate the prologue.  */
1632     tcg_target_qemu_prologue(s);
1633 
1634     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1635     {
1636         int result = tcg_out_pool_finalize(s);
1637         tcg_debug_assert(result == 0);
1638     }
1639 
1640     prologue_size = tcg_current_code_size(s);
1641     perf_report_prologue(s->code_gen_ptr, prologue_size);
1642 
1643 #ifndef CONFIG_TCG_INTERPRETER
1644     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1645                         (uintptr_t)s->code_buf, prologue_size);
1646 #endif
1647 
1648     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1649         FILE *logfile = qemu_log_trylock();
1650         if (logfile) {
1651             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1652             if (s->data_gen_ptr) {
1653                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1654                 size_t data_size = prologue_size - code_size;
1655                 size_t i;
1656 
1657                 disas(logfile, s->code_gen_ptr, code_size);
1658 
1659                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1660                     if (sizeof(tcg_target_ulong) == 8) {
1661                         fprintf(logfile,
1662                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1663                                 (uintptr_t)s->data_gen_ptr + i,
1664                                 *(uint64_t *)(s->data_gen_ptr + i));
1665                     } else {
1666                         fprintf(logfile,
1667                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1668                                 (uintptr_t)s->data_gen_ptr + i,
1669                                 *(uint32_t *)(s->data_gen_ptr + i));
1670                     }
1671                 }
1672             } else {
1673                 disas(logfile, s->code_gen_ptr, prologue_size);
1674             }
1675             fprintf(logfile, "\n");
1676             qemu_log_unlock(logfile);
1677         }
1678     }
1679 
1680 #ifndef CONFIG_TCG_INTERPRETER
1681     /*
1682      * Assert that goto_ptr is implemented completely, setting an epilogue.
1683      * For tci, we use NULL as the signal to return from the interpreter,
1684      * so skip this check.
1685      */
1686     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1687 #endif
1688 
1689     tcg_region_prologue_set(s);
1690 }
1691 
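/*
 * Reset per-translation state before generating a new TB: discard all
 * non-global temps, cached constant temps, labels and the op list.
 */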
1692 void tcg_func_start(TCGContext *s)
1693 {
1694     tcg_pool_reset(s);
1695     s->nb_temps = s->nb_globals;
1696 
1697     /* No freed temps are carried over for reuse; reset the free bitmaps.  */
1698     tcg_temp_ebb_reset_freed(s);
1699 
1700     /* No constant temps have been previously allocated. */
1701     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1702         if (s->const_table[i]) {
1703             g_hash_table_remove_all(s->const_table[i]);
1704         }
1705     }
1706 
1707     s->nb_ops = 0;
1708     s->nb_labels = 0;
1709     s->current_frame_offset = s->frame_start;
1710 
1711 #ifdef CONFIG_DEBUG_TCG
1712     s->goto_tb_issue_mask = 0;
1713 #endif
1714 
1715     QTAILQ_INIT(&s->ops);
1716     QTAILQ_INIT(&s->free_ops);
1717     s->emit_before_op = NULL;
1718     QSIMPLEQ_INIT(&s->labels);
1719 
1720     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1721     tcg_debug_assert(s->insn_start_words > 0);
1722 }
1723 
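/* Allocate and zero the next TCGTemp slot, aborting the translation
   if TCG_MAX_TEMPS is exceeded.  */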
1724 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1725 {
1726     int n = s->nb_temps++;
1727 
1728     if (n >= TCG_MAX_TEMPS) {
1729         tcg_raise_tb_overflow(s);
1730     }
1731     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1732 }
1733 
1734 static TCGTemp *tcg_global_alloc(TCGContext *s)
1735 {
1736     TCGTemp *ts;
1737 
1738     tcg_debug_assert(s->nb_globals == s->nb_temps);
1739     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1740     s->nb_globals++;
1741     ts = tcg_temp_alloc(s);
1742     ts->kind = TEMP_GLOBAL;
1743 
1744     return ts;
1745 }
1746 
1747 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1748                                             TCGReg reg, const char *name)
1749 {
1750     TCGTemp *ts;
1751 
1752     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1753 
1754     ts = tcg_global_alloc(s);
1755     ts->base_type = type;
1756     ts->type = type;
1757     ts->kind = TEMP_FIXED;
1758     ts->reg = reg;
1759     ts->name = name;
1760     tcg_regset_set_reg(s->reserved_regs, reg);
1761 
1762     return ts;
1763 }
1764 
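/* Record the register and byte range available for spilling temps,
   exposing the base register as the "_frame" pseudo-global.  */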
1765 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1766 {
1767     s->frame_start = start;
1768     s->frame_end = start + size;
1769     s->frame_temp
1770         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1771 }
1772 
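/*
 * Create a global temp that lives in memory at BASE + OFFSET.  On a
 * 32-bit host, a 64-bit global is represented as two consecutive
 * 32-bit halves named "<name>_0" and "<name>_1".
 */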
1773 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1774                                             const char *name, TCGType type)
1775 {
1776     TCGContext *s = tcg_ctx;
1777     TCGTemp *base_ts = tcgv_ptr_temp(base);
1778     TCGTemp *ts = tcg_global_alloc(s);
1779     int indirect_reg = 0;
1780 
1781     switch (base_ts->kind) {
1782     case TEMP_FIXED:
1783         break;
1784     case TEMP_GLOBAL:
1785         /* We do not support double-indirect registers.  */
1786         tcg_debug_assert(!base_ts->indirect_reg);
1787         base_ts->indirect_base = 1;
1788         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1789                             ? 2 : 1);
1790         indirect_reg = 1;
1791         break;
1792     default:
1793         g_assert_not_reached();
1794     }
1795 
1796     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1797         TCGTemp *ts2 = tcg_global_alloc(s);
1798         char buf[64];
1799 
1800         ts->base_type = TCG_TYPE_I64;
1801         ts->type = TCG_TYPE_I32;
1802         ts->indirect_reg = indirect_reg;
1803         ts->mem_allocated = 1;
1804         ts->mem_base = base_ts;
1805         ts->mem_offset = offset;
1806         pstrcpy(buf, sizeof(buf), name);
1807         pstrcat(buf, sizeof(buf), "_0");
1808         ts->name = strdup(buf);
1809 
1810         tcg_debug_assert(ts2 == ts + 1);
1811         ts2->base_type = TCG_TYPE_I64;
1812         ts2->type = TCG_TYPE_I32;
1813         ts2->indirect_reg = indirect_reg;
1814         ts2->mem_allocated = 1;
1815         ts2->mem_base = base_ts;
1816         ts2->mem_offset = offset + 4;
1817         ts2->temp_subindex = 1;
1818         pstrcpy(buf, sizeof(buf), name);
1819         pstrcat(buf, sizeof(buf), "_1");
1820         ts2->name = strdup(buf);
1821     } else {
1822         ts->base_type = type;
1823         ts->type = type;
1824         ts->indirect_reg = indirect_reg;
1825         ts->mem_allocated = 1;
1826         ts->mem_base = base_ts;
1827         ts->mem_offset = offset;
1828         ts->name = name;
1829     }
1830     return ts;
1831 }
1832 
1833 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1834 {
1835     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1836     return temp_tcgv_i32(ts);
1837 }
1838 
1839 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1840 {
1841     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1842     return temp_tcgv_i64(ts);
1843 }
1844 
1845 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1846 {
1847     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1848     return temp_tcgv_ptr(ts);
1849 }
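
/*
 * Usage sketch (illustrative, not part of this file): a target frontend
 * typically creates its guest register globals once at startup, e.g.
 *
 *     cpu_pc = tcg_global_mem_new_i32(tcg_env,
 *                                     offsetof(CPUFooState, pc), "pc");
 *
 * where CPUFooState and cpu_pc are hypothetical names standing in for
 * the target's own state structure and variable.
 */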
1850 
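/*
 * Allocate a temporary of TYPE.  TEMP_EBB temps are recycled from the
 * per-type free bitmap when possible; TEMP_TB temps are always newly
 * allocated.  Types wider than the host register occupy consecutive
 * TCGTemp slots, one per host word.
 */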
1851 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1852 {
1853     TCGContext *s = tcg_ctx;
1854     TCGTemp *ts;
1855     int n;
1856 
1857     if (kind == TEMP_EBB) {
1858         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1859 
1860         if (idx < TCG_MAX_TEMPS) {
1861             /* There is already an available temp with the right type.  */
1862             clear_bit(idx, s->free_temps[type].l);
1863 
1864             ts = &s->temps[idx];
1865             ts->temp_allocated = 1;
1866             tcg_debug_assert(ts->base_type == type);
1867             tcg_debug_assert(ts->kind == kind);
1868             return ts;
1869         }
1870     } else {
1871         tcg_debug_assert(kind == TEMP_TB);
1872     }
1873 
1874     switch (type) {
1875     case TCG_TYPE_I32:
1876     case TCG_TYPE_V64:
1877     case TCG_TYPE_V128:
1878     case TCG_TYPE_V256:
1879         n = 1;
1880         break;
1881     case TCG_TYPE_I64:
1882         n = 64 / TCG_TARGET_REG_BITS;
1883         break;
1884     case TCG_TYPE_I128:
1885         n = 128 / TCG_TARGET_REG_BITS;
1886         break;
1887     default:
1888         g_assert_not_reached();
1889     }
1890 
1891     ts = tcg_temp_alloc(s);
1892     ts->base_type = type;
1893     ts->temp_allocated = 1;
1894     ts->kind = kind;
1895 
1896     if (n == 1) {
1897         ts->type = type;
1898     } else {
1899         ts->type = TCG_TYPE_REG;
1900 
1901         for (int i = 1; i < n; ++i) {
1902             TCGTemp *ts2 = tcg_temp_alloc(s);
1903 
1904             tcg_debug_assert(ts2 == ts + i);
1905             ts2->base_type = type;
1906             ts2->type = TCG_TYPE_REG;
1907             ts2->temp_allocated = 1;
1908             ts2->temp_subindex = i;
1909             ts2->kind = kind;
1910         }
1911     }
1912     return ts;
1913 }
1914 
1915 TCGv_i32 tcg_temp_new_i32(void)
1916 {
1917     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1918 }
1919 
1920 TCGv_i32 tcg_temp_ebb_new_i32(void)
1921 {
1922     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1923 }
1924 
1925 TCGv_i64 tcg_temp_new_i64(void)
1926 {
1927     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1928 }
1929 
1930 TCGv_i64 tcg_temp_ebb_new_i64(void)
1931 {
1932     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1933 }
1934 
1935 TCGv_ptr tcg_temp_new_ptr(void)
1936 {
1937     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1938 }
1939 
1940 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1941 {
1942     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1943 }
1944 
1945 TCGv_i128 tcg_temp_new_i128(void)
1946 {
1947     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1948 }
1949 
1950 TCGv_i128 tcg_temp_ebb_new_i128(void)
1951 {
1952     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1953 }
1954 
1955 TCGv_vec tcg_temp_new_vec(TCGType type)
1956 {
1957     TCGTemp *t;
1958 
1959 #ifdef CONFIG_DEBUG_TCG
1960     switch (type) {
1961     case TCG_TYPE_V64:
1962         assert(TCG_TARGET_HAS_v64);
1963         break;
1964     case TCG_TYPE_V128:
1965         assert(TCG_TARGET_HAS_v128);
1966         break;
1967     case TCG_TYPE_V256:
1968         assert(TCG_TARGET_HAS_v256);
1969         break;
1970     default:
1971         g_assert_not_reached();
1972     }
1973 #endif
1974 
1975     t = tcg_temp_new_internal(type, TEMP_EBB);
1976     return temp_tcgv_vec(t);
1977 }
1978 
1979 /* Create a new temp of the same type as an existing temp.  */
1980 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1981 {
1982     TCGTemp *t = tcgv_vec_temp(match);
1983 
1984     tcg_debug_assert(t->temp_allocated != 0);
1985 
1986     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1987     return temp_tcgv_vec(t);
1988 }
1989 
1990 void tcg_temp_free_internal(TCGTemp *ts)
1991 {
1992     TCGContext *s = tcg_ctx;
1993 
1994     switch (ts->kind) {
1995     case TEMP_CONST:
1996     case TEMP_TB:
1997         /* Silently ignore free. */
1998         break;
1999     case TEMP_EBB:
2000         tcg_debug_assert(ts->temp_allocated != 0);
2001         ts->temp_allocated = 0;
2002         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2003         break;
2004     default:
2005         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2006         g_assert_not_reached();
2007     }
2008 }
2009 
2010 void tcg_temp_free_i32(TCGv_i32 arg)
2011 {
2012     tcg_temp_free_internal(tcgv_i32_temp(arg));
2013 }
2014 
2015 void tcg_temp_free_i64(TCGv_i64 arg)
2016 {
2017     tcg_temp_free_internal(tcgv_i64_temp(arg));
2018 }
2019 
2020 void tcg_temp_free_i128(TCGv_i128 arg)
2021 {
2022     tcg_temp_free_internal(tcgv_i128_temp(arg));
2023 }
2024 
2025 void tcg_temp_free_ptr(TCGv_ptr arg)
2026 {
2027     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2028 }
2029 
2030 void tcg_temp_free_vec(TCGv_vec arg)
2031 {
2032     tcg_temp_free_internal(tcgv_vec_temp(arg));
2033 }
2034 
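/*
 * Return the interned constant temp for (TYPE, VAL), creating it and
 * entering it into the per-type hash table on first use.  Constant
 * temps are shared by all uses within a translation and are never
 * explicitly freed.
 */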
2035 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2036 {
2037     TCGContext *s = tcg_ctx;
2038     GHashTable *h = s->const_table[type];
2039     TCGTemp *ts;
2040 
2041     if (h == NULL) {
2042         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2043         s->const_table[type] = h;
2044     }
2045 
2046     ts = g_hash_table_lookup(h, &val);
2047     if (ts == NULL) {
2048         int64_t *val_ptr;
2049 
2050         ts = tcg_temp_alloc(s);
2051 
2052         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2053             TCGTemp *ts2 = tcg_temp_alloc(s);
2054 
2055             tcg_debug_assert(ts2 == ts + 1);
2056 
2057             ts->base_type = TCG_TYPE_I64;
2058             ts->type = TCG_TYPE_I32;
2059             ts->kind = TEMP_CONST;
2060             ts->temp_allocated = 1;
2061 
2062             ts2->base_type = TCG_TYPE_I64;
2063             ts2->type = TCG_TYPE_I32;
2064             ts2->kind = TEMP_CONST;
2065             ts2->temp_allocated = 1;
2066             ts2->temp_subindex = 1;
2067 
2068             /*
2069              * Retain the full value of the 64-bit constant in the low
2070              * part, so that the hash table works.  Actual uses will
2071              * truncate the value to the low part.
2072              */
2073             ts[HOST_BIG_ENDIAN].val = val;
2074             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2075             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2076         } else {
2077             ts->base_type = type;
2078             ts->type = type;
2079             ts->kind = TEMP_CONST;
2080             ts->temp_allocated = 1;
2081             ts->val = val;
2082             val_ptr = &ts->val;
2083         }
2084         g_hash_table_insert(h, val_ptr, ts);
2085     }
2086 
2087     return ts;
2088 }
2089 
2090 TCGv_i32 tcg_constant_i32(int32_t val)
2091 {
2092     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2093 }
2094 
2095 TCGv_i64 tcg_constant_i64(int64_t val)
2096 {
2097     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2098 }
2099 
2100 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2101 {
2102     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2103 }
2104 
2105 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2106 {
2107     val = dup_const(vece, val);
2108     return temp_tcgv_vec(tcg_constant_internal(type, val));
2109 }
2110 
2111 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2112 {
2113     TCGTemp *t = tcgv_vec_temp(match);
2114 
2115     tcg_debug_assert(t->temp_allocated != 0);
2116     return tcg_constant_vec(t->base_type, vece, val);
2117 }
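
/*
 * Usage sketch (illustrative, not part of this file): constant temps
 * are interned and need not be freed, unlike ordinary temps:
 *
 *     TCGv_i32 four = tcg_constant_i32(4);
 *     tcg_gen_add_i32(dst, src, four);
 *
 * where "dst" and "src" stand for temps created elsewhere; a call to
 * tcg_temp_free_i32(four) would be silently ignored (see above).
 */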
2118 
2119 #ifdef CONFIG_DEBUG_TCG
2120 size_t temp_idx(TCGTemp *ts)
2121 {
2122     ptrdiff_t n = ts - tcg_ctx->temps;
2123     assert(n >= 0 && n < tcg_ctx->nb_temps);
2124     return n;
2125 }
2126 
2127 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2128 {
2129     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2130 
2131     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2132     assert(o % sizeof(TCGTemp) == 0);
2133 
2134     return (void *)tcg_ctx + (uintptr_t)v;
2135 }
2136 #endif /* CONFIG_DEBUG_TCG */
2137 
2138 /*
2139  * Return true if OP may appear in the opcode stream with TYPE.
2140  * Test the runtime variable that controls each opcode.
2141  */
2142 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2143 {
2144     bool has_type;
2145 
2146     switch (type) {
2147     case TCG_TYPE_I32:
2148         has_type = true;
2149         break;
2150     case TCG_TYPE_I64:
2151         has_type = TCG_TARGET_REG_BITS == 64;
2152         break;
2153     case TCG_TYPE_V64:
2154         has_type = TCG_TARGET_HAS_v64;
2155         break;
2156     case TCG_TYPE_V128:
2157         has_type = TCG_TARGET_HAS_v128;
2158         break;
2159     case TCG_TYPE_V256:
2160         has_type = TCG_TARGET_HAS_v256;
2161         break;
2162     default:
2163         has_type = false;
2164         break;
2165     }
2166 
2167     switch (op) {
2168     case INDEX_op_discard:
2169     case INDEX_op_set_label:
2170     case INDEX_op_call:
2171     case INDEX_op_br:
2172     case INDEX_op_mb:
2173     case INDEX_op_insn_start:
2174     case INDEX_op_exit_tb:
2175     case INDEX_op_goto_tb:
2176     case INDEX_op_goto_ptr:
2177     case INDEX_op_qemu_ld_i32:
2178     case INDEX_op_qemu_st_i32:
2179     case INDEX_op_qemu_ld_i64:
2180     case INDEX_op_qemu_st_i64:
2181         return true;
2182 
2183     case INDEX_op_qemu_st8_i32:
2184         return TCG_TARGET_HAS_qemu_st8_i32;
2185 
2186     case INDEX_op_qemu_ld_i128:
2187     case INDEX_op_qemu_st_i128:
2188         return TCG_TARGET_HAS_qemu_ldst_i128;
2189 
2190     case INDEX_op_mov:
2191         return has_type;
2192 
2193     case INDEX_op_setcond_i32:
2194     case INDEX_op_brcond_i32:
2195     case INDEX_op_movcond_i32:
2196     case INDEX_op_ld8u_i32:
2197     case INDEX_op_ld8s_i32:
2198     case INDEX_op_ld16u_i32:
2199     case INDEX_op_ld16s_i32:
2200     case INDEX_op_ld_i32:
2201     case INDEX_op_st8_i32:
2202     case INDEX_op_st16_i32:
2203     case INDEX_op_st_i32:
2204     case INDEX_op_add_i32:
2205     case INDEX_op_sub_i32:
2206     case INDEX_op_neg_i32:
2207     case INDEX_op_mul_i32:
2208     case INDEX_op_and_i32:
2209     case INDEX_op_or_i32:
2210     case INDEX_op_xor_i32:
2211     case INDEX_op_shl_i32:
2212     case INDEX_op_shr_i32:
2213     case INDEX_op_sar_i32:
2214     case INDEX_op_extract_i32:
2215     case INDEX_op_sextract_i32:
2216     case INDEX_op_deposit_i32:
2217         return true;
2218 
2219     case INDEX_op_negsetcond_i32:
2220         return TCG_TARGET_HAS_negsetcond_i32;
2221     case INDEX_op_div_i32:
2222     case INDEX_op_divu_i32:
2223         return TCG_TARGET_HAS_div_i32;
2224     case INDEX_op_rem_i32:
2225     case INDEX_op_remu_i32:
2226         return TCG_TARGET_HAS_rem_i32;
2227     case INDEX_op_div2_i32:
2228     case INDEX_op_divu2_i32:
2229         return TCG_TARGET_HAS_div2_i32;
2230     case INDEX_op_rotl_i32:
2231     case INDEX_op_rotr_i32:
2232         return TCG_TARGET_HAS_rot_i32;
2233     case INDEX_op_extract2_i32:
2234         return TCG_TARGET_HAS_extract2_i32;
2235     case INDEX_op_add2_i32:
2236         return TCG_TARGET_HAS_add2_i32;
2237     case INDEX_op_sub2_i32:
2238         return TCG_TARGET_HAS_sub2_i32;
2239     case INDEX_op_mulu2_i32:
2240         return TCG_TARGET_HAS_mulu2_i32;
2241     case INDEX_op_muls2_i32:
2242         return TCG_TARGET_HAS_muls2_i32;
2243     case INDEX_op_muluh_i32:
2244         return TCG_TARGET_HAS_muluh_i32;
2245     case INDEX_op_mulsh_i32:
2246         return TCG_TARGET_HAS_mulsh_i32;
2247     case INDEX_op_bswap16_i32:
2248         return TCG_TARGET_HAS_bswap16_i32;
2249     case INDEX_op_bswap32_i32:
2250         return TCG_TARGET_HAS_bswap32_i32;
2251     case INDEX_op_not_i32:
2252         return TCG_TARGET_HAS_not_i32;
2253     case INDEX_op_andc_i32:
2254         return TCG_TARGET_HAS_andc_i32;
2255     case INDEX_op_orc_i32:
2256         return TCG_TARGET_HAS_orc_i32;
2257     case INDEX_op_eqv_i32:
2258         return TCG_TARGET_HAS_eqv_i32;
2259     case INDEX_op_nand_i32:
2260         return TCG_TARGET_HAS_nand_i32;
2261     case INDEX_op_nor_i32:
2262         return TCG_TARGET_HAS_nor_i32;
2263     case INDEX_op_clz_i32:
2264         return TCG_TARGET_HAS_clz_i32;
2265     case INDEX_op_ctz_i32:
2266         return TCG_TARGET_HAS_ctz_i32;
2267     case INDEX_op_ctpop_i32:
2268         return TCG_TARGET_HAS_ctpop_i32;
2269 
2270     case INDEX_op_brcond2_i32:
2271     case INDEX_op_setcond2_i32:
2272         return TCG_TARGET_REG_BITS == 32;
2273 
2274     case INDEX_op_setcond_i64:
2275     case INDEX_op_brcond_i64:
2276     case INDEX_op_movcond_i64:
2277     case INDEX_op_ld8u_i64:
2278     case INDEX_op_ld8s_i64:
2279     case INDEX_op_ld16u_i64:
2280     case INDEX_op_ld16s_i64:
2281     case INDEX_op_ld32u_i64:
2282     case INDEX_op_ld32s_i64:
2283     case INDEX_op_ld_i64:
2284     case INDEX_op_st8_i64:
2285     case INDEX_op_st16_i64:
2286     case INDEX_op_st32_i64:
2287     case INDEX_op_st_i64:
2288     case INDEX_op_add_i64:
2289     case INDEX_op_sub_i64:
2290     case INDEX_op_neg_i64:
2291     case INDEX_op_mul_i64:
2292     case INDEX_op_and_i64:
2293     case INDEX_op_or_i64:
2294     case INDEX_op_xor_i64:
2295     case INDEX_op_shl_i64:
2296     case INDEX_op_shr_i64:
2297     case INDEX_op_sar_i64:
2298     case INDEX_op_ext_i32_i64:
2299     case INDEX_op_extu_i32_i64:
2300     case INDEX_op_extract_i64:
2301     case INDEX_op_sextract_i64:
2302     case INDEX_op_deposit_i64:
2303         return TCG_TARGET_REG_BITS == 64;
2304 
2305     case INDEX_op_negsetcond_i64:
2306         return TCG_TARGET_HAS_negsetcond_i64;
2307     case INDEX_op_div_i64:
2308     case INDEX_op_divu_i64:
2309         return TCG_TARGET_HAS_div_i64;
2310     case INDEX_op_rem_i64:
2311     case INDEX_op_remu_i64:
2312         return TCG_TARGET_HAS_rem_i64;
2313     case INDEX_op_div2_i64:
2314     case INDEX_op_divu2_i64:
2315         return TCG_TARGET_HAS_div2_i64;
2316     case INDEX_op_rotl_i64:
2317     case INDEX_op_rotr_i64:
2318         return TCG_TARGET_HAS_rot_i64;
2319     case INDEX_op_extract2_i64:
2320         return TCG_TARGET_HAS_extract2_i64;
2321     case INDEX_op_extrl_i64_i32:
2322     case INDEX_op_extrh_i64_i32:
2323         return TCG_TARGET_HAS_extr_i64_i32;
2324     case INDEX_op_bswap16_i64:
2325         return TCG_TARGET_HAS_bswap16_i64;
2326     case INDEX_op_bswap32_i64:
2327         return TCG_TARGET_HAS_bswap32_i64;
2328     case INDEX_op_bswap64_i64:
2329         return TCG_TARGET_HAS_bswap64_i64;
2330     case INDEX_op_not_i64:
2331         return TCG_TARGET_HAS_not_i64;
2332     case INDEX_op_andc_i64:
2333         return TCG_TARGET_HAS_andc_i64;
2334     case INDEX_op_orc_i64:
2335         return TCG_TARGET_HAS_orc_i64;
2336     case INDEX_op_eqv_i64:
2337         return TCG_TARGET_HAS_eqv_i64;
2338     case INDEX_op_nand_i64:
2339         return TCG_TARGET_HAS_nand_i64;
2340     case INDEX_op_nor_i64:
2341         return TCG_TARGET_HAS_nor_i64;
2342     case INDEX_op_clz_i64:
2343         return TCG_TARGET_HAS_clz_i64;
2344     case INDEX_op_ctz_i64:
2345         return TCG_TARGET_HAS_ctz_i64;
2346     case INDEX_op_ctpop_i64:
2347         return TCG_TARGET_HAS_ctpop_i64;
2348     case INDEX_op_add2_i64:
2349         return TCG_TARGET_HAS_add2_i64;
2350     case INDEX_op_sub2_i64:
2351         return TCG_TARGET_HAS_sub2_i64;
2352     case INDEX_op_mulu2_i64:
2353         return TCG_TARGET_HAS_mulu2_i64;
2354     case INDEX_op_muls2_i64:
2355         return TCG_TARGET_HAS_muls2_i64;
2356     case INDEX_op_muluh_i64:
2357         return TCG_TARGET_HAS_muluh_i64;
2358     case INDEX_op_mulsh_i64:
2359         return TCG_TARGET_HAS_mulsh_i64;
2360 
2361     case INDEX_op_mov_vec:
2362     case INDEX_op_dup_vec:
2363     case INDEX_op_dupm_vec:
2364     case INDEX_op_ld_vec:
2365     case INDEX_op_st_vec:
2366     case INDEX_op_add_vec:
2367     case INDEX_op_sub_vec:
2368     case INDEX_op_and_vec:
2369     case INDEX_op_or_vec:
2370     case INDEX_op_xor_vec:
2371     case INDEX_op_cmp_vec:
2372         return has_type;
2373     case INDEX_op_dup2_vec:
2374         return has_type && TCG_TARGET_REG_BITS == 32;
2375     case INDEX_op_not_vec:
2376         return has_type && TCG_TARGET_HAS_not_vec;
2377     case INDEX_op_neg_vec:
2378         return has_type && TCG_TARGET_HAS_neg_vec;
2379     case INDEX_op_abs_vec:
2380         return has_type && TCG_TARGET_HAS_abs_vec;
2381     case INDEX_op_andc_vec:
2382         return has_type && TCG_TARGET_HAS_andc_vec;
2383     case INDEX_op_orc_vec:
2384         return has_type && TCG_TARGET_HAS_orc_vec;
2385     case INDEX_op_nand_vec:
2386         return has_type && TCG_TARGET_HAS_nand_vec;
2387     case INDEX_op_nor_vec:
2388         return has_type && TCG_TARGET_HAS_nor_vec;
2389     case INDEX_op_eqv_vec:
2390         return has_type && TCG_TARGET_HAS_eqv_vec;
2391     case INDEX_op_mul_vec:
2392         return has_type && TCG_TARGET_HAS_mul_vec;
2393     case INDEX_op_shli_vec:
2394     case INDEX_op_shri_vec:
2395     case INDEX_op_sari_vec:
2396         return has_type && TCG_TARGET_HAS_shi_vec;
2397     case INDEX_op_shls_vec:
2398     case INDEX_op_shrs_vec:
2399     case INDEX_op_sars_vec:
2400         return has_type && TCG_TARGET_HAS_shs_vec;
2401     case INDEX_op_shlv_vec:
2402     case INDEX_op_shrv_vec:
2403     case INDEX_op_sarv_vec:
2404         return has_type && TCG_TARGET_HAS_shv_vec;
2405     case INDEX_op_rotli_vec:
2406         return has_type && TCG_TARGET_HAS_roti_vec;
2407     case INDEX_op_rotls_vec:
2408         return has_type && TCG_TARGET_HAS_rots_vec;
2409     case INDEX_op_rotlv_vec:
2410     case INDEX_op_rotrv_vec:
2411         return has_type && TCG_TARGET_HAS_rotv_vec;
2412     case INDEX_op_ssadd_vec:
2413     case INDEX_op_usadd_vec:
2414     case INDEX_op_sssub_vec:
2415     case INDEX_op_ussub_vec:
2416         return has_type && TCG_TARGET_HAS_sat_vec;
2417     case INDEX_op_smin_vec:
2418     case INDEX_op_umin_vec:
2419     case INDEX_op_smax_vec:
2420     case INDEX_op_umax_vec:
2421         return has_type && TCG_TARGET_HAS_minmax_vec;
2422     case INDEX_op_bitsel_vec:
2423         return has_type && TCG_TARGET_HAS_bitsel_vec;
2424     case INDEX_op_cmpsel_vec:
2425         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2426 
2427     default:
2428         if (op < INDEX_op_last_generic) {
2429             const TCGOutOp *outop;
2430             TCGConstraintSetIndex con_set;
2431 
2432             if (!has_type) {
2433                 return false;
2434             }
2435 
2436             outop = all_outop[op];
2437             tcg_debug_assert(outop != NULL);
2438 
2439             con_set = outop->static_constraint;
2440             if (con_set == C_Dynamic) {
2441                 con_set = outop->dynamic_constraint(type, flags);
2442             }
2443             if (con_set >= 0) {
2444                 return true;
2445             }
2446             tcg_debug_assert(con_set == C_NotImplemented);
2447             return false;
2448         }
2449         tcg_debug_assert(op < NB_OPS);
2450         return true;
2451 
2452     case INDEX_op_last_generic:
2453         g_assert_not_reached();
2454     }
2455 }
2456 
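/* Assert that OFS/LEN describe a valid field of TYPE, then ask the
   backend whether it can implement the deposit directly.  */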
2457 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2458 {
2459     unsigned width;
2460 
2461     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2462     width = (type == TCG_TYPE_I32 ? 32 : 64);
2463 
2464     tcg_debug_assert(ofs < width);
2465     tcg_debug_assert(len > 0);
2466     tcg_debug_assert(len <= width - ofs);
2467 
2468     return TCG_TARGET_deposit_valid(type, ofs, len);
2469 }
2470 
2471 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2472 
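/*
 * Emit a call to a helper.  The layout computed by init_call_layout()
 * drives how RET and ARGS are expanded into op arguments; 32-bit values
 * that the ABI passes in 64-bit slots are extended into scratch temps,
 * which are freed again once the op has been emitted.
 */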
2473 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2474                           TCGTemp *ret, TCGTemp **args)
2475 {
2476     TCGv_i64 extend_free[MAX_CALL_IARGS];
2477     int n_extend = 0;
2478     TCGOp *op;
2479     int i, n, pi = 0, total_args;
2480 
2481     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2482         init_call_layout(info);
2483         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2484     }
2485 
2486     total_args = info->nr_out + info->nr_in + 2;
2487     op = tcg_op_alloc(INDEX_op_call, total_args);
2488 
2489 #ifdef CONFIG_PLUGIN
2490     /* Flag helpers that may affect guest state */
2491     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2492         tcg_ctx->plugin_insn->calls_helpers = true;
2493     }
2494 #endif
2495 
2496     TCGOP_CALLO(op) = n = info->nr_out;
2497     switch (n) {
2498     case 0:
2499         tcg_debug_assert(ret == NULL);
2500         break;
2501     case 1:
2502         tcg_debug_assert(ret != NULL);
2503         op->args[pi++] = temp_arg(ret);
2504         break;
2505     case 2:
2506     case 4:
2507         tcg_debug_assert(ret != NULL);
2508         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2509         tcg_debug_assert(ret->temp_subindex == 0);
2510         for (i = 0; i < n; ++i) {
2511             op->args[pi++] = temp_arg(ret + i);
2512         }
2513         break;
2514     default:
2515         g_assert_not_reached();
2516     }
2517 
2518     TCGOP_CALLI(op) = n = info->nr_in;
2519     for (i = 0; i < n; i++) {
2520         const TCGCallArgumentLoc *loc = &info->in[i];
2521         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2522 
2523         switch (loc->kind) {
2524         case TCG_CALL_ARG_NORMAL:
2525         case TCG_CALL_ARG_BY_REF:
2526         case TCG_CALL_ARG_BY_REF_N:
2527             op->args[pi++] = temp_arg(ts);
2528             break;
2529 
2530         case TCG_CALL_ARG_EXTEND_U:
2531         case TCG_CALL_ARG_EXTEND_S:
2532             {
2533                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2534                 TCGv_i32 orig = temp_tcgv_i32(ts);
2535 
2536                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2537                     tcg_gen_ext_i32_i64(temp, orig);
2538                 } else {
2539                     tcg_gen_extu_i32_i64(temp, orig);
2540                 }
2541                 op->args[pi++] = tcgv_i64_arg(temp);
2542                 extend_free[n_extend++] = temp;
2543             }
2544             break;
2545 
2546         default:
2547             g_assert_not_reached();
2548         }
2549     }
2550     op->args[pi++] = (uintptr_t)func;
2551     op->args[pi++] = (uintptr_t)info;
2552     tcg_debug_assert(pi == total_args);
2553 
2554     if (tcg_ctx->emit_before_op) {
2555         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2556     } else {
2557         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2558     }
2559 
2560     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2561     for (i = 0; i < n_extend; ++i) {
2562         tcg_temp_free_i64(extend_free[i]);
2563     }
2564 }
2565 
2566 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2567 {
2568     tcg_gen_callN(func, info, ret, NULL);
2569 }
2570 
2571 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2572 {
2573     tcg_gen_callN(func, info, ret, &t1);
2574 }
2575 
2576 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2577                    TCGTemp *t1, TCGTemp *t2)
2578 {
2579     TCGTemp *args[2] = { t1, t2 };
2580     tcg_gen_callN(func, info, ret, args);
2581 }
2582 
2583 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2584                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2585 {
2586     TCGTemp *args[3] = { t1, t2, t3 };
2587     tcg_gen_callN(func, info, ret, args);
2588 }
2589 
2590 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2591                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2592 {
2593     TCGTemp *args[4] = { t1, t2, t3, t4 };
2594     tcg_gen_callN(func, info, ret, args);
2595 }
2596 
2597 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2598                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2599 {
2600     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2601     tcg_gen_callN(func, info, ret, args);
2602 }
2603 
2604 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2605                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2606                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2607 {
2608     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2609     tcg_gen_callN(func, info, ret, args);
2610 }
2611 
2612 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2613                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2614                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2615 {
2616     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2617     tcg_gen_callN(func, info, ret, args);
2618 }
2619 
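/*
 * Establish the initial value state of every temp for the register
 * allocator: constants are TEMP_VAL_CONST, fixed temps live in their
 * register, globals start in memory, and EBB temps start dead.
 */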
2620 static void tcg_reg_alloc_start(TCGContext *s)
2621 {
2622     int i, n;
2623 
2624     for (i = 0, n = s->nb_temps; i < n; i++) {
2625         TCGTemp *ts = &s->temps[i];
2626         TCGTempVal val = TEMP_VAL_MEM;
2627 
2628         switch (ts->kind) {
2629         case TEMP_CONST:
2630             val = TEMP_VAL_CONST;
2631             break;
2632         case TEMP_FIXED:
2633             val = TEMP_VAL_REG;
2634             break;
2635         case TEMP_GLOBAL:
2636             break;
2637         case TEMP_EBB:
2638             val = TEMP_VAL_DEAD;
2639             /* fall through */
2640         case TEMP_TB:
2641             ts->mem_allocated = 0;
2642             break;
2643         default:
2644             g_assert_not_reached();
2645         }
2646         ts->val_type = val;
2647     }
2648 
2649     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2650 }
2651 
2652 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2653                                  TCGTemp *ts)
2654 {
2655     int idx = temp_idx(ts);
2656 
2657     switch (ts->kind) {
2658     case TEMP_FIXED:
2659     case TEMP_GLOBAL:
2660         pstrcpy(buf, buf_size, ts->name);
2661         break;
2662     case TEMP_TB:
2663         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2664         break;
2665     case TEMP_EBB:
2666         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2667         break;
2668     case TEMP_CONST:
2669         switch (ts->type) {
2670         case TCG_TYPE_I32:
2671             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2672             break;
2673 #if TCG_TARGET_REG_BITS > 32
2674         case TCG_TYPE_I64:
2675             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2676             break;
2677 #endif
2678         case TCG_TYPE_V64:
2679         case TCG_TYPE_V128:
2680         case TCG_TYPE_V256:
2681             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2682                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2683             break;
2684         default:
2685             g_assert_not_reached();
2686         }
2687         break;
2688     }
2689     return buf;
2690 }
2691 
2692 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2693                              int buf_size, TCGArg arg)
2694 {
2695     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2696 }
2697 
2698 static const char * const cond_name[] =
2699 {
2700     [TCG_COND_NEVER] = "never",
2701     [TCG_COND_ALWAYS] = "always",
2702     [TCG_COND_EQ] = "eq",
2703     [TCG_COND_NE] = "ne",
2704     [TCG_COND_LT] = "lt",
2705     [TCG_COND_GE] = "ge",
2706     [TCG_COND_LE] = "le",
2707     [TCG_COND_GT] = "gt",
2708     [TCG_COND_LTU] = "ltu",
2709     [TCG_COND_GEU] = "geu",
2710     [TCG_COND_LEU] = "leu",
2711     [TCG_COND_GTU] = "gtu",
2712     [TCG_COND_TSTEQ] = "tsteq",
2713     [TCG_COND_TSTNE] = "tstne",
2714 };
2715 
2716 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2717 {
2718     [MO_UB]   = "ub",
2719     [MO_SB]   = "sb",
2720     [MO_LEUW] = "leuw",
2721     [MO_LESW] = "lesw",
2722     [MO_LEUL] = "leul",
2723     [MO_LESL] = "lesl",
2724     [MO_LEUQ] = "leq",
2725     [MO_BEUW] = "beuw",
2726     [MO_BESW] = "besw",
2727     [MO_BEUL] = "beul",
2728     [MO_BESL] = "besl",
2729     [MO_BEUQ] = "beq",
2730     [MO_128 + MO_BE] = "beo",
2731     [MO_128 + MO_LE] = "leo",
2732 };
2733 
2734 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2735     [MO_UNALN >> MO_ASHIFT]    = "un+",
2736     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2737     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2738     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2739     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2740     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2741     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2742     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2743 };
2744 
2745 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2746     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2747     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2748     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2749     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2750     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2751     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2752 };
2753 
2754 static const char bswap_flag_name[][6] = {
2755     [TCG_BSWAP_IZ] = "iz",
2756     [TCG_BSWAP_OZ] = "oz",
2757     [TCG_BSWAP_OS] = "os",
2758     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2759     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2760 };
2761 
2762 #ifdef CONFIG_PLUGIN
2763 static const char * const plugin_from_name[] = {
2764     "from-tb",
2765     "from-insn",
2766     "after-insn",
2767     "after-tb",
2768 };
2769 #endif
2770 
2771 static inline bool tcg_regset_single(TCGRegSet d)
2772 {
2773     return (d & (d - 1)) == 0;
2774 }
2775 
2776 static inline TCGReg tcg_regset_first(TCGRegSet d)
2777 {
2778     if (TCG_TARGET_NB_REGS <= 32) {
2779         return ctz32(d);
2780     } else {
2781         return ctz64(d);
2782     }
2783 }
2784 
2785 /* Return only the number of characters output -- no error return. */
2786 #define ne_fprintf(...) \
2787     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2788 
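/*
 * Print the op stream of S to F in textual form, one op per line,
 * with liveness annotations when present and, if HAVE_PREFS, the
 * output register preferences as well.
 */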
2789 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2790 {
2791     char buf[128];
2792     TCGOp *op;
2793 
2794     QTAILQ_FOREACH(op, &s->ops, link) {
2795         int i, k, nb_oargs, nb_iargs, nb_cargs;
2796         const TCGOpDef *def;
2797         TCGOpcode c;
2798         int col = 0;
2799 
2800         c = op->opc;
2801         def = &tcg_op_defs[c];
2802 
2803         if (c == INDEX_op_insn_start) {
2804             nb_oargs = 0;
2805             col += ne_fprintf(f, "\n ----");
2806 
2807             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2808                 col += ne_fprintf(f, " %016" PRIx64,
2809                                   tcg_get_insn_start_param(op, i));
2810             }
2811         } else if (c == INDEX_op_call) {
2812             const TCGHelperInfo *info = tcg_call_info(op);
2813             void *func = tcg_call_func(op);
2814 
2815             /* variable number of arguments */
2816             nb_oargs = TCGOP_CALLO(op);
2817             nb_iargs = TCGOP_CALLI(op);
2818             nb_cargs = def->nb_cargs;
2819 
2820             col += ne_fprintf(f, " %s ", def->name);
2821 
2822             /*
2823              * Print the function name from TCGHelperInfo, if available.
2824              * Note that plugins have a template function for the info,
2825              * but the actual function pointer comes from the plugin.
2826              */
2827             if (func == info->func) {
2828                 col += ne_fprintf(f, "%s", info->name);
2829             } else {
2830                 col += ne_fprintf(f, "plugin(%p)", func);
2831             }
2832 
2833             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2834             for (i = 0; i < nb_oargs; i++) {
2835                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2836                                                             op->args[i]));
2837             }
2838             for (i = 0; i < nb_iargs; i++) {
2839                 TCGArg arg = op->args[nb_oargs + i];
2840                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2841                 col += ne_fprintf(f, ",%s", t);
2842             }
2843         } else {
2844             if (def->flags & TCG_OPF_INT) {
2845                 col += ne_fprintf(f, " %s_i%d ",
2846                                   def->name,
2847                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2848             } else if (def->flags & TCG_OPF_VECTOR) {
2849                 col += ne_fprintf(f, "%s v%d,e%d,",
2850                                   def->name,
2851                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2852                                   8 << TCGOP_VECE(op));
2853             } else {
2854                 col += ne_fprintf(f, " %s ", def->name);
2855             }
2856 
2857             nb_oargs = def->nb_oargs;
2858             nb_iargs = def->nb_iargs;
2859             nb_cargs = def->nb_cargs;
2860 
2861             k = 0;
2862             for (i = 0; i < nb_oargs; i++) {
2863                 const char *sep = k ? "," : "";
2864                 col += ne_fprintf(f, "%s%s", sep,
2865                                   tcg_get_arg_str(s, buf, sizeof(buf),
2866                                                   op->args[k++]));
2867             }
2868             for (i = 0; i < nb_iargs; i++) {
2869                 const char *sep = k ? "," : "";
2870                 col += ne_fprintf(f, "%s%s", sep,
2871                                   tcg_get_arg_str(s, buf, sizeof(buf),
2872                                                   op->args[k++]));
2873             }
2874             switch (c) {
2875             case INDEX_op_brcond_i32:
2876             case INDEX_op_setcond_i32:
2877             case INDEX_op_negsetcond_i32:
2878             case INDEX_op_movcond_i32:
2879             case INDEX_op_brcond2_i32:
2880             case INDEX_op_setcond2_i32:
2881             case INDEX_op_brcond_i64:
2882             case INDEX_op_setcond_i64:
2883             case INDEX_op_negsetcond_i64:
2884             case INDEX_op_movcond_i64:
2885             case INDEX_op_cmp_vec:
2886             case INDEX_op_cmpsel_vec:
2887                 if (op->args[k] < ARRAY_SIZE(cond_name)
2888                     && cond_name[op->args[k]]) {
2889                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2890                 } else {
2891                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2892                 }
2893                 i = 1;
2894                 break;
2895             case INDEX_op_qemu_ld_i32:
2896             case INDEX_op_qemu_st_i32:
2897             case INDEX_op_qemu_st8_i32:
2898             case INDEX_op_qemu_ld_i64:
2899             case INDEX_op_qemu_st_i64:
2900             case INDEX_op_qemu_ld_i128:
2901             case INDEX_op_qemu_st_i128:
2902                 {
2903                     const char *s_al, *s_op, *s_at;
2904                     MemOpIdx oi = op->args[k++];
2905                     MemOp mop = get_memop(oi);
2906                     unsigned ix = get_mmuidx(oi);
2907 
2908                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2909                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2910                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2911                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2912 
2913                     /* If all fields are accounted for, print symbolically. */
2914                     if (!mop && s_al && s_op && s_at) {
2915                         col += ne_fprintf(f, ",%s%s%s,%u",
2916                                           s_at, s_al, s_op, ix);
2917                     } else {
2918                         mop = get_memop(oi);
2919                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2920                     }
2921                     i = 1;
2922                 }
2923                 break;
2924             case INDEX_op_bswap16_i32:
2925             case INDEX_op_bswap16_i64:
2926             case INDEX_op_bswap32_i32:
2927             case INDEX_op_bswap32_i64:
2928             case INDEX_op_bswap64_i64:
2929                 {
2930                     TCGArg flags = op->args[k];
2931                     const char *name = NULL;
2932 
2933                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2934                         name = bswap_flag_name[flags];
2935                     }
2936                     if (name) {
2937                         col += ne_fprintf(f, ",%s", name);
2938                     } else {
2939                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2940                     }
2941                     i = k = 1;
2942                 }
2943                 break;
2944 #ifdef CONFIG_PLUGIN
2945             case INDEX_op_plugin_cb:
2946                 {
2947                     TCGArg from = op->args[k++];
2948                     const char *name = NULL;
2949 
2950                     if (from < ARRAY_SIZE(plugin_from_name)) {
2951                         name = plugin_from_name[from];
2952                     }
2953                     if (name) {
2954                         col += ne_fprintf(f, "%s", name);
2955                     } else {
2956                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2957                     }
2958                     i = 1;
2959                 }
2960                 break;
2961 #endif
2962             default:
2963                 i = 0;
2964                 break;
2965             }
2966             switch (c) {
2967             case INDEX_op_set_label:
2968             case INDEX_op_br:
2969             case INDEX_op_brcond_i32:
2970             case INDEX_op_brcond_i64:
2971             case INDEX_op_brcond2_i32:
2972                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2973                                   arg_label(op->args[k])->id);
2974                 i++, k++;
2975                 break;
2976             case INDEX_op_mb:
2977                 {
2978                     TCGBar membar = op->args[k];
2979                     const char *b_op, *m_op;
2980 
2981                     switch (membar & TCG_BAR_SC) {
2982                     case 0:
2983                         b_op = "none";
2984                         break;
2985                     case TCG_BAR_LDAQ:
2986                         b_op = "acq";
2987                         break;
2988                     case TCG_BAR_STRL:
2989                         b_op = "rel";
2990                         break;
2991                     case TCG_BAR_SC:
2992                         b_op = "seq";
2993                         break;
2994                     default:
2995                         g_assert_not_reached();
2996                     }
2997 
2998                     switch (membar & TCG_MO_ALL) {
2999                     case 0:
3000                         m_op = "none";
3001                         break;
3002                     case TCG_MO_LD_LD:
3003                         m_op = "rr";
3004                         break;
3005                     case TCG_MO_LD_ST:
3006                         m_op = "rw";
3007                         break;
3008                     case TCG_MO_ST_LD:
3009                         m_op = "wr";
3010                         break;
3011                     case TCG_MO_ST_ST:
3012                         m_op = "ww";
3013                         break;
3014                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3015                         m_op = "rr+rw";
3016                         break;
3017                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3018                         m_op = "rr+wr";
3019                         break;
3020                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3021                         m_op = "rr+ww";
3022                         break;
3023                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3024                         m_op = "rw+wr";
3025                         break;
3026                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3027                         m_op = "rw+ww";
3028                         break;
3029                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3030                         m_op = "wr+ww";
3031                         break;
3032                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3033                         m_op = "rr+rw+wr";
3034                         break;
3035                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3036                         m_op = "rr+rw+ww";
3037                         break;
3038                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3039                         m_op = "rr+wr+ww";
3040                         break;
3041                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3042                         m_op = "rw+wr+ww";
3043                         break;
3044                     case TCG_MO_ALL:
3045                         m_op = "all";
3046                         break;
3047                     default:
3048                         g_assert_not_reached();
3049                     }
3050 
3051                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3052                     i++, k++;
3053                 }
3054                 break;
3055             default:
3056                 break;
3057             }
3058             for (; i < nb_cargs; i++, k++) {
3059                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3060                                   op->args[k]);
3061             }
3062         }
3063 
3064         if (have_prefs || op->life) {
3065             for (; col < 40; ++col) {
3066                 putc(' ', f);
3067             }
3068         }
3069 
3070         if (op->life) {
3071             unsigned life = op->life;
3072 
3073             if (life & (SYNC_ARG * 3)) {
3074                 ne_fprintf(f, "  sync:");
3075                 for (i = 0; i < 2; ++i) {
3076                     if (life & (SYNC_ARG << i)) {
3077                         ne_fprintf(f, " %d", i);
3078                     }
3079                 }
3080             }
3081             life /= DEAD_ARG;
3082             if (life) {
3083                 ne_fprintf(f, "  dead:");
3084                 for (i = 0; life; ++i, life >>= 1) {
3085                     if (life & 1) {
3086                         ne_fprintf(f, " %d", i);
3087                     }
3088                 }
3089             }
3090         }
3091 
3092         if (have_prefs) {
3093             for (i = 0; i < nb_oargs; ++i) {
3094                 TCGRegSet set = output_pref(op, i);
3095 
3096                 if (i == 0) {
3097                     ne_fprintf(f, "  pref=");
3098                 } else {
3099                     ne_fprintf(f, ",");
3100                 }
3101                 if (set == 0) {
3102                     ne_fprintf(f, "none");
3103                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3104                     ne_fprintf(f, "all");
3105 #ifdef CONFIG_DEBUG_TCG
3106                 } else if (tcg_regset_single(set)) {
3107                     TCGReg reg = tcg_regset_first(set);
3108                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3109 #endif
3110                 } else if (TCG_TARGET_NB_REGS <= 32) {
3111                     ne_fprintf(f, "0x%x", (uint32_t)set);
3112                 } else {
3113                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3114                 }
3115             }
3116         }
3117 
3118         putc('\n', f);
3119     }
3120 }
3121 
3122 /* Give higher priority to constraints with fewer registers. */
3123 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3124 {
3125     int n;
3126 
3127     arg_ct += k;
3128     n = ctpop64(arg_ct->regs);
3129 
3130     /*
3131      * Sort constraints of a single register first, which includes output
3132      * aliases (which must exactly match the input already allocated).
3133      */
3134     if (n == 1 || arg_ct->oalias) {
3135         return INT_MAX;
3136     }
3137 
3138     /*
3139      * Sort register pairs next: the first member of the pair, then the
3140      * second immediately after.  Arbitrarily sort multiple pairs by the
3141      * index of the first reg; there shouldn't be many pairs.
3142      */
3143     switch (arg_ct->pair) {
3144     case 1:
3145     case 3:
3146         return (k + 1) * 2;
3147     case 2:
3148         return (arg_ct->pair_index + 1) * 2 - 1;
3149     }
3150 
3151     /* Finally, sort by decreasing register count. */
3152     assert(n > 1);
3153     return -n;
3154 }
3155 
3156 /* sort from highest priority to lowest */
3157 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3158 {
3159     int i, j;
3160 
3161     for (i = 0; i < n; i++) {
3162         a[start + i].sort_index = start + i;
3163     }
3164     if (n <= 1) {
3165         return;
3166     }
3167     for (i = 0; i < n - 1; i++) {
3168         for (j = i + 1; j < n; j++) {
3169             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3170             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3171             if (p1 < p2) {
3172                 int tmp = a[start + i].sort_index;
3173                 a[start + i].sort_index = a[start + j].sort_index;
3174                 a[start + j].sort_index = tmp;
3175             }
3176         }
3177     }
3178 }
3179 
3180 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3181 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3182 
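/*
 * Expand the textual constraint sets into TCGArgConstraint arrays, once
 * at startup: resolve output/input aliases ('0'-'9'), register pairs
 * ('p'/'m'), and the per-target register and constant letters.
 */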
3183 static void process_constraint_sets(void)
3184 {
3185     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3186         const TCGConstraintSet *tdefs = &constraint_sets[c];
3187         TCGArgConstraint *args_ct = all_cts[c];
3188         int nb_oargs = tdefs->nb_oargs;
3189         int nb_iargs = tdefs->nb_iargs;
3190         int nb_args = nb_oargs + nb_iargs;
3191         bool saw_alias_pair = false;
3192 
3193         for (int i = 0; i < nb_args; i++) {
3194             const char *ct_str = tdefs->args_ct_str[i];
3195             bool input_p = i >= nb_oargs;
3196             int o;
3197 
3198             switch (*ct_str) {
3199             case '0' ... '9':
3200                 o = *ct_str - '0';
3201                 tcg_debug_assert(input_p);
3202                 tcg_debug_assert(o < nb_oargs);
3203                 tcg_debug_assert(args_ct[o].regs != 0);
3204                 tcg_debug_assert(!args_ct[o].oalias);
3205                 args_ct[i] = args_ct[o];
3206                 /* The output sets oalias.  */
3207                 args_ct[o].oalias = 1;
3208                 args_ct[o].alias_index = i;
3209                 /* The input sets ialias. */
3210                 args_ct[i].ialias = 1;
3211                 args_ct[i].alias_index = o;
3212                 if (args_ct[i].pair) {
3213                     saw_alias_pair = true;
3214                 }
3215                 tcg_debug_assert(ct_str[1] == '\0');
3216                 continue;
3217 
3218             case '&':
3219                 tcg_debug_assert(!input_p);
3220                 args_ct[i].newreg = true;
3221                 ct_str++;
3222                 break;
3223 
3224             case 'p': /* plus */
3225                 /* Allocate to the register after the previous. */
3226                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3227                 o = i - 1;
3228                 tcg_debug_assert(!args_ct[o].pair);
3229                 tcg_debug_assert(!args_ct[o].ct);
3230                 args_ct[i] = (TCGArgConstraint){
3231                     .pair = 2,
3232                     .pair_index = o,
3233                     .regs = args_ct[o].regs << 1,
3234                     .newreg = args_ct[o].newreg,
3235                 };
3236                 args_ct[o].pair = 1;
3237                 args_ct[o].pair_index = i;
3238                 tcg_debug_assert(ct_str[1] == '\0');
3239                 continue;
3240 
3241             case 'm': /* minus */
3242                 /* Allocate to the register before the previous. */
3243                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3244                 o = i - 1;
3245                 tcg_debug_assert(!args_ct[o].pair);
3246                 tcg_debug_assert(!args_ct[o].ct);
3247                 args_ct[i] = (TCGArgConstraint){
3248                     .pair = 1,
3249                     .pair_index = o,
3250                     .regs = args_ct[o].regs >> 1,
3251                     .newreg = args_ct[o].newreg,
3252                 };
3253                 args_ct[o].pair = 2;
3254                 args_ct[o].pair_index = i;
3255                 tcg_debug_assert(ct_str[1] == '\0');
3256                 continue;
3257             }
3258 
3259             do {
3260                 switch (*ct_str) {
3261                 case 'i':
3262                     args_ct[i].ct |= TCG_CT_CONST;
3263                     break;
3264 #ifdef TCG_REG_ZERO
3265                 case 'z':
3266                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3267                     break;
3268 #endif
3269 
3270                 /* Include all of the target-specific constraints. */
3271 
3272 #undef CONST
3273 #define CONST(CASE, MASK) \
3274     case CASE: args_ct[i].ct |= MASK; break;
3275 #define REGS(CASE, MASK) \
3276     case CASE: args_ct[i].regs |= MASK; break;
3277 
3278 #include "tcg-target-con-str.h"
3279 
3280 #undef REGS
3281 #undef CONST
3282                 default:
3283                 case '0' ... '9':
3284                 case '&':
3285                 case 'p':
3286                 case 'm':
3287                     /* Typo in TCGConstraintSet constraint. */
3288                     g_assert_not_reached();
3289                 }
3290             } while (*++ct_str != '\0');
3291         }
3292 
3293         /*
3294          * Fix up output pairs that are aliased with inputs.
3295          * When we created the alias, we copied pair from the output.
3296          * There are three cases:
3297          *    (1a) Pairs of inputs alias pairs of outputs.
3298          *    (1b) One input aliases the first of a pair of outputs.
3299          *    (2)  One input aliases the second of a pair of outputs.
3300          *
3301          * Case 1a is handled by making sure that the pair_index'es are
3302          * properly updated so that they appear the same as a pair of inputs.
3303          *
3304          * Case 1b is handled by setting the pair_index of the input to
3305          * itself, simply so it doesn't point to an unrelated argument.
3306          * Since we don't encounter the "second" during the input allocation
3307          * phase, nothing happens with the second half of the input pair.
3308          *
3309          * Case 2 is handled by setting the second input to pair=3, the
3310          * first output to pair=3, and the pair_index'es to match.
3311          */
3312         if (saw_alias_pair) {
3313             for (int i = nb_oargs; i < nb_args; i++) {
3314                 int o, o2, i2;
3315 
3316                 /*
3317                  * Since [0-9pm] must be alone in the constraint string,
3318                  * the only way they can both be set is if the pair comes
3319                  * from the output alias.
3320                  */
3321                 if (!args_ct[i].ialias) {
3322                     continue;
3323                 }
3324                 switch (args_ct[i].pair) {
3325                 case 0:
3326                     break;
3327                 case 1:
3328                     o = args_ct[i].alias_index;
3329                     o2 = args_ct[o].pair_index;
3330                     tcg_debug_assert(args_ct[o].pair == 1);
3331                     tcg_debug_assert(args_ct[o2].pair == 2);
3332                     if (args_ct[o2].oalias) {
3333                         /* Case 1a */
3334                         i2 = args_ct[o2].alias_index;
3335                         tcg_debug_assert(args_ct[i2].pair == 2);
3336                         args_ct[i2].pair_index = i;
3337                         args_ct[i].pair_index = i2;
3338                     } else {
3339                         /* Case 1b */
3340                         args_ct[i].pair_index = i;
3341                     }
3342                     break;
3343                 case 2:
3344                     o = args_ct[i].alias_index;
3345                     o2 = args_ct[o].pair_index;
3346                     tcg_debug_assert(args_ct[o].pair == 2);
3347                     tcg_debug_assert(args_ct[o2].pair == 1);
3348                     if (args_ct[o2].oalias) {
3349                         /* Case 1a */
3350                         i2 = args_ct[o2].alias_index;
3351                         tcg_debug_assert(args_ct[i2].pair == 1);
3352                         args_ct[i2].pair_index = i;
3353                         args_ct[i].pair_index = i2;
3354                     } else {
3355                         /* Case 2 */
3356                         args_ct[i].pair = 3;
3357                         args_ct[o2].pair = 3;
3358                         args_ct[i].pair_index = o2;
3359                         args_ct[o2].pair_index = i;
3360                     }
3361                     break;
3362                 default:
3363                     g_assert_not_reached();
3364                 }
3365             }
3366         }
3367 
3368         /* sort the constraints (XXX: this is just a heuristic) */
3369         sort_constraints(args_ct, 0, nb_oargs);
3370         sort_constraints(args_ct, nb_oargs, nb_iargs);
3371     }
3372 }
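
/*
 * A rough sketch of the input (illustrative): each TCGConstraintSet
 * supplies one string per argument, e.g. { "r", "0", "ri" } for an
 * output in some register class, a first input aliased to that
 * output ('0'), and a second input accepting a register or an
 * immediate ('i' == TCG_CT_CONST).  Letters beyond [0-9&pmiz] are
 * target-specific, defined in tcg-target-con-str.h, so 'r' as a
 * general-register class is only an assumption about a typical
 * backend.
 */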
3373 
3374 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3375 {
3376     TCGOpcode opc = op->opc;
3377     TCGType type = TCGOP_TYPE(op);
3378     unsigned flags = TCGOP_FLAGS(op);
3379     const TCGOpDef *def = &tcg_op_defs[opc];
3380     const TCGOutOp *outop = all_outop[opc];
3381     TCGConstraintSetIndex con_set;
3382 
3383     if (def->flags & TCG_OPF_NOT_PRESENT) {
3384         return empty_cts;
3385     }
3386 
3387     if (outop) {
3388         con_set = outop->static_constraint;
3389         if (con_set == C_Dynamic) {
3390             con_set = outop->dynamic_constraint(type, flags);
3391         }
3392     } else {
3393         con_set = tcg_target_op_def(opc, type, flags);
3394     }
3395     tcg_debug_assert(con_set >= 0);
3396     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3397 
3398     /* The constraint arguments must match TCGOpcode arguments. */
3399     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3400     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3401 
3402     return all_cts[con_set];
3403 }
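
/*
 * Note (illustrative): static_constraint covers the common case of a
 * fixed constraint set, while C_Dynamic lets a backend defer the
 * choice until the op's type and flags are known, e.g. a target that
 * accepts immediate operands for only some type widths.
 */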
3404 
3405 static void remove_label_use(TCGOp *op, int idx)
3406 {
3407     TCGLabel *label = arg_label(op->args[idx]);
3408     TCGLabelUse *use;
3409 
3410     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3411         if (use->op == op) {
3412             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3413             return;
3414         }
3415     }
3416     g_assert_not_reached();
3417 }
3418 
3419 void tcg_op_remove(TCGContext *s, TCGOp *op)
3420 {
3421     switch (op->opc) {
3422     case INDEX_op_br:
3423         remove_label_use(op, 0);
3424         break;
3425     case INDEX_op_brcond_i32:
3426     case INDEX_op_brcond_i64:
3427         remove_label_use(op, 3);
3428         break;
3429     case INDEX_op_brcond2_i32:
3430         remove_label_use(op, 5);
3431         break;
3432     default:
3433         break;
3434     }
3435 
3436     QTAILQ_REMOVE(&s->ops, op, link);
3437     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3438     s->nb_ops--;
3439 }
3440 
3441 void tcg_remove_ops_after(TCGOp *op)
3442 {
3443     TCGContext *s = tcg_ctx;
3444 
3445     while (true) {
3446         TCGOp *last = tcg_last_op();
3447         if (last == op) {
3448             return;
3449         }
3450         tcg_op_remove(s, last);
3451     }
3452 }
3453 
3454 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3455 {
3456     TCGContext *s = tcg_ctx;
3457     TCGOp *op = NULL;
3458 
3459     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3460         QTAILQ_FOREACH(op, &s->free_ops, link) {
3461             if (nargs <= op->nargs) {
3462                 QTAILQ_REMOVE(&s->free_ops, op, link);
3463                 nargs = op->nargs;
3464                 goto found;
3465             }
3466         }
3467     }
3468 
3469     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3470     nargs = MAX(4, nargs);
3471     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3472 
3473  found:
3474     memset(op, 0, offsetof(TCGOp, link));
3475     op->opc = opc;
3476     op->nargs = nargs;
3477 
3478     /* Check for bitfield overflow. */
3479     tcg_debug_assert(op->nargs == nargs);
3480 
3481     s->nb_ops++;
3482     return op;
3483 }
3484 
3485 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3486 {
3487     TCGOp *op = tcg_op_alloc(opc, nargs);
3488 
3489     if (tcg_ctx->emit_before_op) {
3490         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3491     } else {
3492         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3493     }
3494     return op;
3495 }
3496 
3497 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3498                             TCGOpcode opc, TCGType type, unsigned nargs)
3499 {
3500     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3501 
3502     TCGOP_TYPE(new_op) = type;
3503     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3504     return new_op;
3505 }
3506 
3507 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3508                            TCGOpcode opc, TCGType type, unsigned nargs)
3509 {
3510     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3511 
3512     TCGOP_TYPE(new_op) = type;
3513     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3514     return new_op;
3515 }
3516 
3517 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3518 {
3519     TCGLabelUse *u;
3520 
3521     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3522         TCGOp *op = u->op;
3523         switch (op->opc) {
3524         case INDEX_op_br:
3525             op->args[0] = label_arg(to);
3526             break;
3527         case INDEX_op_brcond_i32:
3528         case INDEX_op_brcond_i64:
3529             op->args[3] = label_arg(to);
3530             break;
3531         case INDEX_op_brcond2_i32:
3532             op->args[5] = label_arg(to);
3533             break;
3534         default:
3535             g_assert_not_reached();
3536         }
3537     }
3538 
3539     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3540 }
3541 
3542 /* Reachability analysis: remove unreachable code.  */
3543 static void __attribute__((noinline))
3544 reachable_code_pass(TCGContext *s)
3545 {
3546     TCGOp *op, *op_next, *op_prev;
3547     bool dead = false;
3548 
3549     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3550         bool remove = dead;
3551         TCGLabel *label;
3552 
3553         switch (op->opc) {
3554         case INDEX_op_set_label:
3555             label = arg_label(op->args[0]);
3556 
3557             /*
3558              * Note that the first op in the TB is always a load,
3559              * so there is always something before a label.
3560              */
3561             op_prev = QTAILQ_PREV(op, link);
3562 
3563             /*
3564              * If we find two sequential labels, move all branches to
3565              * reference the second label and remove the first label.
3566              * Do this before branch to next optimization, so that the
3567              * middle label is out of the way.
3568              */
3569             if (op_prev->opc == INDEX_op_set_label) {
3570                 move_label_uses(label, arg_label(op_prev->args[0]));
3571                 tcg_op_remove(s, op_prev);
3572                 op_prev = QTAILQ_PREV(op, link);
3573             }
3574 
3575             /*
3576              * Optimization can fold conditional branches to unconditional.
3577              * If we find a label which is preceded by an unconditional
3578              * branch to next, remove the branch.  We couldn't do this when
3579              * processing the branch because any dead code between the branch
3580              * and label had not yet been removed.
3581              */
3582             if (op_prev->opc == INDEX_op_br &&
3583                 label == arg_label(op_prev->args[0])) {
3584                 tcg_op_remove(s, op_prev);
3585                 /* Fall through means insns become live again.  */
3586                 dead = false;
3587             }
3588 
3589             if (QSIMPLEQ_EMPTY(&label->branches)) {
3590                 /*
3591                  * While there is an occasional backward branch, virtually
3592                  * all branches generated by the translators are forward.
3593                  * Which means that generally we will have already removed
3594                  * all references to the label that will be, and there is
3595                  * little to be gained by iterating.
3596                  */
3597                 remove = true;
3598             } else {
3599                 /* Once we see a label, insns become live again.  */
3600                 dead = false;
3601                 remove = false;
3602             }
3603             break;
3604 
3605         case INDEX_op_br:
3606         case INDEX_op_exit_tb:
3607         case INDEX_op_goto_ptr:
3608             /* Unconditional branches; everything following is dead.  */
3609             dead = true;
3610             break;
3611 
3612         case INDEX_op_call:
3613             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
3614             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3615                 dead = true;
3616             }
3617             break;
3618 
3619         case INDEX_op_insn_start:
3620             /* Never remove -- we need to keep these for unwind.  */
3621             remove = false;
3622             break;
3623 
3624         default:
3625             break;
3626         }
3627 
3628         if (remove) {
3629             tcg_op_remove(s, op);
3630         }
3631     }
3632 }
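
/*
 * Sketch of the label optimizations above (illustrative):
 *
 *     br $L0
 *     mov t0, t1          <- dead: follows an unconditional branch
 *     set_label $L0       <- merged into $L1; uses of $L0 redirected
 *     set_label $L1
 *
 * reduces to just "set_label $L1": once $L0 is merged away, the
 * branch targets the immediately following label and is removed too.
 */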
3633 
3634 #define TS_DEAD  1
3635 #define TS_MEM   2
3636 
3637 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3638 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
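
/*
 * Reading the encoding (illustrative): during liveness, ts->state is
 * a bitmask where TS_DEAD means the value is not needed by any later
 * opcode and TS_MEM means its canonical memory slot holds (or must
 * come to hold) the value.  op->life packs one "dies here" bit and
 * one "sync to memory here" bit per argument, tested by the two
 * macros above.
 */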
3639 
3640 /* For liveness_pass_1, the register preferences for a given temp.  */
3641 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3642 {
3643     return ts->state_ptr;
3644 }
3645 
3646 /* For liveness_pass_1, reset the preferences for a given temp to the
3647  * maximal regset for its type.
3648  */
3649 static inline void la_reset_pref(TCGTemp *ts)
3650 {
3651     *la_temp_pref(ts)
3652         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3653 }
3654 
3655 /* liveness analysis: end of function: all temps are dead, and globals
3656    should be in memory. */
3657 static void la_func_end(TCGContext *s, int ng, int nt)
3658 {
3659     int i;
3660 
3661     for (i = 0; i < ng; ++i) {
3662         s->temps[i].state = TS_DEAD | TS_MEM;
3663         la_reset_pref(&s->temps[i]);
3664     }
3665     for (i = ng; i < nt; ++i) {
3666         s->temps[i].state = TS_DEAD;
3667         la_reset_pref(&s->temps[i]);
3668     }
3669 }
3670 
3671 /* liveness analysis: end of basic block: all temps are dead, globals
3672    and local temps should be in memory. */
3673 static void la_bb_end(TCGContext *s, int ng, int nt)
3674 {
3675     int i;
3676 
3677     for (i = 0; i < nt; ++i) {
3678         TCGTemp *ts = &s->temps[i];
3679         int state;
3680 
3681         switch (ts->kind) {
3682         case TEMP_FIXED:
3683         case TEMP_GLOBAL:
3684         case TEMP_TB:
3685             state = TS_DEAD | TS_MEM;
3686             break;
3687         case TEMP_EBB:
3688         case TEMP_CONST:
3689             state = TS_DEAD;
3690             break;
3691         default:
3692             g_assert_not_reached();
3693         }
3694         ts->state = state;
3695         la_reset_pref(ts);
3696     }
3697 }
3698 
3699 /* liveness analysis: sync globals back to memory.  */
3700 static void la_global_sync(TCGContext *s, int ng)
3701 {
3702     int i;
3703 
3704     for (i = 0; i < ng; ++i) {
3705         int state = s->temps[i].state;
3706         s->temps[i].state = state | TS_MEM;
3707         if (state == TS_DEAD) {
3708             /* If the global was previously dead, reset prefs.  */
3709             la_reset_pref(&s->temps[i]);
3710         }
3711     }
3712 }
3713 
3714 /*
3715  * liveness analysis: conditional branch: all temps are dead unless
3716  * explicitly live-across-conditional-branch, globals and local temps
3717  * should be synced.
3718  */
3719 static void la_bb_sync(TCGContext *s, int ng, int nt)
3720 {
3721     la_global_sync(s, ng);
3722 
3723     for (int i = ng; i < nt; ++i) {
3724         TCGTemp *ts = &s->temps[i];
3725         int state;
3726 
3727         switch (ts->kind) {
3728         case TEMP_TB:
3729             state = ts->state;
3730             ts->state = state | TS_MEM;
3731             if (state != TS_DEAD) {
3732                 continue;
3733             }
3734             break;
3735         case TEMP_EBB:
3736         case TEMP_CONST:
3737             continue;
3738         default:
3739             g_assert_not_reached();
3740         }
3741         la_reset_pref(&s->temps[i]);
3742     }
3743 }
3744 
3745 /* liveness analysis: sync globals back to memory and kill.  */
3746 static void la_global_kill(TCGContext *s, int ng)
3747 {
3748     int i;
3749 
3750     for (i = 0; i < ng; i++) {
3751         s->temps[i].state = TS_DEAD | TS_MEM;
3752         la_reset_pref(&s->temps[i]);
3753     }
3754 }
3755 
3756 /* liveness analysis: note live globals crossing calls.  */
3757 static void la_cross_call(TCGContext *s, int nt)
3758 {
3759     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3760     int i;
3761 
3762     for (i = 0; i < nt; i++) {
3763         TCGTemp *ts = &s->temps[i];
3764         if (!(ts->state & TS_DEAD)) {
3765             TCGRegSet *pset = la_temp_pref(ts);
3766             TCGRegSet set = *pset;
3767 
3768             set &= mask;
3769             /* If the combination is not possible, restart.  */
3770             if (set == 0) {
3771                 set = tcg_target_available_regs[ts->type] & mask;
3772             }
3773             *pset = set;
3774         }
3775     }
3776 }
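
/*
 * Example (illustrative): a temp that is live across a call and whose
 * preference set contained only call-clobbered registers ends up with
 * an empty intersection, so its preference restarts from the
 * call-saved registers available for its type.
 */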
3777 
3778 /*
3779  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3780  * to TEMP_EBB, if possible.
3781  */
3782 static void __attribute__((noinline))
3783 liveness_pass_0(TCGContext *s)
3784 {
3785     void * const multiple_ebb = (void *)(uintptr_t)-1;
3786     int nb_temps = s->nb_temps;
3787     TCGOp *op, *ebb;
3788 
3789     for (int i = s->nb_globals; i < nb_temps; ++i) {
3790         s->temps[i].state_ptr = NULL;
3791     }
3792 
3793     /*
3794      * Represent each EBB by the op at which it begins.  In the case of
3795      * the first EBB, this is the first op, otherwise it is a label.
3796      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3797      * within a single EBB, else MULTIPLE_EBB.
3798      */
3799     ebb = QTAILQ_FIRST(&s->ops);
3800     QTAILQ_FOREACH(op, &s->ops, link) {
3801         const TCGOpDef *def;
3802         int nb_oargs, nb_iargs;
3803 
3804         switch (op->opc) {
3805         case INDEX_op_set_label:
3806             ebb = op;
3807             continue;
3808         case INDEX_op_discard:
3809             continue;
3810         case INDEX_op_call:
3811             nb_oargs = TCGOP_CALLO(op);
3812             nb_iargs = TCGOP_CALLI(op);
3813             break;
3814         default:
3815             def = &tcg_op_defs[op->opc];
3816             nb_oargs = def->nb_oargs;
3817             nb_iargs = def->nb_iargs;
3818             break;
3819         }
3820 
3821         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3822             TCGTemp *ts = arg_temp(op->args[i]);
3823 
3824             if (ts->kind != TEMP_TB) {
3825                 continue;
3826             }
3827             if (ts->state_ptr == NULL) {
3828                 ts->state_ptr = ebb;
3829             } else if (ts->state_ptr != ebb) {
3830                 ts->state_ptr = multiple_ebb;
3831             }
3832         }
3833     }
3834 
3835     /*
3836      * For TEMP_TB that turned out not to be used beyond one EBB,
3837      * reduce the liveness to TEMP_EBB.
3838      */
3839     for (int i = s->nb_globals; i < nb_temps; ++i) {
3840         TCGTemp *ts = &s->temps[i];
3841         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3842             ts->kind = TEMP_EBB;
3843         }
3844     }
3845 }
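
/*
 * Example (illustrative): a TEMP_TB temp that is only written and
 * read between one pair of labels is demoted to TEMP_EBB here, so
 * later passes need not keep its memory slot up to date across
 * basic-block boundaries.
 */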
3846 
3847 /* Liveness analysis: update the opc_arg_life array to tell if a
3848    given input argument is dead. Instructions updating dead
3849    temporaries are removed. */
3850 static void __attribute__((noinline))
3851 liveness_pass_1(TCGContext *s)
3852 {
3853     int nb_globals = s->nb_globals;
3854     int nb_temps = s->nb_temps;
3855     TCGOp *op, *op_prev;
3856     TCGRegSet *prefs;
3857     int i;
3858 
3859     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3860     for (i = 0; i < nb_temps; ++i) {
3861         s->temps[i].state_ptr = prefs + i;
3862     }
3863 
3864     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3865     la_func_end(s, nb_globals, nb_temps);
3866 
3867     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3868         int nb_iargs, nb_oargs;
3869         TCGOpcode opc_new, opc_new2;
3870         bool have_opc_new2;
3871         TCGLifeData arg_life = 0;
3872         TCGTemp *ts;
3873         TCGOpcode opc = op->opc;
3874         const TCGOpDef *def = &tcg_op_defs[opc];
3875         const TCGArgConstraint *args_ct;
3876 
3877         switch (opc) {
3878         case INDEX_op_call:
3879             {
3880                 const TCGHelperInfo *info = tcg_call_info(op);
3881                 int call_flags = tcg_call_flags(op);
3882 
3883                 nb_oargs = TCGOP_CALLO(op);
3884                 nb_iargs = TCGOP_CALLI(op);
3885 
3886                 /* pure functions can be removed if their result is unused */
3887                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3888                     for (i = 0; i < nb_oargs; i++) {
3889                         ts = arg_temp(op->args[i]);
3890                         if (ts->state != TS_DEAD) {
3891                             goto do_not_remove_call;
3892                         }
3893                     }
3894                     goto do_remove;
3895                 }
3896             do_not_remove_call:
3897 
3898                 /* Output args are dead.  */
3899                 for (i = 0; i < nb_oargs; i++) {
3900                     ts = arg_temp(op->args[i]);
3901                     if (ts->state & TS_DEAD) {
3902                         arg_life |= DEAD_ARG << i;
3903                     }
3904                     if (ts->state & TS_MEM) {
3905                         arg_life |= SYNC_ARG << i;
3906                     }
3907                     ts->state = TS_DEAD;
3908                     la_reset_pref(ts);
3909                 }
3910 
3911                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3912                 memset(op->output_pref, 0, sizeof(op->output_pref));
3913 
3914                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3915                                     TCG_CALL_NO_READ_GLOBALS))) {
3916                     la_global_kill(s, nb_globals);
3917                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3918                     la_global_sync(s, nb_globals);
3919                 }
3920 
3921                 /* Record arguments that die in this helper.  */
3922                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3923                     ts = arg_temp(op->args[i]);
3924                     if (ts->state & TS_DEAD) {
3925                         arg_life |= DEAD_ARG << i;
3926                     }
3927                 }
3928 
3929                 /* For all live registers, remove call-clobbered prefs.  */
3930                 la_cross_call(s, nb_temps);
3931 
3932                 /*
3933                  * Input arguments are live for preceding opcodes.
3934                  *
3935                  * For those arguments that die, and will be allocated in
3936                  * registers, clear the register set for that arg, to be
3937                  * filled in below.  For args that will be on the stack,
3938                  * reset to any available reg.  Process arguments in reverse
3939                  * order so that if a temp is used more than once, the stack
3940                  * reset to max happens before the register reset to 0.
3941                  */
3942                 for (i = nb_iargs - 1; i >= 0; i--) {
3943                     const TCGCallArgumentLoc *loc = &info->in[i];
3944                     ts = arg_temp(op->args[nb_oargs + i]);
3945 
3946                     if (ts->state & TS_DEAD) {
3947                         switch (loc->kind) {
3948                         case TCG_CALL_ARG_NORMAL:
3949                         case TCG_CALL_ARG_EXTEND_U:
3950                         case TCG_CALL_ARG_EXTEND_S:
3951                             if (arg_slot_reg_p(loc->arg_slot)) {
3952                                 *la_temp_pref(ts) = 0;
3953                                 break;
3954                             }
3955                             /* fall through */
3956                         default:
3957                             *la_temp_pref(ts) =
3958                                 tcg_target_available_regs[ts->type];
3959                             break;
3960                         }
3961                         ts->state &= ~TS_DEAD;
3962                     }
3963                 }
3964 
3965                 /*
3966                  * For each input argument, add its input register to prefs.
3967                  * If a temp is used once, this produces a single set bit;
3968                  * if a temp is used multiple times, this produces a set.
3969                  */
3970                 for (i = 0; i < nb_iargs; i++) {
3971                     const TCGCallArgumentLoc *loc = &info->in[i];
3972                     ts = arg_temp(op->args[nb_oargs + i]);
3973 
3974                     switch (loc->kind) {
3975                     case TCG_CALL_ARG_NORMAL:
3976                     case TCG_CALL_ARG_EXTEND_U:
3977                     case TCG_CALL_ARG_EXTEND_S:
3978                         if (arg_slot_reg_p(loc->arg_slot)) {
3979                             tcg_regset_set_reg(*la_temp_pref(ts),
3980                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3981                         }
3982                         break;
3983                     default:
3984                         break;
3985                     }
3986                 }
3987             }
3988             break;
3989         case INDEX_op_insn_start:
3990             break;
3991         case INDEX_op_discard:
3992             /* mark the temporary as dead */
3993             ts = arg_temp(op->args[0]);
3994             ts->state = TS_DEAD;
3995             la_reset_pref(ts);
3996             break;
3997 
3998         case INDEX_op_add2_i32:
3999             opc_new = INDEX_op_add_i32;
4000             goto do_addsub2;
4001         case INDEX_op_sub2_i32:
4002             opc_new = INDEX_op_sub_i32;
4003             goto do_addsub2;
4004         case INDEX_op_add2_i64:
4005             opc_new = INDEX_op_add_i64;
4006             goto do_addsub2;
4007         case INDEX_op_sub2_i64:
4008             opc_new = INDEX_op_sub_i64;
4009         do_addsub2:
4010             nb_iargs = 4;
4011             nb_oargs = 2;
4012             /* Test if the high part of the operation is dead, but not
4013                the low part.  The result can be optimized to a simple
4014                add or sub.  This happens often for an x86_64 guest when
4015                the cpu mode is set to 32 bit.  */
4016             if (arg_temp(op->args[1])->state == TS_DEAD) {
4017                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4018                     goto do_remove;
4019                 }
4020                 /* Replace the opcode and adjust the args in place,
4021                    leaving 3 unused args at the end.  */
4022                 op->opc = opc = opc_new;
4023                 op->args[1] = op->args[2];
4024                 op->args[2] = op->args[4];
4025                 /* Fall through and mark the single-word operation live.  */
4026                 nb_iargs = 2;
4027                 nb_oargs = 1;
4028             }
4029             goto do_not_remove;
4030 
4031         case INDEX_op_mulu2_i32:
4032             opc_new = INDEX_op_mul_i32;
4033             opc_new2 = INDEX_op_muluh_i32;
4034             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4035             goto do_mul2;
4036         case INDEX_op_muls2_i32:
4037             opc_new = INDEX_op_mul_i32;
4038             opc_new2 = INDEX_op_mulsh_i32;
4039             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4040             goto do_mul2;
4041         case INDEX_op_mulu2_i64:
4042             opc_new = INDEX_op_mul_i64;
4043             opc_new2 = INDEX_op_muluh_i64;
4044             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4045             goto do_mul2;
4046         case INDEX_op_muls2_i64:
4047             opc_new = INDEX_op_mul_i64;
4048             opc_new2 = INDEX_op_mulsh_i64;
4049             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4050             goto do_mul2;
4051         do_mul2:
4052             nb_iargs = 2;
4053             nb_oargs = 2;
4054             if (arg_temp(op->args[1])->state == TS_DEAD) {
4055                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4056                     /* Both parts of the operation are dead.  */
4057                     goto do_remove;
4058                 }
4059                 /* The high part of the operation is dead; generate the low. */
4060                 op->opc = opc = opc_new;
4061                 op->args[1] = op->args[2];
4062                 op->args[2] = op->args[3];
4063             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4064                 /* The low part of the operation is dead; generate the high. */
4065                 op->opc = opc = opc_new2;
4066                 op->args[0] = op->args[1];
4067                 op->args[1] = op->args[2];
4068                 op->args[2] = op->args[3];
4069             } else {
4070                 goto do_not_remove;
4071             }
4072             /* Mark the single-word operation live.  */
4073             nb_oargs = 1;
4074             goto do_not_remove;
4075 
4076         default:
4077             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4078             nb_iargs = def->nb_iargs;
4079             nb_oargs = def->nb_oargs;
4080 
4081             /* Test if the operation can be removed because all
4082                its outputs are dead. We assume that nb_oargs == 0
4083                implies side effects */
4084             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4085                 for (i = 0; i < nb_oargs; i++) {
4086                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4087                         goto do_not_remove;
4088                     }
4089                 }
4090                 goto do_remove;
4091             }
4092             goto do_not_remove;
4093 
4094         do_remove:
4095             tcg_op_remove(s, op);
4096             break;
4097 
4098         do_not_remove:
4099             for (i = 0; i < nb_oargs; i++) {
4100                 ts = arg_temp(op->args[i]);
4101 
4102                 /* Remember the preference of the uses that followed.  */
4103                 if (i < ARRAY_SIZE(op->output_pref)) {
4104                     op->output_pref[i] = *la_temp_pref(ts);
4105                 }
4106 
4107                 /* Output args are dead.  */
4108                 if (ts->state & TS_DEAD) {
4109                     arg_life |= DEAD_ARG << i;
4110                 }
4111                 if (ts->state & TS_MEM) {
4112                     arg_life |= SYNC_ARG << i;
4113                 }
4114                 ts->state = TS_DEAD;
4115                 la_reset_pref(ts);
4116             }
4117 
4118             /* If end of basic block, update.  */
4119             if (def->flags & TCG_OPF_BB_EXIT) {
4120                 la_func_end(s, nb_globals, nb_temps);
4121             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4122                 la_bb_sync(s, nb_globals, nb_temps);
4123             } else if (def->flags & TCG_OPF_BB_END) {
4124                 la_bb_end(s, nb_globals, nb_temps);
4125             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4126                 la_global_sync(s, nb_globals);
4127                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4128                     la_cross_call(s, nb_temps);
4129                 }
4130             }
4131 
4132             /* Record arguments that die in this opcode.  */
4133             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4134                 ts = arg_temp(op->args[i]);
4135                 if (ts->state & TS_DEAD) {
4136                     arg_life |= DEAD_ARG << i;
4137                 }
4138             }
4139 
4140             /* Input arguments are live for preceding opcodes.  */
4141             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4142                 ts = arg_temp(op->args[i]);
4143                 if (ts->state & TS_DEAD) {
4144                     /* For operands that were dead, initially allow
4145                        all regs for the type.  */
4146                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4147                     ts->state &= ~TS_DEAD;
4148                 }
4149             }
4150 
4151             /* Incorporate constraints for this operand.  */
4152             switch (opc) {
4153             case INDEX_op_mov:
4154                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4155                    have proper constraints.  That said, special case
4156                    moves to propagate preferences backward.  */
4157                 if (IS_DEAD_ARG(1)) {
4158                     *la_temp_pref(arg_temp(op->args[0]))
4159                         = *la_temp_pref(arg_temp(op->args[1]));
4160                 }
4161                 break;
4162 
4163             default:
4164                 args_ct = opcode_args_ct(op);
4165                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4166                     const TCGArgConstraint *ct = &args_ct[i];
4167                     TCGRegSet set, *pset;
4168 
4169                     ts = arg_temp(op->args[i]);
4170                     pset = la_temp_pref(ts);
4171                     set = *pset;
4172 
4173                     set &= ct->regs;
4174                     if (ct->ialias) {
4175                         set &= output_pref(op, ct->alias_index);
4176                     }
4177                     /* If the combination is not possible, restart.  */
4178                     if (set == 0) {
4179                         set = ct->regs;
4180                     }
4181                     *pset = set;
4182                 }
4183                 break;
4184             }
4185             break;
4186         }
4187         op->life = arg_life;
4188     }
4189 }
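
/*
 * Summary of this pass's output (a sketch, not a specification): each
 * surviving op carries in op->life one bit per argument for "this
 * input dies here" and "this output must be synced to memory", and
 * op->output_pref[] records which registers later uses would prefer;
 * both are consumed by the register allocator below.
 */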
4190 
4191 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4192 static bool __attribute__((noinline))
4193 liveness_pass_2(TCGContext *s)
4194 {
4195     int nb_globals = s->nb_globals;
4196     int nb_temps, i;
4197     bool changes = false;
4198     TCGOp *op, *op_next;
4199 
4200     /* Create a temporary for each indirect global.  */
4201     for (i = 0; i < nb_globals; ++i) {
4202         TCGTemp *its = &s->temps[i];
4203         if (its->indirect_reg) {
4204             TCGTemp *dts = tcg_temp_alloc(s);
4205             dts->type = its->type;
4206             dts->base_type = its->base_type;
4207             dts->temp_subindex = its->temp_subindex;
4208             dts->kind = TEMP_EBB;
4209             its->state_ptr = dts;
4210         } else {
4211             its->state_ptr = NULL;
4212         }
4213         /* All globals begin dead.  */
4214         its->state = TS_DEAD;
4215     }
4216     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4217         TCGTemp *its = &s->temps[i];
4218         its->state_ptr = NULL;
4219         its->state = TS_DEAD;
4220     }
4221 
4222     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4223         TCGOpcode opc = op->opc;
4224         const TCGOpDef *def = &tcg_op_defs[opc];
4225         TCGLifeData arg_life = op->life;
4226         int nb_iargs, nb_oargs, call_flags;
4227         TCGTemp *arg_ts, *dir_ts;
4228 
4229         if (opc == INDEX_op_call) {
4230             nb_oargs = TCGOP_CALLO(op);
4231             nb_iargs = TCGOP_CALLI(op);
4232             call_flags = tcg_call_flags(op);
4233         } else {
4234             nb_iargs = def->nb_iargs;
4235             nb_oargs = def->nb_oargs;
4236 
4237             /* Set flags similar to how calls require.  */
4238             if (def->flags & TCG_OPF_COND_BRANCH) {
4239                 /* Like reading globals: sync_globals */
4240                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4241             } else if (def->flags & TCG_OPF_BB_END) {
4242                 /* Like writing globals: save_globals */
4243                 call_flags = 0;
4244             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4245                 /* Like reading globals: sync_globals */
4246                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4247             } else {
4248                 /* No effect on globals.  */
4249                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4250                               TCG_CALL_NO_WRITE_GLOBALS);
4251             }
4252         }
4253 
4254         /* Make sure that input arguments are available.  */
4255         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4256             arg_ts = arg_temp(op->args[i]);
4257             dir_ts = arg_ts->state_ptr;
4258             if (dir_ts && arg_ts->state == TS_DEAD) {
4259                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4260                                   ? INDEX_op_ld_i32
4261                                   : INDEX_op_ld_i64);
4262                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4263                                                   arg_ts->type, 3);
4264 
4265                 lop->args[0] = temp_arg(dir_ts);
4266                 lop->args[1] = temp_arg(arg_ts->mem_base);
4267                 lop->args[2] = arg_ts->mem_offset;
4268 
4269                 /* Loaded, but synced with memory.  */
4270                 arg_ts->state = TS_MEM;
4271             }
4272         }
4273 
4274         /* Perform input replacement, and mark inputs that became dead.
4275            No action is required except keeping temp_state up to date
4276            so that we reload when needed.  */
4277         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4278             arg_ts = arg_temp(op->args[i]);
4279             dir_ts = arg_ts->state_ptr;
4280             if (dir_ts) {
4281                 op->args[i] = temp_arg(dir_ts);
4282                 changes = true;
4283                 if (IS_DEAD_ARG(i)) {
4284                     arg_ts->state = TS_DEAD;
4285                 }
4286             }
4287         }
4288 
4289         /* Liveness analysis should ensure that the following are
4290            all correct, for call sites and basic block end points.  */
4291         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4292             /* Nothing to do */
4293         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4294             for (i = 0; i < nb_globals; ++i) {
4295                 /* Liveness should see that globals are synced back,
4296                    that is, either TS_DEAD or TS_MEM.  */
4297                 arg_ts = &s->temps[i];
4298                 tcg_debug_assert(arg_ts->state_ptr == 0
4299                                  || arg_ts->state != 0);
4300             }
4301         } else {
4302             for (i = 0; i < nb_globals; ++i) {
4303                 /* Liveness should see that globals are saved back,
4304                    that is, TS_DEAD, waiting to be reloaded.  */
4305                 arg_ts = &s->temps[i];
4306                 tcg_debug_assert(arg_ts->state_ptr == 0
4307                                  || arg_ts->state == TS_DEAD);
4308             }
4309         }
4310 
4311         /* Outputs become available.  */
4312         if (opc == INDEX_op_mov) {
4313             arg_ts = arg_temp(op->args[0]);
4314             dir_ts = arg_ts->state_ptr;
4315             if (dir_ts) {
4316                 op->args[0] = temp_arg(dir_ts);
4317                 changes = true;
4318 
4319                 /* The output is now live and modified.  */
4320                 arg_ts->state = 0;
4321 
4322                 if (NEED_SYNC_ARG(0)) {
4323                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4324                                       ? INDEX_op_st_i32
4325                                       : INDEX_op_st_i64);
4326                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4327                                                      arg_ts->type, 3);
4328                     TCGTemp *out_ts = dir_ts;
4329 
4330                     if (IS_DEAD_ARG(0)) {
4331                         out_ts = arg_temp(op->args[1]);
4332                         arg_ts->state = TS_DEAD;
4333                         tcg_op_remove(s, op);
4334                     } else {
4335                         arg_ts->state = TS_MEM;
4336                     }
4337 
4338                     sop->args[0] = temp_arg(out_ts);
4339                     sop->args[1] = temp_arg(arg_ts->mem_base);
4340                     sop->args[2] = arg_ts->mem_offset;
4341                 } else {
4342                     tcg_debug_assert(!IS_DEAD_ARG(0));
4343                 }
4344             }
4345         } else {
4346             for (i = 0; i < nb_oargs; i++) {
4347                 arg_ts = arg_temp(op->args[i]);
4348                 dir_ts = arg_ts->state_ptr;
4349                 if (!dir_ts) {
4350                     continue;
4351                 }
4352                 op->args[i] = temp_arg(dir_ts);
4353                 changes = true;
4354 
4355                 /* The output is now live and modified.  */
4356                 arg_ts->state = 0;
4357 
4358                 /* Sync outputs upon their last write.  */
4359                 if (NEED_SYNC_ARG(i)) {
4360                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4361                                       ? INDEX_op_st_i32
4362                                       : INDEX_op_st_i64);
4363                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4364                                                      arg_ts->type, 3);
4365 
4366                     sop->args[0] = temp_arg(dir_ts);
4367                     sop->args[1] = temp_arg(arg_ts->mem_base);
4368                     sop->args[2] = arg_ts->mem_offset;
4369 
4370                     arg_ts->state = TS_MEM;
4371                 }
4372                 /* Drop outputs that are dead.  */
4373                 if (IS_DEAD_ARG(i)) {
4374                     arg_ts->state = TS_DEAD;
4375                 }
4376             }
4377         }
4378     }
4379 
4380     return changes;
4381 }
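
/*
 * Illustrative effect, for an indirect global g shadowed by the
 * direct temp d (names hypothetical): a use such as
 *
 *     add_i32 t0, g, t1
 * becomes
 *     ld_i32  d, g_base, g_off
 *     add_i32 t0, d, t1
 *
 * when g is not already resident, and a write to g gains a matching
 * st_i32 after it, so the canonical memory copy stays authoritative.
 */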
4382 
4383 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4384 {
4385     intptr_t off;
4386     int size, align;
4387 
4388     /* When allocating an object, look at the full type. */
4389     size = tcg_type_size(ts->base_type);
4390     switch (ts->base_type) {
4391     case TCG_TYPE_I32:
4392         align = 4;
4393         break;
4394     case TCG_TYPE_I64:
4395     case TCG_TYPE_V64:
4396         align = 8;
4397         break;
4398     case TCG_TYPE_I128:
4399     case TCG_TYPE_V128:
4400     case TCG_TYPE_V256:
4401         /*
4402          * Note that we do not require aligned storage for V256,
4403          * and that we provide alignment for I128 to match V128,
4404          * even if that's above what the host ABI requires.
4405          */
4406         align = 16;
4407         break;
4408     default:
4409         g_assert_not_reached();
4410     }
4411 
4412     /*
4413      * Assume the stack is sufficiently aligned.
4414      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4415      * and do not require 16 byte vector alignment.  This seems slightly
4416      * easier than fully parameterizing the above switch statement.
4417      */
4418     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4419     off = ROUND_UP(s->current_frame_offset, align);
4420 
4421     /* If we've exhausted the stack frame, restart with a smaller TB. */
4422     if (off + size > s->frame_end) {
4423         tcg_raise_tb_overflow(s);
4424     }
4425     s->current_frame_offset = off + size;
4426 #if defined(__sparc__)
4427     off += TCG_TARGET_STACK_BIAS;
4428 #endif
4429 
4430     /* If the object was subdivided, assign memory to all the parts. */
4431     if (ts->base_type != ts->type) {
4432         int part_size = tcg_type_size(ts->type);
4433         int part_count = size / part_size;
4434 
4435         /*
4436          * Each part is allocated sequentially in tcg_temp_new_internal.
4437          * Jump back to the first part by subtracting the current index.
4438          */
4439         ts -= ts->temp_subindex;
4440         for (int i = 0; i < part_count; ++i) {
4441             ts[i].mem_offset = off + i * part_size;
4442             ts[i].mem_base = s->frame_temp;
4443             ts[i].mem_allocated = 1;
4444         }
4445     } else {
4446         ts->mem_offset = off;
4447         ts->mem_base = s->frame_temp;
4448         ts->mem_allocated = 1;
4449     }
4450 }
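
/*
 * Worked example (illustrative, assuming a 16-byte-aligned stack):
 * with current_frame_offset == 12, a TCG_TYPE_I64 temp rounds up to
 * offset 16 and advances the frame to 24; a TCG_TYPE_I128 temp split
 * into two I64 parts places part 0 at offset 16 and part 1 at 24,
 * advancing the frame to 32.
 */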
4451 
4452 /* Assign @reg to @ts, and update reg_to_temp[]. */
4453 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4454 {
4455     if (ts->val_type == TEMP_VAL_REG) {
4456         TCGReg old = ts->reg;
4457         tcg_debug_assert(s->reg_to_temp[old] == ts);
4458         if (old == reg) {
4459             return;
4460         }
4461         s->reg_to_temp[old] = NULL;
4462     }
4463     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4464     s->reg_to_temp[reg] = ts;
4465     ts->val_type = TEMP_VAL_REG;
4466     ts->reg = reg;
4467 }
4468 
4469 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4470 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4471 {
4472     tcg_debug_assert(type != TEMP_VAL_REG);
4473     if (ts->val_type == TEMP_VAL_REG) {
4474         TCGReg reg = ts->reg;
4475         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4476         s->reg_to_temp[reg] = NULL;
4477     }
4478     ts->val_type = type;
4479 }
4480 
4481 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4482 
4483 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4484    mark it free; otherwise mark it dead.  */
4485 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4486 {
4487     TCGTempVal new_type;
4488 
4489     switch (ts->kind) {
4490     case TEMP_FIXED:
4491         return;
4492     case TEMP_GLOBAL:
4493     case TEMP_TB:
4494         new_type = TEMP_VAL_MEM;
4495         break;
4496     case TEMP_EBB:
4497         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4498         break;
4499     case TEMP_CONST:
4500         new_type = TEMP_VAL_CONST;
4501         break;
4502     default:
4503         g_assert_not_reached();
4504     }
4505     set_temp_val_nonreg(s, ts, new_type);
4506 }
4507 
4508 /* Mark a temporary as dead.  */
4509 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4510 {
4511     temp_free_or_dead(s, ts, 1);
4512 }
4513 
4514 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4515    register needs to be allocated to store a constant.  If 'free_or_dead'
4516    is non-zero, subsequently release the temporary; if it is positive, the
4517    temp is dead; if it is negative, the temp is free.  */
4518 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4519                       TCGRegSet preferred_regs, int free_or_dead)
4520 {
4521     if (!temp_readonly(ts) && !ts->mem_coherent) {
4522         if (!ts->mem_allocated) {
4523             temp_allocate_frame(s, ts);
4524         }
4525         switch (ts->val_type) {
4526         case TEMP_VAL_CONST:
4527             /* If we're going to free the temp immediately, then we won't
4528                require it later in a register, so attempt to store the
4529                constant to memory directly.  */
4530             if (free_or_dead
4531                 && tcg_out_sti(s, ts->type, ts->val,
4532                                ts->mem_base->reg, ts->mem_offset)) {
4533                 break;
4534             }
4535             temp_load(s, ts, tcg_target_available_regs[ts->type],
4536                       allocated_regs, preferred_regs);
4537             /* fallthrough */
4538 
4539         case TEMP_VAL_REG:
4540             tcg_out_st(s, ts->type, ts->reg,
4541                        ts->mem_base->reg, ts->mem_offset);
4542             break;
4543 
4544         case TEMP_VAL_MEM:
4545             break;
4546 
4547         case TEMP_VAL_DEAD:
4548         default:
4549             g_assert_not_reached();
4550         }
4551         ts->mem_coherent = 1;
4552     }
4553     if (free_or_dead) {
4554         temp_free_or_dead(s, ts, free_or_dead);
4555     }
4556 }
4557 
4558 /* free register 'reg' by spilling the corresponding temporary if necessary */
4559 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4560 {
4561     TCGTemp *ts = s->reg_to_temp[reg];
4562     if (ts != NULL) {
4563         temp_sync(s, ts, allocated_regs, 0, -1);
4564     }
4565 }
4566 
4567 /**
4568  * tcg_reg_alloc:
4569  * @required_regs: Set of registers in which we must allocate.
4570  * @allocated_regs: Set of registers which must be avoided.
4571  * @preferred_regs: Set of registers we should prefer.
4572  * @rev: True if we search the registers in "indirect" order.
4573  *
4574  * The allocated register must be in @required_regs & ~@allocated_regs,
4575  * but if we can put it in @preferred_regs we may save a move later.
4576  */
4577 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4578                             TCGRegSet allocated_regs,
4579                             TCGRegSet preferred_regs, bool rev)
4580 {
4581     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4582     TCGRegSet reg_ct[2];
4583     const int *order;
4584 
4585     reg_ct[1] = required_regs & ~allocated_regs;
4586     tcg_debug_assert(reg_ct[1] != 0);
4587     reg_ct[0] = reg_ct[1] & preferred_regs;
4588 
4589     /* Skip the preferred_regs option if it cannot be satisfied,
4590        or if the preference made no difference.  */
4591     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4592 
4593     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4594 
4595     /* Try free registers, preferences first.  */
4596     for (j = f; j < 2; j++) {
4597         TCGRegSet set = reg_ct[j];
4598 
4599         if (tcg_regset_single(set)) {
4600             /* One register in the set.  */
4601             TCGReg reg = tcg_regset_first(set);
4602             if (s->reg_to_temp[reg] == NULL) {
4603                 return reg;
4604             }
4605         } else {
4606             for (i = 0; i < n; i++) {
4607                 TCGReg reg = order[i];
4608                 if (s->reg_to_temp[reg] == NULL &&
4609                     tcg_regset_test_reg(set, reg)) {
4610                     return reg;
4611                 }
4612             }
4613         }
4614     }
4615 
4616     /* We must spill something.  */
4617     for (j = f; j < 2; j++) {
4618         TCGRegSet set = reg_ct[j];
4619 
4620         if (tcg_regset_single(set)) {
4621             /* One register in the set.  */
4622             TCGReg reg = tcg_regset_first(set);
4623             tcg_reg_free(s, reg, allocated_regs);
4624             return reg;
4625         } else {
4626             for (i = 0; i < n; i++) {
4627                 TCGReg reg = order[i];
4628                 if (tcg_regset_test_reg(set, reg)) {
4629                     tcg_reg_free(s, reg, allocated_regs);
4630                     return reg;
4631                 }
4632             }
4633         }
4634     }
4635 
4636     g_assert_not_reached();
4637 }
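
/*
 * Search order sketch (illustrative, hypothetical register names):
 * with required = {r0..r3} and preferred = {r2}, a free r2 is
 * returned by the first loop with no spill; if every candidate is
 * occupied, the second loop spills the first acceptable register in
 * allocation order and returns that.
 */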
4638 
4639 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4640                                  TCGRegSet allocated_regs,
4641                                  TCGRegSet preferred_regs, bool rev)
4642 {
4643     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4644     TCGRegSet reg_ct[2];
4645     const int *order;
4646 
4647     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4648     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4649     tcg_debug_assert(reg_ct[1] != 0);
4650     reg_ct[0] = reg_ct[1] & preferred_regs;
4651 
4652     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4653 
4654     /*
4655      * Skip the preferred_regs option if it cannot be satisfied,
4656      * or if the preference made no difference.
4657      */
4658     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4659 
4660     /*
4661      * Minimize the number of flushes by looking for 2 free registers first,
4662      * then a single flush, then two flushes.
4663      */
4664     for (fmin = 2; fmin >= 0; fmin--) {
4665         for (j = k; j < 2; j++) {
4666             TCGRegSet set = reg_ct[j];
4667 
4668             for (i = 0; i < n; i++) {
4669                 TCGReg reg = order[i];
4670 
4671                 if (tcg_regset_test_reg(set, reg)) {
4672                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4673                     if (f >= fmin) {
4674                         tcg_reg_free(s, reg, allocated_regs);
4675                         tcg_reg_free(s, reg + 1, allocated_regs);
4676                         return reg;
4677                     }
4678                 }
4679             }
4680         }
4681     }
4682     g_assert_not_reached();
4683 }
4684 
4685 /* Make sure the temporary is in a register.  If needed, allocate the register
4686    from DESIRED while avoiding ALLOCATED.  */
4687 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4688                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4689 {
4690     TCGReg reg;
4691 
4692     switch (ts->val_type) {
4693     case TEMP_VAL_REG:
4694         return;
4695     case TEMP_VAL_CONST:
4696         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4697                             preferred_regs, ts->indirect_base);
4698         if (ts->type <= TCG_TYPE_I64) {
4699             tcg_out_movi(s, ts->type, reg, ts->val);
4700         } else {
4701             uint64_t val = ts->val;
4702             MemOp vece = MO_64;
4703 
4704             /*
4705              * Find the minimal vector element that matches the constant.
4706              * The targets will, in general, have to do this search anyway,
4707              * so do it generically here.
4708              */
4709             if (val == dup_const(MO_8, val)) {
4710                 vece = MO_8;
4711             } else if (val == dup_const(MO_16, val)) {
4712                 vece = MO_16;
4713             } else if (val == dup_const(MO_32, val)) {
4714                 vece = MO_32;
4715             }
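            /* E.g. 0xff00ff00ff00ff00 fails the MO_8 test but dups from MO_16. */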
4716 
4717             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4718         }
4719         ts->mem_coherent = 0;
4720         break;
4721     case TEMP_VAL_MEM:
4722         if (!ts->mem_allocated) {
4723             temp_allocate_frame(s, ts);
4724         }
4725         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4726                             preferred_regs, ts->indirect_base);
4727         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4728         ts->mem_coherent = 1;
4729         break;
4730     case TEMP_VAL_DEAD:
4731     default:
4732         g_assert_not_reached();
4733     }
4734     set_temp_val_reg(s, ts, reg);
4735 }
4736 
4737 /* Save a temporary to memory. 'allocated_regs' is used in case a
4738    temporary register needs to be allocated to store a constant.  */
4739 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4740 {
4741     /* The liveness analysis already ensures that globals are back
4742        in memory. Keep a tcg_debug_assert for safety. */
4743     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4744 }
4745 
4746 /* save globals to their canonical location and assume they can be
4747    modified by the following code. 'allocated_regs' is used in case a
4748    temporary register needs to be allocated to store a constant. */
4749 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4750 {
4751     int i, n;
4752 
4753     for (i = 0, n = s->nb_globals; i < n; i++) {
4754         temp_save(s, &s->temps[i], allocated_regs);
4755     }
4756 }
4757 
4758 /* sync globals to their canonical location and assume they can be
4759    read by the following code. 'allocated_regs' is used in case a
4760    temporary register needs to be allocated to store a constant. */
4761 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4762 {
4763     int i, n;
4764 
4765     for (i = 0, n = s->nb_globals; i < n; i++) {
4766         TCGTemp *ts = &s->temps[i];
4767         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4768                          || ts->kind == TEMP_FIXED
4769                          || ts->mem_coherent);
4770     }
4771 }
4772 
4773 /* at the end of a basic block, we assume all temporaries are dead and
4774    all globals are stored at their canonical location. */
4775 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4776 {
4777     int i;
4778 
4779     for (i = s->nb_globals; i < s->nb_temps; i++) {
4780         TCGTemp *ts = &s->temps[i];
4781 
4782         switch (ts->kind) {
4783         case TEMP_TB:
4784             temp_save(s, ts, allocated_regs);
4785             break;
4786         case TEMP_EBB:
4787             /* The liveness analysis already ensures that temps are dead.
4788                Keep a tcg_debug_assert for safety. */
4789             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4790             break;
4791         case TEMP_CONST:
4792             /* Similarly, we should have freed any allocated register. */
4793             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4794             break;
4795         default:
4796             g_assert_not_reached();
4797         }
4798     }
4799 
4800     save_globals(s, allocated_regs);
4801 }
4802 
4803 /*
4804  * At a conditional branch, we assume all temporaries are dead unless
4805  * explicitly live-across-conditional-branch; all globals and local
4806  * temps are synced to their location.
4807  */
4808 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4809 {
4810     sync_globals(s, allocated_regs);
4811 
4812     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4813         TCGTemp *ts = &s->temps[i];
4814         /*
4815          * The liveness analysis already ensures that temps are dead.
4816          * Keep tcg_debug_asserts for safety.
4817          */
4818         switch (ts->kind) {
4819         case TEMP_TB:
4820             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4821             break;
4822         case TEMP_EBB:
4823         case TEMP_CONST:
4824             break;
4825         default:
4826             g_assert_not_reached();
4827         }
4828     }
4829 }
4830 
4831 /*
4832  * Specialized code generation for INDEX_op_mov_* with a constant.
4833  */
4834 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4835                                   tcg_target_ulong val, TCGLifeData arg_life,
4836                                   TCGRegSet preferred_regs)
4837 {
4838     /* ENV should not be modified.  */
4839     tcg_debug_assert(!temp_readonly(ots));
4840 
4841     /* The movi is not explicitly generated here.  */
4842     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4843     ots->val = val;
4844     ots->mem_coherent = 0;
4845     if (NEED_SYNC_ARG(0)) {
4846         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4847     } else if (IS_DEAD_ARG(0)) {
4848         temp_dead(s, ots);
4849     }
4850 }
4851 
4852 /*
4853  * Specialized code generation for INDEX_op_mov_*.
4854  */
4855 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4856 {
4857     const TCGLifeData arg_life = op->life;
4858     TCGRegSet allocated_regs, preferred_regs;
4859     TCGTemp *ts, *ots;
4860     TCGType otype, itype;
4861     TCGReg oreg, ireg;
4862 
4863     allocated_regs = s->reserved_regs;
4864     preferred_regs = output_pref(op, 0);
4865     ots = arg_temp(op->args[0]);
4866     ts = arg_temp(op->args[1]);
4867 
4868     /* ENV should not be modified.  */
4869     tcg_debug_assert(!temp_readonly(ots));
4870 
4871     /* Note that otype != itype for no-op truncation.  */
4872     otype = ots->type;
4873     itype = ts->type;
4874 
4875     if (ts->val_type == TEMP_VAL_CONST) {
4876         /* propagate the constant, or generate a store of the immediate */
4877         tcg_target_ulong val = ts->val;
4878         if (IS_DEAD_ARG(1)) {
4879             temp_dead(s, ts);
4880         }
4881         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4882         return;
4883     }
4884 
4885     /* If the source value is in memory we're going to be forced
4886        to have it in a register in order to perform the copy.  Copy
4887        the SOURCE value into its own register first, that way we
4888        don't have to reload SOURCE the next time it is used. */
4889     if (ts->val_type == TEMP_VAL_MEM) {
4890         temp_load(s, ts, tcg_target_available_regs[itype],
4891                   allocated_regs, preferred_regs);
4892     }
4893     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4894     ireg = ts->reg;
4895 
4896     if (IS_DEAD_ARG(0)) {
4897         /* mov to a non-saved dead register makes no sense (even with
4898            liveness analysis disabled). */
4899         tcg_debug_assert(NEED_SYNC_ARG(0));
4900         if (!ots->mem_allocated) {
4901             temp_allocate_frame(s, ots);
4902         }
4903         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4904         if (IS_DEAD_ARG(1)) {
4905             temp_dead(s, ts);
4906         }
4907         temp_dead(s, ots);
4908         return;
4909     }
4910 
4911     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4912         /*
4913          * The mov can be suppressed.  Kill input first, so that it
4914          * is unlinked from reg_to_temp, then set the output to the
4915          * reg that we saved from the input.
4916          */
4917         temp_dead(s, ts);
4918         oreg = ireg;
4919     } else {
4920         if (ots->val_type == TEMP_VAL_REG) {
4921             oreg = ots->reg;
4922         } else {
4923             /* Make sure to not spill the input register during allocation. */
4924             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4925                                  allocated_regs | ((TCGRegSet)1 << ireg),
4926                                  preferred_regs, ots->indirect_base);
4927         }
4928         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4929             /*
4930              * Cross register class move not supported.
4931              * Store the source register into the destination slot
4932              * and leave the destination temp as TEMP_VAL_MEM.
4933              */
4934             assert(!temp_readonly(ots));
4935             if (!ts->mem_allocated) {
4936                 temp_allocate_frame(s, ots);
4937             }
4938             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4939             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4940             ots->mem_coherent = 1;
4941             return;
4942         }
4943     }
4944     set_temp_val_reg(s, ots, oreg);
4945     ots->mem_coherent = 0;
4946 
4947     if (NEED_SYNC_ARG(0)) {
4948         temp_sync(s, ots, allocated_regs, 0, 0);
4949     }
4950 }
4951 
4952 /*
4953  * Specialized code generation for INDEX_op_dup_vec.
4954  */
4955 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4956 {
4957     const TCGLifeData arg_life = op->life;
4958     TCGRegSet dup_out_regs, dup_in_regs;
4959     const TCGArgConstraint *dup_args_ct;
4960     TCGTemp *its, *ots;
4961     TCGType itype, vtype;
4962     unsigned vece;
4963     int lowpart_ofs;
4964     bool ok;
4965 
4966     ots = arg_temp(op->args[0]);
4967     its = arg_temp(op->args[1]);
4968 
4969     /* ENV should not be modified.  */
4970     tcg_debug_assert(!temp_readonly(ots));
4971 
4972     itype = its->type;
4973     vece = TCGOP_VECE(op);
4974     vtype = TCGOP_TYPE(op);
4975 
4976     if (its->val_type == TEMP_VAL_CONST) {
4977         /* Propagate constant via movi -> dupi.  */
4978         tcg_target_ulong val = its->val;
4979         if (IS_DEAD_ARG(1)) {
4980             temp_dead(s, its);
4981         }
4982         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4983         return;
4984     }
4985 
4986     dup_args_ct = opcode_args_ct(op);
4987     dup_out_regs = dup_args_ct[0].regs;
4988     dup_in_regs = dup_args_ct[1].regs;
4989 
4990     /* Allocate the output register now.  */
4991     if (ots->val_type != TEMP_VAL_REG) {
4992         TCGRegSet allocated_regs = s->reserved_regs;
4993         TCGReg oreg;
4994 
4995         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4996             /* Make sure to not spill the input register. */
4997             tcg_regset_set_reg(allocated_regs, its->reg);
4998         }
4999         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5000                              output_pref(op, 0), ots->indirect_base);
5001         set_temp_val_reg(s, ots, oreg);
5002     }
5003 
5004     switch (its->val_type) {
5005     case TEMP_VAL_REG:
5006         /*
5007          * The dup constraints must be broad, covering all possible VECE.
5008          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5009          * to fail, indicating that extra moves are required for that case.
5010          */
5011         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5012             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5013                 goto done;
5014             }
5015             /* Try again from memory or a vector input register.  */
5016         }
5017         if (!its->mem_coherent) {
5018             /*
5019              * The input register is not synced, and so an extra store
5020              * would be required to use memory.  Attempt an integer-vector
5021              * register move first.  We do not have a TCGRegSet for this.
5022              */
5023             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5024                 break;
5025             }
5026             /* Sync the temp back to its slot and load from there.  */
5027             temp_sync(s, its, s->reserved_regs, 0, 0);
5028         }
5029         /* fall through */
5030 
5031     case TEMP_VAL_MEM:
5032         lowpart_ofs = 0;
5033         if (HOST_BIG_ENDIAN) {
5034             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5035         }
5036         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5037                              its->mem_offset + lowpart_ofs)) {
5038             goto done;
5039         }
5040         /* Load the input into the destination vector register. */
5041         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5042         break;
5043 
5044     default:
5045         g_assert_not_reached();
5046     }
5047 
5048     /* We now have a vector input register, so dup must succeed. */
5049     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5050     tcg_debug_assert(ok);
5051 
5052  done:
5053     ots->mem_coherent = 0;
5054     if (IS_DEAD_ARG(1)) {
5055         temp_dead(s, its);
5056     }
5057     if (NEED_SYNC_ARG(0)) {
5058         temp_sync(s, ots, s->reserved_regs, 0, 0);
5059     }
5060     if (IS_DEAD_ARG(0)) {
5061         temp_dead(s, ots);
5062     }
5063 }
5064 
5065 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5066 {
5067     const TCGLifeData arg_life = op->life;
5068     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5069     TCGRegSet i_allocated_regs;
5070     TCGRegSet o_allocated_regs;
5071     int i, k, nb_iargs, nb_oargs;
5072     TCGReg reg;
5073     TCGArg arg;
5074     const TCGArgConstraint *args_ct;
5075     const TCGArgConstraint *arg_ct;
5076     TCGTemp *ts;
5077     TCGArg new_args[TCG_MAX_OP_ARGS];
5078     int const_args[TCG_MAX_OP_ARGS];
5079     TCGCond op_cond;
5080 
5081     nb_oargs = def->nb_oargs;
5082     nb_iargs = def->nb_iargs;
5083 
5084     /* copy constants */
5085     memcpy(new_args + nb_oargs + nb_iargs,
5086            op->args + nb_oargs + nb_iargs,
5087            sizeof(TCGArg) * def->nb_cargs);
5088 
5089     i_allocated_regs = s->reserved_regs;
5090     o_allocated_regs = s->reserved_regs;
5091 
5092     switch (op->opc) {
5093     case INDEX_op_brcond_i32:
5094     case INDEX_op_brcond_i64:
5095         op_cond = op->args[2];
5096         break;
5097     case INDEX_op_setcond_i32:
5098     case INDEX_op_setcond_i64:
5099     case INDEX_op_negsetcond_i32:
5100     case INDEX_op_negsetcond_i64:
5101     case INDEX_op_cmp_vec:
5102         op_cond = op->args[3];
5103         break;
5104     case INDEX_op_brcond2_i32:
5105         op_cond = op->args[4];
5106         break;
5107     case INDEX_op_movcond_i32:
5108     case INDEX_op_movcond_i64:
5109     case INDEX_op_setcond2_i32:
5110     case INDEX_op_cmpsel_vec:
5111         op_cond = op->args[5];
5112         break;
5113     default:
5114         /* No condition within opcode. */
5115         op_cond = TCG_COND_ALWAYS;
5116         break;
5117     }
5118 
5119     args_ct = opcode_args_ct(op);
5120 
5121     /* satisfy input constraints */
5122     for (k = 0; k < nb_iargs; k++) {
5123         TCGRegSet i_preferred_regs, i_required_regs;
5124         bool allocate_new_reg, copyto_new_reg;
5125         TCGTemp *ts2;
5126         int i1, i2;
5127 
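        /* Inputs are visited in the sorted constraint order (sort_index),
           which may differ from operand order. */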
5128         i = args_ct[nb_oargs + k].sort_index;
5129         arg = op->args[i];
5130         arg_ct = &args_ct[i];
5131         ts = arg_temp(arg);
5132 
5133         if (ts->val_type == TEMP_VAL_CONST) {
5134 #ifdef TCG_REG_ZERO
5135             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5136                 /* Hardware zero register: indicate register via non-const. */
5137                 const_args[i] = 0;
5138                 new_args[i] = TCG_REG_ZERO;
5139                 continue;
5140             }
5141 #endif
5142 
5143             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5144                                        op_cond, TCGOP_VECE(op))) {
5145                 /* constant is OK for instruction */
5146                 const_args[i] = 1;
5147                 new_args[i] = ts->val;
5148                 continue;
5149             }
5150         }
5151 
5152         reg = ts->reg;
5153         i_preferred_regs = 0;
5154         i_required_regs = arg_ct->regs;
5155         allocate_new_reg = false;
5156         copyto_new_reg = false;
5157 
5158         switch (arg_ct->pair) {
5159         case 0: /* not paired */
5160             if (arg_ct->ialias) {
5161                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5162 
5163                 /*
5164                  * If the input is readonly, then it cannot also be an
5165                  * output and aliased to itself.  If the input is not
5166                  * dead after the instruction, we must allocate a new
5167                  * register and move it.
5168                  */
5169                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5170                     || args_ct[arg_ct->alias_index].newreg) {
5171                     allocate_new_reg = true;
5172                 } else if (ts->val_type == TEMP_VAL_REG) {
5173                     /*
5174                      * Check if the current register has already been
5175                      * allocated for another input.
5176                      */
5177                     allocate_new_reg =
5178                         tcg_regset_test_reg(i_allocated_regs, reg);
5179                 }
5180             }
5181             if (!allocate_new_reg) {
5182                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5183                           i_preferred_regs);
5184                 reg = ts->reg;
5185                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5186             }
5187             if (allocate_new_reg) {
5188                 /*
5189                  * Allocate a new register matching the constraint
5190                  * and move the temporary register into it.
5191                  */
5192                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5193                           i_allocated_regs, 0);
5194                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5195                                     i_preferred_regs, ts->indirect_base);
5196                 copyto_new_reg = true;
5197             }
5198             break;
5199 
5200         case 1:
5201             /* First of an input pair; if i1 == i2, the second is an output. */
5202             i1 = i;
5203             i2 = arg_ct->pair_index;
5204             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5205 
5206             /*
5207              * It is easier to default to allocating a new pair
5208              * and to identify a few cases where it's not required.
5209              */
5210             if (arg_ct->ialias) {
5211                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5212                 if (IS_DEAD_ARG(i1) &&
5213                     IS_DEAD_ARG(i2) &&
5214                     !temp_readonly(ts) &&
5215                     ts->val_type == TEMP_VAL_REG &&
5216                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5217                     tcg_regset_test_reg(i_required_regs, reg) &&
5218                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5219                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5220                     (ts2
5221                      ? ts2->val_type == TEMP_VAL_REG &&
5222                        ts2->reg == reg + 1 &&
5223                        !temp_readonly(ts2)
5224                      : s->reg_to_temp[reg + 1] == NULL)) {
5225                     break;
5226                 }
5227             } else {
5228                 /* Without aliasing, the pair must also be an input. */
5229                 tcg_debug_assert(ts2);
5230                 if (ts->val_type == TEMP_VAL_REG &&
5231                     ts2->val_type == TEMP_VAL_REG &&
5232                     ts2->reg == reg + 1 &&
5233                     tcg_regset_test_reg(i_required_regs, reg)) {
5234                     break;
5235                 }
5236             }
5237             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5238                                      0, ts->indirect_base);
5239             goto do_pair;
5240 
5241         case 2: /* pair second */
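            /* The first of the pair already has its register;
               this input must take the adjacent one. */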
5242             reg = new_args[arg_ct->pair_index] + 1;
5243             goto do_pair;
5244 
5245         case 3: /* ialias with second output, no first input */
5246             tcg_debug_assert(arg_ct->ialias);
5247             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5248 
5249             if (IS_DEAD_ARG(i) &&
5250                 !temp_readonly(ts) &&
5251                 ts->val_type == TEMP_VAL_REG &&
5252                 reg > 0 &&
5253                 s->reg_to_temp[reg - 1] == NULL &&
5254                 tcg_regset_test_reg(i_required_regs, reg) &&
5255                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5256                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5257                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5258                 break;
5259             }
5260             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5261                                      i_allocated_regs, 0,
5262                                      ts->indirect_base);
5263             tcg_regset_set_reg(i_allocated_regs, reg);
5264             reg += 1;
5265             goto do_pair;
5266 
5267         do_pair:
5268             /*
5269              * If an aliased input is not dead after the instruction,
5270              * we must allocate a new register and move it.
5271              */
5272             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5273                 TCGRegSet t_allocated_regs = i_allocated_regs;
5274 
5275                 /*
5276                  * Because of the alias, and the continued life, make sure
5277                  * that the temp is somewhere *other* than the reg pair,
5278                  * and we get a copy in reg.
5279                  */
5280                 tcg_regset_set_reg(t_allocated_regs, reg);
5281                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5282                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5283                     /* If ts was already in reg, copy it somewhere else. */
5284                     TCGReg nr;
5285                     bool ok;
5286 
5287                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5288                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5289                                        t_allocated_regs, 0, ts->indirect_base);
5290                     ok = tcg_out_mov(s, ts->type, nr, reg);
5291                     tcg_debug_assert(ok);
5292 
5293                     set_temp_val_reg(s, ts, nr);
5294                 } else {
5295                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5296                               t_allocated_regs, 0);
5297                     copyto_new_reg = true;
5298                 }
5299             } else {
5300                 /* Preferably allocate to reg, otherwise copy. */
5301                 i_required_regs = (TCGRegSet)1 << reg;
5302                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5303                           i_preferred_regs);
5304                 copyto_new_reg = ts->reg != reg;
5305             }
5306             break;
5307 
5308         default:
5309             g_assert_not_reached();
5310         }
5311 
5312         if (copyto_new_reg) {
5313             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5314                 /*
5315                  * Cross register class move not supported.  Sync the
5316                  * temp back to its slot and load from there.
5317                  */
5318                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5319                 tcg_out_ld(s, ts->type, reg,
5320                            ts->mem_base->reg, ts->mem_offset);
5321             }
5322         }
5323         new_args[i] = reg;
5324         const_args[i] = 0;
5325         tcg_regset_set_reg(i_allocated_regs, reg);
5326     }
5327 
5328     /* mark dead temporaries and free the associated registers */
5329     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5330         if (IS_DEAD_ARG(i)) {
5331             temp_dead(s, arg_temp(op->args[i]));
5332         }
5333     }
5334 
5335     if (def->flags & TCG_OPF_COND_BRANCH) {
5336         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5337     } else if (def->flags & TCG_OPF_BB_END) {
5338         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5339     } else {
5340         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5341             /* XXX: permit a generic clobber register list? */
5342             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5343                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5344                     tcg_reg_free(s, i, i_allocated_regs);
5345                 }
5346             }
5347         }
5348         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5349             /* sync globals if the op has side effects and might trigger
5350                an exception. */
5351             sync_globals(s, i_allocated_regs);
5352         }
5353 
5354         /* satisfy the output constraints */
5355         for (k = 0; k < nb_oargs; k++) {
5356             i = args_ct[k].sort_index;
5357             arg = op->args[i];
5358             arg_ct = &args_ct[i];
5359             ts = arg_temp(arg);
5360 
5361             /* ENV should not be modified.  */
5362             tcg_debug_assert(!temp_readonly(ts));
5363 
5364             switch (arg_ct->pair) {
5365             case 0: /* not paired */
5366                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5367                     reg = new_args[arg_ct->alias_index];
5368                 } else if (arg_ct->newreg) {
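                    /* A "new register" output must not overlap any input
                       or previously allocated output. */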
5369                     reg = tcg_reg_alloc(s, arg_ct->regs,
5370                                         i_allocated_regs | o_allocated_regs,
5371                                         output_pref(op, k), ts->indirect_base);
5372                 } else {
5373                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5374                                         output_pref(op, k), ts->indirect_base);
5375                 }
5376                 break;
5377 
5378             case 1: /* first of pair */
5379                 if (arg_ct->oalias) {
5380                     reg = new_args[arg_ct->alias_index];
5381                 } else if (arg_ct->newreg) {
5382                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5383                                              i_allocated_regs | o_allocated_regs,
5384                                              output_pref(op, k),
5385                                              ts->indirect_base);
5386                 } else {
5387                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5388                                              output_pref(op, k),
5389                                              ts->indirect_base);
5390                 }
5391                 break;
5392 
5393             case 2: /* second of pair */
5394                 if (arg_ct->oalias) {
5395                     reg = new_args[arg_ct->alias_index];
5396                 } else {
5397                     reg = new_args[arg_ct->pair_index] + 1;
5398                 }
5399                 break;
5400 
5401             case 3: /* first of pair, aliasing with a second input */
5402                 tcg_debug_assert(!arg_ct->newreg);
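                /* The aliased second input (input case 3) reserved the
                   register just below itself; the pair begins there. */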
5403                 reg = new_args[arg_ct->pair_index] - 1;
5404                 break;
5405 
5406             default:
5407                 g_assert_not_reached();
5408             }
5409             tcg_regset_set_reg(o_allocated_regs, reg);
5410             set_temp_val_reg(s, ts, reg);
5411             ts->mem_coherent = 0;
5412             new_args[i] = reg;
5413         }
5414     }
5415 
5416     /* emit instruction */
5417     switch (op->opc) {
5418     case INDEX_op_ext_i32_i64:
5419         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5420         break;
5421     case INDEX_op_extu_i32_i64:
5422         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5423         break;
5424     case INDEX_op_extrl_i64_i32:
5425         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5426         break;
5427     default:
5428         if (def->flags & TCG_OPF_VECTOR) {
5429             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5430                            TCGOP_VECE(op), new_args, const_args);
5431         } else {
5432             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5433         }
5434         break;
5435     }
5436 
5437     /* move the outputs in the correct register if needed */
5438     for (i = 0; i < nb_oargs; i++) {
5439         ts = arg_temp(op->args[i]);
5440 
5441         /* ENV should not be modified.  */
5442         tcg_debug_assert(!temp_readonly(ts));
5443 
5444         if (NEED_SYNC_ARG(i)) {
5445             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5446         } else if (IS_DEAD_ARG(i)) {
5447             temp_dead(s, ts);
5448         }
5449     }
5450 }
5451 
5452 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5453 {
5454     const TCGLifeData arg_life = op->life;
5455     TCGTemp *ots, *itsl, *itsh;
5456     TCGType vtype = TCGOP_TYPE(op);
5457 
5458     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5459     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5460     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5461 
5462     ots = arg_temp(op->args[0]);
5463     itsl = arg_temp(op->args[1]);
5464     itsh = arg_temp(op->args[2]);
5465 
5466     /* ENV should not be modified.  */
5467     tcg_debug_assert(!temp_readonly(ots));
5468 
5469     /* Allocate the output register now.  */
5470     if (ots->val_type != TEMP_VAL_REG) {
5471         TCGRegSet allocated_regs = s->reserved_regs;
5472         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5473         TCGReg oreg;
5474 
5475         /* Make sure to not spill the input registers. */
5476         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5477             tcg_regset_set_reg(allocated_regs, itsl->reg);
5478         }
5479         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5480             tcg_regset_set_reg(allocated_regs, itsh->reg);
5481         }
5482 
5483         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5484                              output_pref(op, 0), ots->indirect_base);
5485         set_temp_val_reg(s, ots, oreg);
5486     }
5487 
5488     /* Promote dup2 of immediates to dupi_vec. */
5489     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
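        /* Combine the two 32-bit halves into a single 64-bit immediate. */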
5490         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5491         MemOp vece = MO_64;
5492 
5493         if (val == dup_const(MO_8, val)) {
5494             vece = MO_8;
5495         } else if (val == dup_const(MO_16, val)) {
5496             vece = MO_16;
5497         } else if (val == dup_const(MO_32, val)) {
5498             vece = MO_32;
5499         }
5500 
5501         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5502         goto done;
5503     }
5504 
5505     /* If the two inputs form one 64-bit value, try dupm_vec. */
5506     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5507         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5508         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5509         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5510 
5511         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5512         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5513 
5514         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5515                              its->mem_base->reg, its->mem_offset)) {
5516             goto done;
5517         }
5518     }
5519 
5520     /* Fall back to generic expansion. */
5521     return false;
5522 
5523  done:
5524     ots->mem_coherent = 0;
5525     if (IS_DEAD_ARG(1)) {
5526         temp_dead(s, itsl);
5527     }
5528     if (IS_DEAD_ARG(2)) {
5529         temp_dead(s, itsh);
5530     }
5531     if (NEED_SYNC_ARG(0)) {
5532         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5533     } else if (IS_DEAD_ARG(0)) {
5534         temp_dead(s, ots);
5535     }
5536     return true;
5537 }
5538 
5539 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5540                          TCGRegSet allocated_regs)
5541 {
5542     if (ts->val_type == TEMP_VAL_REG) {
5543         if (ts->reg != reg) {
5544             tcg_reg_free(s, reg, allocated_regs);
5545             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5546                 /*
5547                  * Cross register class move not supported.  Sync the
5548                  * temp back to its slot and load from there.
5549                  */
5550                 temp_sync(s, ts, allocated_regs, 0, 0);
5551                 tcg_out_ld(s, ts->type, reg,
5552                            ts->mem_base->reg, ts->mem_offset);
5553             }
5554         }
5555     } else {
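        /* Free the required register and load the temp directly into it. */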
5556         TCGRegSet arg_set = 0;
5557 
5558         tcg_reg_free(s, reg, allocated_regs);
5559         tcg_regset_set_reg(arg_set, reg);
5560         temp_load(s, ts, arg_set, allocated_regs, 0);
5561     }
5562 }
5563 
5564 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5565                          TCGRegSet allocated_regs)
5566 {
5567     /*
5568      * When the destination is on the stack, load up the temp and store.
5569      * If there are many call-saved registers, the temp might live to
5570      * see another use; otherwise it'll be discarded.
5571      */
5572     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5573     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5574                arg_slot_stk_ofs(arg_slot));
5575 }
5576 
5577 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5578                             TCGTemp *ts, TCGRegSet *allocated_regs)
5579 {
5580     if (arg_slot_reg_p(l->arg_slot)) {
5581         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5582         load_arg_reg(s, reg, ts, *allocated_regs);
5583         tcg_regset_set_reg(*allocated_regs, reg);
5584     } else {
5585         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5586     }
5587 }
5588 
5589 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5590                          intptr_t ref_off, TCGRegSet *allocated_regs)
5591 {
5592     TCGReg reg;
5593 
5594     if (arg_slot_reg_p(arg_slot)) {
5595         reg = tcg_target_call_iarg_regs[arg_slot];
5596         tcg_reg_free(s, reg, *allocated_regs);
5597         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5598         tcg_regset_set_reg(*allocated_regs, reg);
5599     } else {
5600         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5601                             *allocated_regs, 0, false);
5602         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5603         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5604                    arg_slot_stk_ofs(arg_slot));
5605     }
5606 }
5607 
5608 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5609 {
5610     const int nb_oargs = TCGOP_CALLO(op);
5611     const int nb_iargs = TCGOP_CALLI(op);
5612     const TCGLifeData arg_life = op->life;
5613     const TCGHelperInfo *info = tcg_call_info(op);
5614     TCGRegSet allocated_regs = s->reserved_regs;
5615     int i;
5616 
5617     /*
5618      * Move inputs into place in reverse order,
5619      * so that we place stacked arguments first.
5620      */
5621     for (i = nb_iargs - 1; i >= 0; --i) {
5622         const TCGCallArgumentLoc *loc = &info->in[i];
5623         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5624 
5625         switch (loc->kind) {
5626         case TCG_CALL_ARG_NORMAL:
5627         case TCG_CALL_ARG_EXTEND_U:
5628         case TCG_CALL_ARG_EXTEND_S:
5629             load_arg_normal(s, loc, ts, &allocated_regs);
5630             break;
5631         case TCG_CALL_ARG_BY_REF:
5632             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5633             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5634                          arg_slot_stk_ofs(loc->ref_slot),
5635                          &allocated_regs);
5636             break;
5637         case TCG_CALL_ARG_BY_REF_N:
5638             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5639             break;
5640         default:
5641             g_assert_not_reached();
5642         }
5643     }
5644 
5645     /* Mark dead temporaries and free the associated registers.  */
5646     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5647         if (IS_DEAD_ARG(i)) {
5648             temp_dead(s, arg_temp(op->args[i]));
5649         }
5650     }
5651 
5652     /* Clobber call registers.  */
5653     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5654         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5655             tcg_reg_free(s, i, allocated_regs);
5656         }
5657     }
5658 
5659     /*
5660      * Save globals if they might be written by the helper,
5661      * sync them if they might be read.
5662      */
5663     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5664         /* Nothing to do */
5665     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5666         sync_globals(s, allocated_regs);
5667     } else {
5668         save_globals(s, allocated_regs);
5669     }
5670 
5671     /*
5672      * If the ABI passes a pointer to the returned struct as the first
5673      * argument, load that now.  Pass a pointer to the output home slot.
5674      */
5675     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5676         TCGTemp *ts = arg_temp(op->args[0]);
5677 
5678         if (!ts->mem_allocated) {
5679             temp_allocate_frame(s, ts);
5680         }
5681         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5682     }
5683 
5684     tcg_out_call(s, tcg_call_func(op), info);
5685 
5686     /* Assign output registers and emit moves if needed.  */
5687     switch (info->out_kind) {
5688     case TCG_CALL_RET_NORMAL:
5689         for (i = 0; i < nb_oargs; i++) {
5690             TCGTemp *ts = arg_temp(op->args[i]);
5691             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5692 
5693             /* ENV should not be modified.  */
5694             tcg_debug_assert(!temp_readonly(ts));
5695 
5696             set_temp_val_reg(s, ts, reg);
5697             ts->mem_coherent = 0;
5698         }
5699         break;
5700 
5701     case TCG_CALL_RET_BY_VEC:
5702         {
5703             TCGTemp *ts = arg_temp(op->args[0]);
5704 
5705             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5706             tcg_debug_assert(ts->temp_subindex == 0);
5707             if (!ts->mem_allocated) {
5708                 temp_allocate_frame(s, ts);
5709             }
5710             tcg_out_st(s, TCG_TYPE_V128,
5711                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5712                        ts->mem_base->reg, ts->mem_offset);
5713         }
5714         /* fall through to mark all parts in memory */
5715 
5716     case TCG_CALL_RET_BY_REF:
5717         /* The callee has performed a write through the reference. */
5718         for (i = 0; i < nb_oargs; i++) {
5719             TCGTemp *ts = arg_temp(op->args[i]);
5720             ts->val_type = TEMP_VAL_MEM;
5721         }
5722         break;
5723 
5724     default:
5725         g_assert_not_reached();
5726     }
5727 
5728     /* Flush or discard output registers as needed. */
5729     for (i = 0; i < nb_oargs; i++) {
5730         TCGTemp *ts = arg_temp(op->args[i]);
5731         if (NEED_SYNC_ARG(i)) {
5732             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5733         } else if (IS_DEAD_ARG(i)) {
5734             temp_dead(s, ts);
5735         }
5736     }
5737 }
5738 
5739 /**
5740  * atom_and_align_for_opc:
5741  * @s: tcg context
5742  * @opc: memory operation code
5743  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5744  * @allow_two_ops: true if we are prepared to issue two operations
5745  *
5746  * Return the alignment and atomicity to use for the inline fast path
5747  * for the given memory operation.  The alignment may be larger than
5748  * that specified in @opc, and the correct alignment will be diagnosed
5749  * by the slow path helper.
5750  *
5751  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5752  * and issue two loads or stores for subalignment.
5753  */
5754 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5755                                            MemOp host_atom, bool allow_two_ops)
5756 {
5757     MemOp align = memop_alignment_bits(opc);
5758     MemOp size = opc & MO_SIZE;
5759     MemOp half = size ? size - 1 : 0;
5760     MemOp atom = opc & MO_ATOM_MASK;
5761     MemOp atmax;
5762 
5763     switch (atom) {
5764     case MO_ATOM_NONE:
5765         /* The operation requires no specific atomicity. */
5766         atmax = MO_8;
5767         break;
5768 
5769     case MO_ATOM_IFALIGN:
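        /* The whole access must be atomic when it is aligned. */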
5770         atmax = size;
5771         break;
5772 
5773     case MO_ATOM_IFALIGN_PAIR:
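        /* Atomicity is required only for each half-size piece. */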
5774         atmax = half;
5775         break;
5776 
5777     case MO_ATOM_WITHIN16:
5778         atmax = size;
5779         if (size == MO_128) {
5780             /* Misalignment implies !within16, and therefore no atomicity. */
5781         } else if (host_atom != MO_ATOM_WITHIN16) {
5782             /* The host does not implement within16, so require alignment. */
5783             align = MAX(align, size);
5784         }
5785         break;
5786 
5787     case MO_ATOM_WITHIN16_PAIR:
5788         atmax = size;
5789         /*
5790          * Misalignment implies !within16, and therefore half atomicity.
5791          * Any host prepared for two operations can implement this with
5792          * half alignment.
5793          */
5794         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5795             align = MAX(align, half);
5796         }
5797         break;
5798 
5799     case MO_ATOM_SUBALIGN:
5800         atmax = size;
5801         if (host_atom != MO_ATOM_SUBALIGN) {
5802             /* If unaligned but not odd, there are subobjects up to half. */
5803             if (allow_two_ops) {
5804                 align = MAX(align, half);
5805             } else {
5806                 align = MAX(align, size);
5807             }
5808         }
5809         break;
5810 
5811     default:
5812         g_assert_not_reached();
5813     }
5814 
5815     return (TCGAtomAlign){ .atom = atmax, .align = align };
5816 }
5817 
5818 /*
5819  * Similarly for qemu_ld/st slow path helpers.
5820  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5821  * using only the provided backend tcg_out_* functions.
5822  */
5823 
5824 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5825 {
5826     int ofs = arg_slot_stk_ofs(slot);
5827 
5828     /*
5829      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5830      * require extension to uint64_t, adjust the address for uint32_t.
5831      */
5832     if (HOST_BIG_ENDIAN &&
5833         TCG_TARGET_REG_BITS == 64 &&
5834         type == TCG_TYPE_I32) {
5835         ofs += 4;
5836     }
5837     return ofs;
5838 }
5839 
5840 static void tcg_out_helper_load_slots(TCGContext *s,
5841                                       unsigned nmov, TCGMovExtend *mov,
5842                                       const TCGLdstHelperParam *parm)
5843 {
5844     unsigned i;
5845     TCGReg dst3;
5846 
5847     /*
5848      * Start from the end, storing to the stack first.
5849      * This frees those registers, so we need not consider overlap.
5850      */
5851     for (i = nmov; i-- > 0; ) {
5852         unsigned slot = mov[i].dst;
5853 
5854         if (arg_slot_reg_p(slot)) {
5855             goto found_reg;
5856         }
5857 
5858         TCGReg src = mov[i].src;
5859         TCGType dst_type = mov[i].dst_type;
5860         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5861 
5862         /* The argument is going onto the stack; extend into scratch. */
5863         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5864             tcg_debug_assert(parm->ntmp != 0);
5865             mov[i].dst = src = parm->tmp[0];
5866             tcg_out_movext1(s, &mov[i]);
5867         }
5868 
5869         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5870                    tcg_out_helper_stk_ofs(dst_type, slot));
5871     }
5872     return;
5873 
5874  found_reg:
5875     /*
5876      * The remaining arguments are in registers.
5877      * Convert slot numbers to argument registers.
5878      */
5879     nmov = i + 1;
5880     for (i = 0; i < nmov; ++i) {
5881         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5882     }
5883 
5884     switch (nmov) {
5885     case 4:
5886         /* The backend must have provided enough temps for the worst case. */
5887         tcg_debug_assert(parm->ntmp >= 2);
5888 
5889         dst3 = mov[3].dst;
5890         for (unsigned j = 0; j < 3; ++j) {
5891             if (dst3 == mov[j].src) {
5892                 /*
5893                  * Conflict. Copy the source to a temporary, perform the
5894                  * remaining moves, then the extension from our scratch
5895                  * on the way out.
5896                  */
5897                 TCGReg scratch = parm->tmp[1];
5898 
5899                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5900                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5901                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5902                 return;
5903             }
5904         }
5905 
5906         /* No conflicts: perform this move and continue. */
5907         tcg_out_movext1(s, &mov[3]);
5908         /* fall through */
5909 
5910     case 3:
5911         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5912                         parm->ntmp ? parm->tmp[0] : -1);
5913         break;
5914     case 2:
5915         tcg_out_movext2(s, mov, mov + 1,
5916                         parm->ntmp ? parm->tmp[0] : -1);
5917         break;
5918     case 1:
5919         tcg_out_movext1(s, mov);
5920         break;
5921     default:
5922         g_assert_not_reached();
5923     }
5924 }
5925 
5926 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5927                                     TCGType type, tcg_target_long imm,
5928                                     const TCGLdstHelperParam *parm)
5929 {
5930     if (arg_slot_reg_p(slot)) {
5931         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5932     } else {
5933         int ofs = tcg_out_helper_stk_ofs(type, slot);
5934         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
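            /* No direct store-immediate: materialize into a scratch first. */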
5935             tcg_debug_assert(parm->ntmp != 0);
5936             tcg_out_movi(s, type, parm->tmp[0], imm);
5937             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5938         }
5939     }
5940 }
5941 
5942 static void tcg_out_helper_load_common_args(TCGContext *s,
5943                                             const TCGLabelQemuLdst *ldst,
5944                                             const TCGLdstHelperParam *parm,
5945                                             const TCGHelperInfo *info,
5946                                             unsigned next_arg)
5947 {
5948     TCGMovExtend ptr_mov = {
5949         .dst_type = TCG_TYPE_PTR,
5950         .src_type = TCG_TYPE_PTR,
5951         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5952     };
5953     const TCGCallArgumentLoc *loc = &info->in[0];
5954     TCGType type;
5955     unsigned slot;
5956     tcg_target_ulong imm;
5957 
5958     /*
5959      * Handle env, which is always first.
5960      */
5961     ptr_mov.dst = loc->arg_slot;
5962     ptr_mov.src = TCG_AREG0;
5963     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5964 
5965     /*
5966      * Handle oi.
5967      */
5968     imm = ldst->oi;
5969     loc = &info->in[next_arg];
5970     type = TCG_TYPE_I32;
5971     switch (loc->kind) {
5972     case TCG_CALL_ARG_NORMAL:
5973         break;
5974     case TCG_CALL_ARG_EXTEND_U:
5975     case TCG_CALL_ARG_EXTEND_S:
5976         /* No extension required for MemOpIdx. */
5977         tcg_debug_assert(imm <= INT32_MAX);
5978         type = TCG_TYPE_REG;
5979         break;
5980     default:
5981         g_assert_not_reached();
5982     }
5983     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5984     next_arg++;
5985 
5986     /*
5987      * Handle ra.
5988      */
5989     loc = &info->in[next_arg];
5990     slot = loc->arg_slot;
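    /* The backend may compute the slow-path return address itself via
       ra_gen; otherwise pass ldst->raddr as an immediate. */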
5991     if (parm->ra_gen) {
5992         int arg_reg = -1;
5993         TCGReg ra_reg;
5994 
5995         if (arg_slot_reg_p(slot)) {
5996             arg_reg = tcg_target_call_iarg_regs[slot];
5997         }
5998         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5999 
6000         ptr_mov.dst = slot;
6001         ptr_mov.src = ra_reg;
6002         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6003     } else {
6004         imm = (uintptr_t)ldst->raddr;
6005         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6006     }
6007 }
6008 
6009 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6010                                        const TCGCallArgumentLoc *loc,
6011                                        TCGType dst_type, TCGType src_type,
6012                                        TCGReg lo, TCGReg hi)
6013 {
6014     MemOp reg_mo;
6015 
6016     if (dst_type <= TCG_TYPE_REG) {
6017         MemOp src_ext;
6018 
6019         switch (loc->kind) {
6020         case TCG_CALL_ARG_NORMAL:
6021             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6022             break;
6023         case TCG_CALL_ARG_EXTEND_U:
6024             dst_type = TCG_TYPE_REG;
6025             src_ext = MO_UL;
6026             break;
6027         case TCG_CALL_ARG_EXTEND_S:
6028             dst_type = TCG_TYPE_REG;
6029             src_ext = MO_SL;
6030             break;
6031         default:
6032             g_assert_not_reached();
6033         }
6034 
6035         mov[0].dst = loc->arg_slot;
6036         mov[0].dst_type = dst_type;
6037         mov[0].src = lo;
6038         mov[0].src_type = src_type;
6039         mov[0].src_ext = src_ext;
6040         return 1;
6041     }
6042 
6043     if (TCG_TARGET_REG_BITS == 32) {
6044         assert(dst_type == TCG_TYPE_I64);
6045         reg_mo = MO_32;
6046     } else {
6047         assert(dst_type == TCG_TYPE_I128);
6048         reg_mo = MO_64;
6049     }
6050 
6051     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6052     mov[0].src = lo;
6053     mov[0].dst_type = TCG_TYPE_REG;
6054     mov[0].src_type = TCG_TYPE_REG;
6055     mov[0].src_ext = reg_mo;
6056 
6057     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6058     mov[1].src = hi;
6059     mov[1].dst_type = TCG_TYPE_REG;
6060     mov[1].src_type = TCG_TYPE_REG;
6061     mov[1].src_ext = reg_mo;
6062 
6063     return 2;
6064 }
6065 
6066 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6067                                    const TCGLdstHelperParam *parm)
6068 {
6069     const TCGHelperInfo *info;
6070     const TCGCallArgumentLoc *loc;
6071     TCGMovExtend mov[2];
6072     unsigned next_arg, nmov;
6073     MemOp mop = get_memop(ldst->oi);
6074 
6075     switch (mop & MO_SIZE) {
6076     case MO_8:
6077     case MO_16:
6078     case MO_32:
6079         info = &info_helper_ld32_mmu;
6080         break;
6081     case MO_64:
6082         info = &info_helper_ld64_mmu;
6083         break;
6084     case MO_128:
6085         info = &info_helper_ld128_mmu;
6086         break;
6087     default:
6088         g_assert_not_reached();
6089     }
6090 
6091     /* Defer env argument. */
6092     next_arg = 1;
6093 
6094     loc = &info->in[next_arg];
6095     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6096         /*
6097          * 32-bit host with 32-bit guest: zero-extend the guest address
6098          * to 64 bits for the helper by storing the low part, then
6099          * load a zero for the high part.
6100          */
6101         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6102                                TCG_TYPE_I32, TCG_TYPE_I32,
6103                                ldst->addr_reg, -1);
6104         tcg_out_helper_load_slots(s, 1, mov, parm);
6105 
6106         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6107                                 TCG_TYPE_I32, 0, parm);
6108         next_arg += 2;
6109     } else {
6110         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6111                                       ldst->addr_reg, -1);
6112         tcg_out_helper_load_slots(s, nmov, mov, parm);
6113         next_arg += nmov;
6114     }
6115 
6116     switch (info->out_kind) {
6117     case TCG_CALL_RET_NORMAL:
6118     case TCG_CALL_RET_BY_VEC:
6119         break;
6120     case TCG_CALL_RET_BY_REF:
6121         /*
6122          * The return reference is in the first argument slot.
6123          * We need memory in which to return: re-use the top of stack.
6124          */
6125         {
6126             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6127 
6128             if (arg_slot_reg_p(0)) {
6129                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6130                                  TCG_REG_CALL_STACK, ofs_slot0);
6131             } else {
6132                 tcg_debug_assert(parm->ntmp != 0);
6133                 tcg_out_addi_ptr(s, parm->tmp[0],
6134                                  TCG_REG_CALL_STACK, ofs_slot0);
6135                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6136                            TCG_REG_CALL_STACK, ofs_slot0);
6137             }
6138         }
6139         break;
6140     default:
6141         g_assert_not_reached();
6142     }
6143 
6144     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6145 }
6146 
6147 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6148                                   bool load_sign,
6149                                   const TCGLdstHelperParam *parm)
6150 {
6151     MemOp mop = get_memop(ldst->oi);
6152     TCGMovExtend mov[2];
6153     int ofs_slot0;
6154 
6155     switch (ldst->type) {
6156     case TCG_TYPE_I64:
6157         if (TCG_TARGET_REG_BITS == 32) {
6158             break;
6159         }
6160         /* fall through */
6161 
6162     case TCG_TYPE_I32:
6163         mov[0].dst = ldst->datalo_reg;
6164         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6165         mov[0].dst_type = ldst->type;
6166         mov[0].src_type = TCG_TYPE_REG;
6167 
6168         /*
6169          * If load_sign, then we allowed the helper to perform the
6170          * appropriate sign extension to tcg_target_ulong, and all
6171          * we need now is a plain move.
6172          *
6173          * If not, then we expect the relevant extension
6174          * instruction to be no more expensive than a move, and
6175          * we thus save the icache etc by only using one of two
6176          * helper functions.
6177          */
6178         if (load_sign || !(mop & MO_SIGN)) {
6179             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6180                 mov[0].src_ext = MO_32;
6181             } else {
6182                 mov[0].src_ext = MO_64;
6183             }
6184         } else {
6185             mov[0].src_ext = mop & MO_SSIZE;
6186         }
6187         tcg_out_movext1(s, mov);
6188         return;
6189 
6190     case TCG_TYPE_I128:
6191         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6192         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6193         switch (TCG_TARGET_CALL_RET_I128) {
6194         case TCG_CALL_RET_NORMAL:
6195             break;
6196         case TCG_CALL_RET_BY_VEC:
6197             tcg_out_st(s, TCG_TYPE_V128,
6198                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6199                        TCG_REG_CALL_STACK, ofs_slot0);
6200             /* fall through */
6201         case TCG_CALL_RET_BY_REF:
6202             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6203                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6204             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6205                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6206             return;
6207         default:
6208             g_assert_not_reached();
6209         }
6210         break;
6211 
6212     default:
6213         g_assert_not_reached();
6214     }
6215 
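    /*
     * We arrive here for TCG_TYPE_I64 on a 32-bit host, or for a
     * TCG_TYPE_I128 returned normally in a register pair: move both
     * return registers into place, letting tcg_out_movext2 resolve
     * any overlap between them.
     */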
6216     mov[0].dst = ldst->datalo_reg;
6217     mov[0].src =
6218         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6219     mov[0].dst_type = TCG_TYPE_REG;
6220     mov[0].src_type = TCG_TYPE_REG;
6221     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6222 
6223     mov[1].dst = ldst->datahi_reg;
6224     mov[1].src =
6225         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6226     mov[1].dst_type = TCG_TYPE_REG;
6227     mov[1].src_type = TCG_TYPE_REG;
6228     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6229 
6230     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6231 }
6232 
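/*
 * For reference, a backend's qemu_ld slow path is expected to pair
 * tcg_out_ld_helper_args and tcg_out_ld_helper_ret around a call to
 * the appropriate load helper.  A sketch, modelled on existing
 * backends; the names reloc_pc19, TCG_REG_TMP0, tcg_out_call_int,
 * tcg_out_goto and the qemu_ld_helpers table are per-backend and
 * only illustrative here:
 *
 *     static const TCGLdstHelperParam ldst_helper_param = {
 *         .ntmp = 1, .tmp = { TCG_REG_TMP0 }
 *     };
 *
 *     static bool tcg_out_qemu_ld_slow_path(TCGContext *s,
 *                                           TCGLabelQemuLdst *lb)
 *     {
 *         MemOp opc = get_memop(lb->oi);
 *
 *         if (!reloc_pc19(lb->label_ptr[0],
 *                         tcg_splitwx_to_rx(s->code_ptr))) {
 *             return false;
 *         }
 *         tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
 *         tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
 *         tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 *         tcg_out_goto(s, lb->raddr);
 *         return true;
 *     }
 */
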
6233 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6234                                    const TCGLdstHelperParam *parm)
6235 {
6236     const TCGHelperInfo *info;
6237     const TCGCallArgumentLoc *loc;
6238     TCGMovExtend mov[4];
6239     TCGType data_type;
6240     unsigned next_arg, nmov, n;
6241     MemOp mop = get_memop(ldst->oi);
6242 
6243     switch (mop & MO_SIZE) {
6244     case MO_8:
6245     case MO_16:
6246     case MO_32:
6247         info = &info_helper_st32_mmu;
6248         data_type = TCG_TYPE_I32;
6249         break;
6250     case MO_64:
6251         info = &info_helper_st64_mmu;
6252         data_type = TCG_TYPE_I64;
6253         break;
6254     case MO_128:
6255         info = &info_helper_st128_mmu;
6256         data_type = TCG_TYPE_I128;
6257         break;
6258     default:
6259         g_assert_not_reached();
6260     }
6261 
6262     /* Defer env argument. */
6263     next_arg = 1;
6264     nmov = 0;
6265 
6266     /* Handle addr argument. */
6267     loc = &info->in[next_arg];
6268     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6269     if (TCG_TARGET_REG_BITS == 32) {
6270         /*
6271          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6272          * to 64 bits for the helper by storing the low part.  Later,
6273          * after we have processed the register inputs, we will load a
6274          * zero for the high part.
6275          */
6276         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6277                                TCG_TYPE_I32, TCG_TYPE_I32,
6278                                ldst->addr_reg, -1);
6279         next_arg += 2;
6280         nmov += 1;
6281     } else {
6282         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6283                                    ldst->addr_reg, -1);
6284         next_arg += n;
6285         nmov += n;
6286     }
6287 
6288     /* Handle data argument. */
6289     loc = &info->in[next_arg];
6290     switch (loc->kind) {
6291     case TCG_CALL_ARG_NORMAL:
6292     case TCG_CALL_ARG_EXTEND_U:
6293     case TCG_CALL_ARG_EXTEND_S:
6294         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6295                                    ldst->datalo_reg, ldst->datahi_reg);
6296         next_arg += n;
6297         nmov += n;
6298         tcg_out_helper_load_slots(s, nmov, mov, parm);
6299         break;
6300 
6301     case TCG_CALL_ARG_BY_REF:
6302         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6303         tcg_debug_assert(data_type == TCG_TYPE_I128);
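        /*
         * Store the 128-bit value to its reserved stack slots, then
         * pass the address of those slots as the actual argument.
         */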
6304         tcg_out_st(s, TCG_TYPE_I64,
6305                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6306                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6307         tcg_out_st(s, TCG_TYPE_I64,
6308                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6309                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6310 
6311         tcg_out_helper_load_slots(s, nmov, mov, parm);
6312 
6313         if (arg_slot_reg_p(loc->arg_slot)) {
6314             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6315                              TCG_REG_CALL_STACK,
6316                              arg_slot_stk_ofs(loc->ref_slot));
6317         } else {
6318             tcg_debug_assert(parm->ntmp != 0);
6319             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6320                              arg_slot_stk_ofs(loc->ref_slot));
6321             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6322                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6323         }
6324         next_arg += 2;
6325         break;
6326 
6327     default:
6328         g_assert_not_reached();
6329     }
6330 
6331     if (TCG_TARGET_REG_BITS == 32) {
6332         /* Zero extend the address by loading a zero for the high part. */
6333         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6334         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6335     }
6336 
6337     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6338 }
6339 
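/*
 * Generate host code for @tb from the ops accumulated in @s.
 * Returns the number of bytes of code emitted on success, or a
 * negative value if generation must be restarted with a smaller TB:
 * -1 when the code buffer's high-water mark is exceeded, -2 when the
 * TB outgrows the 16-bit offsets recorded in gen_insn_end_off or a
 * relocation cannot be resolved.
 */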
6340 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6341 {
6342     int i, start_words, num_insns;
6343     TCGOp *op;
6344 
6345     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6346                  && qemu_log_in_addr_range(pc_start))) {
6347         FILE *logfile = qemu_log_trylock();
6348         if (logfile) {
6349             fprintf(logfile, "OP:\n");
6350             tcg_dump_ops(s, logfile, false);
6351             fprintf(logfile, "\n");
6352             qemu_log_unlock(logfile);
6353         }
6354     }
6355 
6356 #ifdef CONFIG_DEBUG_TCG
6357     /* Ensure all labels referenced have been emitted.  */
6358     {
6359         TCGLabel *l;
6360         bool error = false;
6361 
6362         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6363             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6364                 qemu_log_mask(CPU_LOG_TB_OP,
6365                               "$L%d referenced but not present.\n", l->id);
6366                 error = true;
6367             }
6368         }
6369         assert(!error);
6370     }
6371 #endif
6372 
6373     /* Do not reuse any EBB that may be allocated within the TB. */
6374     tcg_temp_ebb_reset_freed(s);
6375 
6376     tcg_optimize(s);
6377 
6378     reachable_code_pass(s);
6379     liveness_pass_0(s);
6380     liveness_pass_1(s);
6381 
6382     if (s->nb_indirects > 0) {
6383         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6384                      && qemu_log_in_addr_range(pc_start))) {
6385             FILE *logfile = qemu_log_trylock();
6386             if (logfile) {
6387                 fprintf(logfile, "OP before indirect lowering:\n");
6388                 tcg_dump_ops(s, logfile, false);
6389                 fprintf(logfile, "\n");
6390                 qemu_log_unlock(logfile);
6391             }
6392         }
6393 
6394         /* Replace indirect temps with direct temps.  */
6395         if (liveness_pass_2(s)) {
6396             /* If changes were made, re-run liveness.  */
6397             liveness_pass_1(s);
6398         }
6399     }
6400 
6401     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6402                  && qemu_log_in_addr_range(pc_start))) {
6403         FILE *logfile = qemu_log_trylock();
6404         if (logfile) {
6405             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6406             tcg_dump_ops(s, logfile, true);
6407             fprintf(logfile, "\n");
6408             qemu_log_unlock(logfile);
6409         }
6410     }
6411 
6412     /* Initialize goto_tb jump offsets. */
6413     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6414     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6415     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6416     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6417 
6418     tcg_reg_alloc_start(s);
6419 
6420     /*
6421      * Reset the buffer pointers when restarting after overflow.
6422      * TODO: Move this into translate-all.c with the rest of the
6423      * buffer management.  Having only this done here is confusing.
6424      */
6425     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6426     s->code_ptr = s->code_buf;
6427     s->data_gen_ptr = NULL;
6428 
6429     QSIMPLEQ_INIT(&s->ldst_labels);
6430     s->pool_labels = NULL;
6431 
6432     start_words = s->insn_start_words;
6433     s->gen_insn_data =
6434         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6435 
6436     tcg_out_tb_start(s);
6437 
6438     num_insns = -1;
6439     QTAILQ_FOREACH(op, &s->ops, link) {
6440         TCGOpcode opc = op->opc;
6441 
6442         switch (opc) {
6443         case INDEX_op_mov:
6444         case INDEX_op_mov_vec:
6445             tcg_reg_alloc_mov(s, op);
6446             break;
6447         case INDEX_op_dup_vec:
6448             tcg_reg_alloc_dup(s, op);
6449             break;
6450         case INDEX_op_insn_start:
6451             if (num_insns >= 0) {
6452                 size_t off = tcg_current_code_size(s);
6453                 s->gen_insn_end_off[num_insns] = off;
6454                 /* Assert that we do not overflow our stored offset.  */
6455                 assert(s->gen_insn_end_off[num_insns] == off);
6456             }
6457             num_insns++;
6458             for (i = 0; i < start_words; ++i) {
6459                 s->gen_insn_data[num_insns * start_words + i] =
6460                     tcg_get_insn_start_param(op, i);
6461             }
6462             break;
6463         case INDEX_op_discard:
6464             temp_dead(s, arg_temp(op->args[0]));
6465             break;
6466         case INDEX_op_set_label:
6467             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6468             tcg_out_label(s, arg_label(op->args[0]));
6469             break;
6470         case INDEX_op_call:
6471             tcg_reg_alloc_call(s, op);
6472             break;
6473         case INDEX_op_exit_tb:
6474             tcg_out_exit_tb(s, op->args[0]);
6475             break;
6476         case INDEX_op_goto_tb:
6477             tcg_out_goto_tb(s, op->args[0]);
6478             break;
6479         case INDEX_op_dup2_vec:
6480             if (tcg_reg_alloc_dup2(s, op)) {
6481                 break;
6482             }
6483             /* fall through */
6484         default:
6485             /* Sanity check that we've not introduced any unhandled opcodes. */
6486             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6487                                               TCGOP_FLAGS(op)));
6488             /* Note: in order to speed up the code, it would be much
6489                faster to have specialized register allocator functions for
6490                some common argument patterns.  */
6491             tcg_reg_alloc_op(s, op);
6492             break;
6493         }
6494         /* Test for (pending) buffer overflow.  The assumption is that any
6495            one operation beginning below the high water mark cannot overrun
6496            the buffer completely.  Thus we can test for overflow after
6497            generating code without having to check during generation.  */
6498         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6499             return -1;
6500         }
6501         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6502         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6503             return -2;
6504         }
6505     }
6506     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6507     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6508 
6509     /* Generate TB finalization at the end of block */
6510     i = tcg_out_ldst_finalize(s);
6511     if (i < 0) {
6512         return i;
6513     }
6514     i = tcg_out_pool_finalize(s);
6515     if (i < 0) {
6516         return i;
6517     }
6518     if (!tcg_resolve_relocs(s)) {
6519         return -2;
6520     }
6521 
6522 #ifndef CONFIG_TCG_INTERPRETER
6523     /* flush instruction cache */
6524     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6525                         (uintptr_t)s->code_buf,
6526                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6527 #endif
6528 
6529     return tcg_current_code_size(s);
6530 }
6531 
6532 #ifdef ELF_HOST_MACHINE
6533 /* In order to use this feature, the backend needs to do three things:
6534 
6535    (1) Define ELF_HOST_MACHINE, which both supplies the value to
6536        put into the ELF image and indicates support for the feature.
6537 
6538    (2) Define tcg_register_jit.  This should create a buffer containing
6539        the contents of a .debug_frame section that describes the post-
6540        prologue unwind info for the tcg machine.
6541 
6542    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6543 */
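
/*
 * For example, a backend might provide (a sketch only; the DebugFrame
 * layout and its CIE/FDE contents are target-specific and not spelled
 * out here):
 *
 *     static const DebugFrame debug_frame = {
 *         ... CIE and FDE describing the prologue's frame setup ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */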
6544 
6545 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6546 typedef enum {
6547     JIT_NOACTION = 0,
6548     JIT_REGISTER_FN,
6549     JIT_UNREGISTER_FN
6550 } jit_actions_t;
6551 
6552 struct jit_code_entry {
6553     struct jit_code_entry *next_entry;
6554     struct jit_code_entry *prev_entry;
6555     const void *symfile_addr;
6556     uint64_t symfile_size;
6557 };
6558 
6559 struct jit_descriptor {
6560     uint32_t version;
6561     uint32_t action_flag;
6562     struct jit_code_entry *relevant_entry;
6563     struct jit_code_entry *first_entry;
6564 };
6565 
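/*
 * GDB sets a breakpoint in this function.  The noinline attribute and
 * the empty asm statement keep calls to it from being optimized away,
 * so the debugger is notified each time the descriptor is updated.
 */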
6566 void __jit_debug_register_code(void) __attribute__((noinline));
6567 void __jit_debug_register_code(void)
6568 {
6569     asm("");
6570 }
6571 
6572 /* Must statically initialize the version, because GDB may check
6573    the version before we can set it.  */
6574 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6575 
6576 /* End GDB interface.  */
6577 
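/*
 * Return the offset of @str within @strtab.  The caller must
 * guarantee that @str is present; every lookup below uses a string
 * from the static .strtab image in img_template.
 */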
6578 static int find_string(const char *strtab, const char *str)
6579 {
6580     const char *p = strtab + 1;
6581 
6582     while (1) {
6583         if (strcmp(p, str) == 0) {
6584             return p - strtab;
6585         }
6586         p += strlen(p) + 1;
6587     }
6588 }
6589 
6590 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6591                                  const void *debug_frame,
6592                                  size_t debug_frame_size)
6593 {
6594     struct __attribute__((packed)) DebugInfo {
6595         uint32_t  len;
6596         uint16_t  version;
6597         uint32_t  abbrev;
6598         uint8_t   ptr_size;
6599         uint8_t   cu_die;
6600         uint16_t  cu_lang;
6601         uintptr_t cu_low_pc;
6602         uintptr_t cu_high_pc;
6603         uint8_t   fn_die;
6604         char      fn_name[16];
6605         uintptr_t fn_low_pc;
6606         uintptr_t fn_high_pc;
6607         uint8_t   cu_eoc;
6608     };
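    /*
     * The fields above must match, one for one, the two DIE
     * abbreviations spelled out in img_template.da below: a
     * compile_unit DIE, a subprogram DIE, and the terminator.
     */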
6609 
6610     struct ElfImage {
6611         ElfW(Ehdr) ehdr;
6612         ElfW(Phdr) phdr;
6613         ElfW(Shdr) shdr[7];
6614         ElfW(Sym)  sym[2];
6615         struct DebugInfo di;
6616         uint8_t    da[24];
6617         char       str[80];
6618     };
6619 
6620     struct ElfImage *img;
6621 
6622     static const struct ElfImage img_template = {
6623         .ehdr = {
6624             .e_ident[EI_MAG0] = ELFMAG0,
6625             .e_ident[EI_MAG1] = ELFMAG1,
6626             .e_ident[EI_MAG2] = ELFMAG2,
6627             .e_ident[EI_MAG3] = ELFMAG3,
6628             .e_ident[EI_CLASS] = ELF_CLASS,
6629             .e_ident[EI_DATA] = ELF_DATA,
6630             .e_ident[EI_VERSION] = EV_CURRENT,
6631             .e_type = ET_EXEC,
6632             .e_machine = ELF_HOST_MACHINE,
6633             .e_version = EV_CURRENT,
6634             .e_phoff = offsetof(struct ElfImage, phdr),
6635             .e_shoff = offsetof(struct ElfImage, shdr),
6636             .e_ehsize = sizeof(ElfW(Ehdr)),  /* size of the ELF header */
6637             .e_phentsize = sizeof(ElfW(Phdr)),
6638             .e_phnum = 1,
6639             .e_shentsize = sizeof(ElfW(Shdr)),
6640             .e_shnum = ARRAY_SIZE(img->shdr),
6641             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6642 #ifdef ELF_HOST_FLAGS
6643             .e_flags = ELF_HOST_FLAGS,
6644 #endif
6645 #ifdef ELF_OSABI
6646             .e_ident[EI_OSABI] = ELF_OSABI,
6647 #endif
6648         },
6649         .phdr = {
6650             .p_type = PT_LOAD,
6651             .p_flags = PF_X,
6652         },
6653         .shdr = {
6654             [0] = { .sh_type = SHT_NULL },
6655             /* Trick: The contents of code_gen_buffer are not present in
6656                this fake ELF file; that got allocated elsewhere.  Therefore
6657                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6658                will not look for contents.  We can record any address.  */
6659             [1] = { /* .text */
6660                 .sh_type = SHT_NOBITS,
6661                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6662             },
6663             [2] = { /* .debug_info */
6664                 .sh_type = SHT_PROGBITS,
6665                 .sh_offset = offsetof(struct ElfImage, di),
6666                 .sh_size = sizeof(struct DebugInfo),
6667             },
6668             [3] = { /* .debug_abbrev */
6669                 .sh_type = SHT_PROGBITS,
6670                 .sh_offset = offsetof(struct ElfImage, da),
6671                 .sh_size = sizeof(img->da),
6672             },
6673             [4] = { /* .debug_frame */
6674                 .sh_type = SHT_PROGBITS,
6675                 .sh_offset = sizeof(struct ElfImage),
6676             },
6677             [5] = { /* .symtab */
6678                 .sh_type = SHT_SYMTAB,
6679                 .sh_offset = offsetof(struct ElfImage, sym),
6680                 .sh_size = sizeof(img->sym),
6681                 .sh_info = 1,
6682                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6683                 .sh_entsize = sizeof(ElfW(Sym)),
6684             },
6685             [6] = { /* .strtab */
6686                 .sh_type = SHT_STRTAB,
6687                 .sh_offset = offsetof(struct ElfImage, str),
6688                 .sh_size = sizeof(img->str),
6689             }
6690         },
6691         .sym = {
6692             [1] = { /* code_gen_buffer */
6693                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6694                 .st_shndx = 1,
6695             }
6696         },
6697         .di = {
6698             .len = sizeof(struct DebugInfo) - 4, /* initial length excludes itself */
6699             .version = 2,
6700             .ptr_size = sizeof(void *),
6701             .cu_die = 1,
6702             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6703             .fn_die = 2,
6704             .fn_name = "code_gen_buffer"
6705         },
6706         .da = {
6707             1,          /* abbrev number (the cu) */
6708             0x11, 1,    /* DW_TAG_compile_unit, has children */
6709             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6710             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6711             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6712             0, 0,       /* end of abbrev */
6713             2,          /* abbrev number (the fn) */
6714             0x2e, 0,    /* DW_TAG_subprogram, no children */
6715             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6716             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6717             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6718             0, 0,       /* end of abbrev */
6719             0           /* no more abbrev */
6720         },
6721         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6722                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6723     };
6724 
6725     /* We only need a single jit entry; statically allocate it.  */
6726     static struct jit_code_entry one_entry;
6727 
6728     uintptr_t buf = (uintptr_t)buf_ptr;
6729     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6730     DebugFrameHeader *dfh;
6731 
6732     img = g_malloc(img_size);
6733     *img = img_template;
6734 
6735     img->phdr.p_vaddr = buf;
6736     img->phdr.p_paddr = buf;
6737     img->phdr.p_memsz = buf_size;
6738 
6739     img->shdr[1].sh_name = find_string(img->str, ".text");
6740     img->shdr[1].sh_addr = buf;
6741     img->shdr[1].sh_size = buf_size;
6742 
6743     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6744     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6745 
6746     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6747     img->shdr[4].sh_size = debug_frame_size;
6748 
6749     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6750     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6751 
6752     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6753     img->sym[1].st_value = buf;
6754     img->sym[1].st_size = buf_size;
6755 
6756     img->di.cu_low_pc = buf;
6757     img->di.cu_high_pc = buf + buf_size;
6758     img->di.fn_low_pc = buf;
6759     img->di.fn_high_pc = buf + buf_size;
6760 
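    /* Copy the backend's .debug_frame and patch its FDE to span the
       actual code buffer.  */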
6761     dfh = (DebugFrameHeader *)(img + 1);
6762     memcpy(dfh, debug_frame, debug_frame_size);
6763     dfh->fde.func_start = buf;
6764     dfh->fde.func_len = buf_size;
6765 
6766 #ifdef DEBUG_JIT
6767     /* Enable this block to debug creation of the ELF image file.
6768        One can then inspect the dump with readelf, objdump, etc.  */
6769     {
6770         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6771         FILE *f = fopen(jit, "w+b");
6772         if (f) {
6773             if (fwrite(img, img_size, 1, f) != 1) {
6774                 /* Short write; nothing to do but consume the result.  */
6775             }
6776             fclose(f);
6777         }
6778     }
6779 #endif
6780 
6781     one_entry.symfile_addr = img;
6782     one_entry.symfile_size = img_size;
6783 
6784     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6785     __jit_debug_descriptor.relevant_entry = &one_entry;
6786     __jit_debug_descriptor.first_entry = &one_entry;
6787     __jit_debug_register_code();
6788 }
6789 #else
6790 /* No support for the feature.  Provide the entry point expected by exec.c,
6791    and implement the internal function we declared earlier.  */
6792 
6793 static void tcg_register_jit_int(const void *buf, size_t size,
6794                                  const void *debug_frame,
6795                                  size_t debug_frame_size)
6796 {
6797 }
6798 
6799 void tcg_register_jit(const void *buf, size_t buf_size)
6800 {
6801 }
6802 #endif /* ELF_HOST_MACHINE */
6803 
6804 #if !TCG_TARGET_MAYBE_vec
6805 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6806 {
6807     g_assert_not_reached();
6808 }
6809 #endif
6810