xref: /openbmc/qemu/tcg/tcg.c (revision 3a8c4e9e53c6f4aa7c590971950000b174e74fa1)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
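
/*
 * For example (reading directly from the switch above): a call such as
 * tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src) emits a
 * 16-bit sign-extension into a 64-bit destination via tcg_out_ext16s,
 * while MO_UQ with matching 64-bit types degenerates to a plain register
 * move via tcg_out_mov.
 */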

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
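
/*
 * Illustration of the overlap cases handled above: if i1 is A->B and
 * i2 is B->C, then B is moved into C first and A into B second.  If the
 * moves form a cycle (i1 is A->B and i2 is B->A), the registers are
 * either exchanged with tcg_out_xchg, or A is parked in @scratch so that
 * neither source is clobbered before it is consumed.
 */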

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
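
/*
 * Note on the ordering above: the list is kept sorted by descending
 * nlong and then by descending data, so that identical constants end up
 * adjacent.  tcg_out_pool_finalize relies on this when it compares each
 * entry against the previously emitted one to deduplicate the pool.
 */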

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
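
/*
 * Worked example (hypothetical constraint letters, not taken from this
 * file): a line such as C_O1_I2(r, r, r) in tcg-target-con-set.h expands
 * once to the enumerator c_o1_i2_r_r_r above, and a second time, via the
 * stringizing definitions, to the table entry { 1, 2, { "r", "r", "r" } }.
 * Because both expansions include the same header, each enum value indexes
 * its own constraint set in constraint_sets[].
 */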

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host ISA.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
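
/*
 * A note on the typemask encoding, as consumed by init_ffi_layout and
 * init_call_layout below: the mask packs one 3-bit dh_typecode per slot,
 * slot 0 (bits [2:0]) being the return value and slot n the n-th
 * argument.  E.g. in info_helper_ld64_mmu above, dh_typemask(i64, 0) is
 * the uint64_t return and dh_typemask(ptr, 4) the uintptr_t ra argument.
 */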

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
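
/*
 * Slot numbering sketch: with N = ARRAY_SIZE(tcg_target_call_iarg_regs),
 * argument slot k < N lives in tcg_target_call_iarg_regs[k], while slot
 * k >= N lives on the stack at TCG_TARGET_CALL_STACK_OFFSET plus
 * (k - N) * sizeof(tcg_target_long), as computed above.
 */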

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
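
/*
 * layout_arg_even rounds the cumulative slot index up to an even value,
 * for ABIs with TCG_CALL_ARG_EVEN that pass 64-bit (or 128-bit) values
 * in aligned register pairs: e.g. a value about to be placed in slot 3
 * is pushed to slots 4/5, leaving slot 3 unused.
 */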

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
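
/*
 * Worked example (a sketch, assuming a 64-bit host with at least five
 * argument registers and TCG_CALL_ARG_NORMAL): for info_helper_st64_mmu
 * above, the void return consumes no slot, and the env, addr, data, oi
 * and ra arguments land in slots 0-4, i.e. the first five entries of
 * tcg_target_call_iarg_regs.
 */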
1550 
1551 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1552 static void process_constraint_sets(void);
1553 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1554                                             TCGReg reg, const char *name);
1555 
1556 static void tcg_context_init(unsigned max_threads)
1557 {
1558     TCGContext *s = &tcg_init_ctx;
1559     int n, i;
1560     TCGTemp *ts;
1561 
1562     memset(s, 0, sizeof(*s));
1563     s->nb_globals = 0;
1564 
1565     init_call_layout(&info_helper_ld32_mmu);
1566     init_call_layout(&info_helper_ld64_mmu);
1567     init_call_layout(&info_helper_ld128_mmu);
1568     init_call_layout(&info_helper_st32_mmu);
1569     init_call_layout(&info_helper_st64_mmu);
1570     init_call_layout(&info_helper_st128_mmu);
1571 
1572     tcg_target_init(s);
1573     process_constraint_sets();
1574 
1575     /* Reverse the order of the saved registers, assuming they're all at
1576        the start of tcg_target_reg_alloc_order.  */
1577     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1578         int r = tcg_target_reg_alloc_order[n];
1579         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1580             break;
1581         }
1582     }
1583     for (i = 0; i < n; ++i) {
1584         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1585     }
1586     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1587         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1588     }
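    /*
     * Example with a hypothetical order: if tcg_target_reg_alloc_order
     * begins with call-saved registers R1, R2, R3 followed by clobbered
     * ones, then indirect_reg_alloc_order begins R3, R2, R1 and the
     * remaining (call-clobbered) entries are copied through unchanged.
     */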
1589 
1590     tcg_ctx = s;
1591     /*
1592      * In user-mode we simply share the init context among threads, since we
1593      * use a single region. See the documentation for tcg_region_init()
1594      * for the reasoning behind this.
1595      * In system-mode we will have at most max_threads TCG threads.
1596      */
1597 #ifdef CONFIG_USER_ONLY
1598     tcg_ctxs = &tcg_ctx;
1599     tcg_cur_ctxs = 1;
1600     tcg_max_ctxs = 1;
1601 #else
1602     tcg_max_ctxs = max_threads;
1603     tcg_ctxs = g_new0(TCGContext *, max_threads);
1604 #endif
1605 
1606     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1607     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1608     tcg_env = temp_tcgv_ptr(ts);
1609 }
1610 
1611 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1612 {
1613     tcg_context_init(max_threads);
1614     tcg_region_init(tb_size, splitwx, max_threads);
1615 }
1616 
1617 /*
1618  * Allocate TBs right before their corresponding translated code, making
1619  * sure that TBs and code are on different cache lines.
1620  */
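/*
 * For example (a sketch): with a 64-byte icache line and code_gen_ptr at
 * 0x1234, the TB is placed at 0x1240 and the following translated code
 * begins at the next 64-byte boundary past the TB structure.
 */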
1621 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1622 {
1623     uintptr_t align = qemu_icache_linesize;
1624     TranslationBlock *tb;
1625     void *next;
1626 
1627  retry:
1628     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1629     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1630 
1631     if (unlikely(next > s->code_gen_highwater)) {
1632         if (tcg_region_alloc(s)) {
1633             return NULL;
1634         }
1635         goto retry;
1636     }
1637     qatomic_set(&s->code_gen_ptr, next);
1638     return tb;
1639 }
1640 
1641 void tcg_prologue_init(void)
1642 {
1643     TCGContext *s = tcg_ctx;
1644     size_t prologue_size;
1645 
1646     s->code_ptr = s->code_gen_ptr;
1647     s->code_buf = s->code_gen_ptr;
1648     s->data_gen_ptr = NULL;
1649 
1650 #ifndef CONFIG_TCG_INTERPRETER
1651     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1652 #endif
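    /*
     * A note on split-W^X (a sketch of the scheme, not backend-specific):
     * code is emitted through the writable mapping at s->code_ptr, while
     * execution uses the read-execute alias from tcg_splitwx_to_rx(), so
     * the prologue entry point above is recorded in the executable view.
     */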
1653 
1654     s->pool_labels = NULL;
1655 
1656     qemu_thread_jit_write();
1657     /* Generate the prologue.  */
1658     tcg_target_qemu_prologue(s);
1659 
1660     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1661     {
1662         int result = tcg_out_pool_finalize(s);
1663         tcg_debug_assert(result == 0);
1664     }
1665 
1666     prologue_size = tcg_current_code_size(s);
1667     perf_report_prologue(s->code_gen_ptr, prologue_size);
1668 
1669 #ifndef CONFIG_TCG_INTERPRETER
1670     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1671                         (uintptr_t)s->code_buf, prologue_size);
1672 #endif
1673 
1674     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1675         FILE *logfile = qemu_log_trylock();
1676         if (logfile) {
1677             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1678             if (s->data_gen_ptr) {
1679                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1680                 size_t data_size = prologue_size - code_size;
1681                 size_t i;
1682 
1683                 disas(logfile, s->code_gen_ptr, code_size);
1684 
1685                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1686                     if (sizeof(tcg_target_ulong) == 8) {
1687                         fprintf(logfile,
1688                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1689                                 (uintptr_t)s->data_gen_ptr + i,
1690                                 *(uint64_t *)(s->data_gen_ptr + i));
1691                     } else {
1692                         fprintf(logfile,
1693                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1694                                 (uintptr_t)s->data_gen_ptr + i,
1695                                 *(uint32_t *)(s->data_gen_ptr + i));
1696                     }
1697                 }
1698             } else {
1699                 disas(logfile, s->code_gen_ptr, prologue_size);
1700             }
1701             fprintf(logfile, "\n");
1702             qemu_log_unlock(logfile);
1703         }
1704     }
1705 
1706 #ifndef CONFIG_TCG_INTERPRETER
1707     /*
1708      * Assert that goto_ptr is implemented completely, setting an epilogue.
1709      * For tci, we use NULL as the signal to return from the interpreter,
1710      * so skip this check.
1711      */
1712     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1713 #endif
1714 
1715     tcg_region_prologue_set(s);
1716 }
1717 
1718 void tcg_func_start(TCGContext *s)
1719 {
1720     tcg_pool_reset(s);
1721     s->nb_temps = s->nb_globals;
1722 
1723     /* No temps have been previously allocated for size or locality.  */
1724     tcg_temp_ebb_reset_freed(s);
1725 
1726     /* No constant temps have been previously allocated. */
1727     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1728         if (s->const_table[i]) {
1729             g_hash_table_remove_all(s->const_table[i]);
1730         }
1731     }
1732 
1733     s->nb_ops = 0;
1734     s->nb_labels = 0;
1735     s->current_frame_offset = s->frame_start;
1736 
1737 #ifdef CONFIG_DEBUG_TCG
1738     s->goto_tb_issue_mask = 0;
1739 #endif
1740 
1741     QTAILQ_INIT(&s->ops);
1742     QTAILQ_INIT(&s->free_ops);
1743     s->emit_before_op = NULL;
1744     QSIMPLEQ_INIT(&s->labels);
1745 
1746     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1747     tcg_debug_assert(s->insn_start_words > 0);
1748 }
1749 
1750 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1751 {
1752     int n = s->nb_temps++;
1753 
1754     if (n >= TCG_MAX_TEMPS) {
1755         tcg_raise_tb_overflow(s);
1756     }
1757     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1758 }
1759 
1760 static TCGTemp *tcg_global_alloc(TCGContext *s)
1761 {
1762     TCGTemp *ts;
1763 
1764     tcg_debug_assert(s->nb_globals == s->nb_temps);
1765     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1766     s->nb_globals++;
1767     ts = tcg_temp_alloc(s);
1768     ts->kind = TEMP_GLOBAL;
1769 
1770     return ts;
1771 }
1772 
1773 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1774                                             TCGReg reg, const char *name)
1775 {
1776     TCGTemp *ts;
1777 
1778     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1779 
1780     ts = tcg_global_alloc(s);
1781     ts->base_type = type;
1782     ts->type = type;
1783     ts->kind = TEMP_FIXED;
1784     ts->reg = reg;
1785     ts->name = name;
1786     tcg_regset_set_reg(s->reserved_regs, reg);
1787 
1788     return ts;
1789 }
1790 
1791 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1792 {
1793     s->frame_start = start;
1794     s->frame_end = start + size;
1795     s->frame_temp
1796         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1797 }
1798 
1799 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1800                                             const char *name, TCGType type)
1801 {
1802     TCGContext *s = tcg_ctx;
1803     TCGTemp *base_ts = tcgv_ptr_temp(base);
1804     TCGTemp *ts = tcg_global_alloc(s);
1805     int indirect_reg = 0;
1806 
1807     switch (base_ts->kind) {
1808     case TEMP_FIXED:
1809         break;
1810     case TEMP_GLOBAL:
1811         /* We do not support double-indirect registers.  */
1812         tcg_debug_assert(!base_ts->indirect_reg);
1813         base_ts->indirect_base = 1;
1814         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1815                             ? 2 : 1);
1816         indirect_reg = 1;
1817         break;
1818     default:
1819         g_assert_not_reached();
1820     }
1821 
1822     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1823         TCGTemp *ts2 = tcg_global_alloc(s);
1824         char buf[64];
1825 
1826         ts->base_type = TCG_TYPE_I64;
1827         ts->type = TCG_TYPE_I32;
1828         ts->indirect_reg = indirect_reg;
1829         ts->mem_allocated = 1;
1830         ts->mem_base = base_ts;
1831         ts->mem_offset = offset;
1832         pstrcpy(buf, sizeof(buf), name);
1833         pstrcat(buf, sizeof(buf), "_0");
1834         ts->name = strdup(buf);
1835 
1836         tcg_debug_assert(ts2 == ts + 1);
1837         ts2->base_type = TCG_TYPE_I64;
1838         ts2->type = TCG_TYPE_I32;
1839         ts2->indirect_reg = indirect_reg;
1840         ts2->mem_allocated = 1;
1841         ts2->mem_base = base_ts;
1842         ts2->mem_offset = offset + 4;
1843         ts2->temp_subindex = 1;
1844         pstrcpy(buf, sizeof(buf), name);
1845         pstrcat(buf, sizeof(buf), "_1");
1846         ts2->name = strdup(buf);
1847     } else {
1848         ts->base_type = type;
1849         ts->type = type;
1850         ts->indirect_reg = indirect_reg;
1851         ts->mem_allocated = 1;
1852         ts->mem_base = base_ts;
1853         ts->mem_offset = offset;
1854         ts->name = name;
1855     }
1856     return ts;
1857 }
1858 
1859 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1860 {
1861     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1862     return temp_tcgv_i32(ts);
1863 }
1864 
1865 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1866 {
1867     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1868     return temp_tcgv_i64(ts);
1869 }
1870 
1871 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1872 {
1873     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1874     return temp_tcgv_ptr(ts);
1875 }
1876 
1877 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1878 {
1879     TCGContext *s = tcg_ctx;
1880     TCGTemp *ts;
1881     int n;
1882 
1883     if (kind == TEMP_EBB) {
1884         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1885 
1886         if (idx < TCG_MAX_TEMPS) {
1887             /* There is already an available temp with the right type.  */
1888             clear_bit(idx, s->free_temps[type].l);
1889 
1890             ts = &s->temps[idx];
1891             ts->temp_allocated = 1;
1892             tcg_debug_assert(ts->base_type == type);
1893             tcg_debug_assert(ts->kind == kind);
1894             return ts;
1895         }
1896     } else {
1897         tcg_debug_assert(kind == TEMP_TB);
1898     }
1899 
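    /*
     * A base type wider than the host register is backed by multiple
     * adjacent TCGTemps: e.g. TCG_TYPE_I64 on a 32-bit host and
     * TCG_TYPE_I128 on a 64-bit host both take n = 2 consecutive temps
     * (TCG_TYPE_I128 on a 32-bit host takes 4), distinguished by
     * temp_subindex.
     */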
1900     switch (type) {
1901     case TCG_TYPE_I32:
1902     case TCG_TYPE_V64:
1903     case TCG_TYPE_V128:
1904     case TCG_TYPE_V256:
1905         n = 1;
1906         break;
1907     case TCG_TYPE_I64:
1908         n = 64 / TCG_TARGET_REG_BITS;
1909         break;
1910     case TCG_TYPE_I128:
1911         n = 128 / TCG_TARGET_REG_BITS;
1912         break;
1913     default:
1914         g_assert_not_reached();
1915     }
1916 
1917     ts = tcg_temp_alloc(s);
1918     ts->base_type = type;
1919     ts->temp_allocated = 1;
1920     ts->kind = kind;
1921 
1922     if (n == 1) {
1923         ts->type = type;
1924     } else {
1925         ts->type = TCG_TYPE_REG;
1926 
1927         for (int i = 1; i < n; ++i) {
1928             TCGTemp *ts2 = tcg_temp_alloc(s);
1929 
1930             tcg_debug_assert(ts2 == ts + i);
1931             ts2->base_type = type;
1932             ts2->type = TCG_TYPE_REG;
1933             ts2->temp_allocated = 1;
1934             ts2->temp_subindex = i;
1935             ts2->kind = kind;
1936         }
1937     }
1938     return ts;
1939 }
1940 
1941 TCGv_i32 tcg_temp_new_i32(void)
1942 {
1943     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1944 }
1945 
1946 TCGv_i32 tcg_temp_ebb_new_i32(void)
1947 {
1948     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1949 }
1950 
1951 TCGv_i64 tcg_temp_new_i64(void)
1952 {
1953     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1954 }
1955 
1956 TCGv_i64 tcg_temp_ebb_new_i64(void)
1957 {
1958     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1959 }
1960 
1961 TCGv_ptr tcg_temp_new_ptr(void)
1962 {
1963     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1964 }
1965 
1966 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1967 {
1968     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1969 }
1970 
1971 TCGv_i128 tcg_temp_new_i128(void)
1972 {
1973     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1974 }
1975 
1976 TCGv_i128 tcg_temp_ebb_new_i128(void)
1977 {
1978     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1979 }
1980 
1981 TCGv_vec tcg_temp_new_vec(TCGType type)
1982 {
1983     TCGTemp *t;
1984 
1985 #ifdef CONFIG_DEBUG_TCG
1986     switch (type) {
1987     case TCG_TYPE_V64:
1988         assert(TCG_TARGET_HAS_v64);
1989         break;
1990     case TCG_TYPE_V128:
1991         assert(TCG_TARGET_HAS_v128);
1992         break;
1993     case TCG_TYPE_V256:
1994         assert(TCG_TARGET_HAS_v256);
1995         break;
1996     default:
1997         g_assert_not_reached();
1998     }
1999 #endif
2000 
2001     t = tcg_temp_new_internal(type, TEMP_EBB);
2002     return temp_tcgv_vec(t);
2003 }
2004 
2005 /* Create a new temp of the same type as an existing temp.  */
2006 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2007 {
2008     TCGTemp *t = tcgv_vec_temp(match);
2009 
2010     tcg_debug_assert(t->temp_allocated != 0);
2011 
2012     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2013     return temp_tcgv_vec(t);
2014 }
2015 
2016 void tcg_temp_free_internal(TCGTemp *ts)
2017 {
2018     TCGContext *s = tcg_ctx;
2019 
2020     switch (ts->kind) {
2021     case TEMP_CONST:
2022     case TEMP_TB:
2023         /* Silently ignore free. */
2024         break;
2025     case TEMP_EBB:
2026         tcg_debug_assert(ts->temp_allocated != 0);
2027         ts->temp_allocated = 0;
2028         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2029         break;
2030     default:
2031         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2032         g_assert_not_reached();
2033     }
2034 }
2035 
2036 void tcg_temp_free_i32(TCGv_i32 arg)
2037 {
2038     tcg_temp_free_internal(tcgv_i32_temp(arg));
2039 }
2040 
2041 void tcg_temp_free_i64(TCGv_i64 arg)
2042 {
2043     tcg_temp_free_internal(tcgv_i64_temp(arg));
2044 }
2045 
2046 void tcg_temp_free_i128(TCGv_i128 arg)
2047 {
2048     tcg_temp_free_internal(tcgv_i128_temp(arg));
2049 }
2050 
2051 void tcg_temp_free_ptr(TCGv_ptr arg)
2052 {
2053     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2054 }
2055 
2056 void tcg_temp_free_vec(TCGv_vec arg)
2057 {
2058     tcg_temp_free_internal(tcgv_vec_temp(arg));
2059 }
2060 
2061 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2062 {
2063     TCGContext *s = tcg_ctx;
2064     GHashTable *h = s->const_table[type];
2065     TCGTemp *ts;
2066 
2067     if (h == NULL) {
2068         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2069         s->const_table[type] = h;
2070     }
2071 
2072     ts = g_hash_table_lookup(h, &val);
2073     if (ts == NULL) {
2074         int64_t *val_ptr;
2075 
2076         ts = tcg_temp_alloc(s);
2077 
2078         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2079             TCGTemp *ts2 = tcg_temp_alloc(s);
2080 
2081             tcg_debug_assert(ts2 == ts + 1);
2082 
2083             ts->base_type = TCG_TYPE_I64;
2084             ts->type = TCG_TYPE_I32;
2085             ts->kind = TEMP_CONST;
2086             ts->temp_allocated = 1;
2087 
2088             ts2->base_type = TCG_TYPE_I64;
2089             ts2->type = TCG_TYPE_I32;
2090             ts2->kind = TEMP_CONST;
2091             ts2->temp_allocated = 1;
2092             ts2->temp_subindex = 1;
2093 
2094             /*
2095              * Retain the full value of the 64-bit constant in the low
2096              * part, so that the hash table works.  Actual uses will
2097              * truncate the value to the low part.
2098              */
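            /*
             * E.g. on a little-endian host (a sketch): for
             * val = 0x1122334455667788, ts[0].val holds the full
             * 0x1122334455667788 (actual uses truncate it to
             * 0x55667788) and ts[1].val holds 0x11223344.
             */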
2099             ts[HOST_BIG_ENDIAN].val = val;
2100             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2101             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2102         } else {
2103             ts->base_type = type;
2104             ts->type = type;
2105             ts->kind = TEMP_CONST;
2106             ts->temp_allocated = 1;
2107             ts->val = val;
2108             val_ptr = &ts->val;
2109         }
2110         g_hash_table_insert(h, val_ptr, ts);
2111     }
2112 
2113     return ts;
2114 }
2115 
2116 TCGv_i32 tcg_constant_i32(int32_t val)
2117 {
2118     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2119 }
2120 
2121 TCGv_i64 tcg_constant_i64(int64_t val)
2122 {
2123     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2124 }
2125 
2126 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2127 {
2128     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2129 }
2130 
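/*
 * tcg_constant_vec() below replicates VAL across every VECE-sized lane:
 * e.g. vece = MO_16 with val = 0x1234 yields 0x1234123412341234 in each
 * 64 bits of the vector (dup_const performs the replication).
 */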
2131 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2132 {
2133     val = dup_const(vece, val);
2134     return temp_tcgv_vec(tcg_constant_internal(type, val));
2135 }
2136 
2137 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2138 {
2139     TCGTemp *t = tcgv_vec_temp(match);
2140 
2141     tcg_debug_assert(t->temp_allocated != 0);
2142     return tcg_constant_vec(t->base_type, vece, val);
2143 }
2144 
2145 #ifdef CONFIG_DEBUG_TCG
2146 size_t temp_idx(TCGTemp *ts)
2147 {
2148     ptrdiff_t n = ts - tcg_ctx->temps;
2149     assert(n >= 0 && n < tcg_ctx->nb_temps);
2150     return n;
2151 }
2152 
2153 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2154 {
2155     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2156 
2157     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2158     assert(o % sizeof(TCGTemp) == 0);
2159 
2160     return (void *)tcg_ctx + (uintptr_t)v;
2161 }
2162 #endif /* CONFIG_DEBUG_TCG */
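/*
 * A TCGv handle is (in sketch form) the byte offset of its TCGTemp
 * within TCGContext, cast to a pointer type; the checks above verify
 * that the round trip lands on a TCGTemp boundary inside temps[].
 */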
2163 
2164 /*
2165  * Return true if OP may appear in the opcode stream with TYPE.
2166  * Test the runtime variable that controls each opcode.
2167  */
2168 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2169 {
2170     bool has_type;
2171 
2172     switch (type) {
2173     case TCG_TYPE_I32:
2174         has_type = true;
2175         break;
2176     case TCG_TYPE_I64:
2177         has_type = TCG_TARGET_REG_BITS == 64;
2178         break;
2179     case TCG_TYPE_V64:
2180         has_type = TCG_TARGET_HAS_v64;
2181         break;
2182     case TCG_TYPE_V128:
2183         has_type = TCG_TARGET_HAS_v128;
2184         break;
2185     case TCG_TYPE_V256:
2186         has_type = TCG_TARGET_HAS_v256;
2187         break;
2188     default:
2189         has_type = false;
2190         break;
2191     }
2192 
2193     switch (op) {
2194     case INDEX_op_discard:
2195     case INDEX_op_set_label:
2196     case INDEX_op_call:
2197     case INDEX_op_br:
2198     case INDEX_op_mb:
2199     case INDEX_op_insn_start:
2200     case INDEX_op_exit_tb:
2201     case INDEX_op_goto_tb:
2202     case INDEX_op_goto_ptr:
2203     case INDEX_op_qemu_ld_i32:
2204     case INDEX_op_qemu_st_i32:
2205     case INDEX_op_qemu_ld_i64:
2206     case INDEX_op_qemu_st_i64:
2207         return true;
2208 
2209     case INDEX_op_qemu_st8_i32:
2210         return TCG_TARGET_HAS_qemu_st8_i32;
2211 
2212     case INDEX_op_qemu_ld_i128:
2213     case INDEX_op_qemu_st_i128:
2214         return TCG_TARGET_HAS_qemu_ldst_i128;
2215 
2216     case INDEX_op_add:
2217     case INDEX_op_and:
2218     case INDEX_op_mov:
2219     case INDEX_op_or:
2220     case INDEX_op_xor:
2221         return has_type;
2222 
2223     case INDEX_op_setcond_i32:
2224     case INDEX_op_brcond_i32:
2225     case INDEX_op_movcond_i32:
2226     case INDEX_op_ld8u_i32:
2227     case INDEX_op_ld8s_i32:
2228     case INDEX_op_ld16u_i32:
2229     case INDEX_op_ld16s_i32:
2230     case INDEX_op_ld_i32:
2231     case INDEX_op_st8_i32:
2232     case INDEX_op_st16_i32:
2233     case INDEX_op_st_i32:
2234     case INDEX_op_sub_i32:
2235     case INDEX_op_neg_i32:
2236     case INDEX_op_mul_i32:
2237     case INDEX_op_shl_i32:
2238     case INDEX_op_shr_i32:
2239     case INDEX_op_sar_i32:
2240     case INDEX_op_extract_i32:
2241     case INDEX_op_sextract_i32:
2242     case INDEX_op_deposit_i32:
2243         return true;
2244 
2245     case INDEX_op_negsetcond_i32:
2246         return TCG_TARGET_HAS_negsetcond_i32;
2247     case INDEX_op_div_i32:
2248     case INDEX_op_divu_i32:
2249         return TCG_TARGET_HAS_div_i32;
2250     case INDEX_op_rem_i32:
2251     case INDEX_op_remu_i32:
2252         return TCG_TARGET_HAS_rem_i32;
2253     case INDEX_op_div2_i32:
2254     case INDEX_op_divu2_i32:
2255         return TCG_TARGET_HAS_div2_i32;
2256     case INDEX_op_rotl_i32:
2257     case INDEX_op_rotr_i32:
2258         return TCG_TARGET_HAS_rot_i32;
2259     case INDEX_op_extract2_i32:
2260         return TCG_TARGET_HAS_extract2_i32;
2261     case INDEX_op_add2_i32:
2262         return TCG_TARGET_HAS_add2_i32;
2263     case INDEX_op_sub2_i32:
2264         return TCG_TARGET_HAS_sub2_i32;
2265     case INDEX_op_mulu2_i32:
2266         return TCG_TARGET_HAS_mulu2_i32;
2267     case INDEX_op_muls2_i32:
2268         return TCG_TARGET_HAS_muls2_i32;
2269     case INDEX_op_muluh_i32:
2270         return TCG_TARGET_HAS_muluh_i32;
2271     case INDEX_op_mulsh_i32:
2272         return TCG_TARGET_HAS_mulsh_i32;
2273     case INDEX_op_bswap16_i32:
2274         return TCG_TARGET_HAS_bswap16_i32;
2275     case INDEX_op_bswap32_i32:
2276         return TCG_TARGET_HAS_bswap32_i32;
2277     case INDEX_op_not_i32:
2278         return TCG_TARGET_HAS_not_i32;
2279     case INDEX_op_clz_i32:
2280         return TCG_TARGET_HAS_clz_i32;
2281     case INDEX_op_ctz_i32:
2282         return TCG_TARGET_HAS_ctz_i32;
2283     case INDEX_op_ctpop_i32:
2284         return TCG_TARGET_HAS_ctpop_i32;
2285 
2286     case INDEX_op_brcond2_i32:
2287     case INDEX_op_setcond2_i32:
2288         return TCG_TARGET_REG_BITS == 32;
2289 
2290     case INDEX_op_setcond_i64:
2291     case INDEX_op_brcond_i64:
2292     case INDEX_op_movcond_i64:
2293     case INDEX_op_ld8u_i64:
2294     case INDEX_op_ld8s_i64:
2295     case INDEX_op_ld16u_i64:
2296     case INDEX_op_ld16s_i64:
2297     case INDEX_op_ld32u_i64:
2298     case INDEX_op_ld32s_i64:
2299     case INDEX_op_ld_i64:
2300     case INDEX_op_st8_i64:
2301     case INDEX_op_st16_i64:
2302     case INDEX_op_st32_i64:
2303     case INDEX_op_st_i64:
2304     case INDEX_op_sub_i64:
2305     case INDEX_op_neg_i64:
2306     case INDEX_op_mul_i64:
2307     case INDEX_op_shl_i64:
2308     case INDEX_op_shr_i64:
2309     case INDEX_op_sar_i64:
2310     case INDEX_op_ext_i32_i64:
2311     case INDEX_op_extu_i32_i64:
2312     case INDEX_op_extract_i64:
2313     case INDEX_op_sextract_i64:
2314     case INDEX_op_deposit_i64:
2315         return TCG_TARGET_REG_BITS == 64;
2316 
2317     case INDEX_op_negsetcond_i64:
2318         return TCG_TARGET_HAS_negsetcond_i64;
2319     case INDEX_op_div_i64:
2320     case INDEX_op_divu_i64:
2321         return TCG_TARGET_HAS_div_i64;
2322     case INDEX_op_rem_i64:
2323     case INDEX_op_remu_i64:
2324         return TCG_TARGET_HAS_rem_i64;
2325     case INDEX_op_div2_i64:
2326     case INDEX_op_divu2_i64:
2327         return TCG_TARGET_HAS_div2_i64;
2328     case INDEX_op_rotl_i64:
2329     case INDEX_op_rotr_i64:
2330         return TCG_TARGET_HAS_rot_i64;
2331     case INDEX_op_extract2_i64:
2332         return TCG_TARGET_HAS_extract2_i64;
2333     case INDEX_op_extrl_i64_i32:
2334     case INDEX_op_extrh_i64_i32:
2335         return TCG_TARGET_HAS_extr_i64_i32;
2336     case INDEX_op_bswap16_i64:
2337         return TCG_TARGET_HAS_bswap16_i64;
2338     case INDEX_op_bswap32_i64:
2339         return TCG_TARGET_HAS_bswap32_i64;
2340     case INDEX_op_bswap64_i64:
2341         return TCG_TARGET_HAS_bswap64_i64;
2342     case INDEX_op_not_i64:
2343         return TCG_TARGET_HAS_not_i64;
2344     case INDEX_op_clz_i64:
2345         return TCG_TARGET_HAS_clz_i64;
2346     case INDEX_op_ctz_i64:
2347         return TCG_TARGET_HAS_ctz_i64;
2348     case INDEX_op_ctpop_i64:
2349         return TCG_TARGET_HAS_ctpop_i64;
2350     case INDEX_op_add2_i64:
2351         return TCG_TARGET_HAS_add2_i64;
2352     case INDEX_op_sub2_i64:
2353         return TCG_TARGET_HAS_sub2_i64;
2354     case INDEX_op_mulu2_i64:
2355         return TCG_TARGET_HAS_mulu2_i64;
2356     case INDEX_op_muls2_i64:
2357         return TCG_TARGET_HAS_muls2_i64;
2358     case INDEX_op_muluh_i64:
2359         return TCG_TARGET_HAS_muluh_i64;
2360     case INDEX_op_mulsh_i64:
2361         return TCG_TARGET_HAS_mulsh_i64;
2362 
2363     case INDEX_op_mov_vec:
2364     case INDEX_op_dup_vec:
2365     case INDEX_op_dupm_vec:
2366     case INDEX_op_ld_vec:
2367     case INDEX_op_st_vec:
2368     case INDEX_op_add_vec:
2369     case INDEX_op_sub_vec:
2370     case INDEX_op_and_vec:
2371     case INDEX_op_or_vec:
2372     case INDEX_op_xor_vec:
2373     case INDEX_op_cmp_vec:
2374         return has_type;
2375     case INDEX_op_dup2_vec:
2376         return has_type && TCG_TARGET_REG_BITS == 32;
2377     case INDEX_op_not_vec:
2378         return has_type && TCG_TARGET_HAS_not_vec;
2379     case INDEX_op_neg_vec:
2380         return has_type && TCG_TARGET_HAS_neg_vec;
2381     case INDEX_op_abs_vec:
2382         return has_type && TCG_TARGET_HAS_abs_vec;
2383     case INDEX_op_andc_vec:
2384         return has_type && TCG_TARGET_HAS_andc_vec;
2385     case INDEX_op_orc_vec:
2386         return has_type && TCG_TARGET_HAS_orc_vec;
2387     case INDEX_op_nand_vec:
2388         return has_type && TCG_TARGET_HAS_nand_vec;
2389     case INDEX_op_nor_vec:
2390         return has_type && TCG_TARGET_HAS_nor_vec;
2391     case INDEX_op_eqv_vec:
2392         return has_type && TCG_TARGET_HAS_eqv_vec;
2393     case INDEX_op_mul_vec:
2394         return has_type && TCG_TARGET_HAS_mul_vec;
2395     case INDEX_op_shli_vec:
2396     case INDEX_op_shri_vec:
2397     case INDEX_op_sari_vec:
2398         return has_type && TCG_TARGET_HAS_shi_vec;
2399     case INDEX_op_shls_vec:
2400     case INDEX_op_shrs_vec:
2401     case INDEX_op_sars_vec:
2402         return has_type && TCG_TARGET_HAS_shs_vec;
2403     case INDEX_op_shlv_vec:
2404     case INDEX_op_shrv_vec:
2405     case INDEX_op_sarv_vec:
2406         return has_type && TCG_TARGET_HAS_shv_vec;
2407     case INDEX_op_rotli_vec:
2408         return has_type && TCG_TARGET_HAS_roti_vec;
2409     case INDEX_op_rotls_vec:
2410         return has_type && TCG_TARGET_HAS_rots_vec;
2411     case INDEX_op_rotlv_vec:
2412     case INDEX_op_rotrv_vec:
2413         return has_type && TCG_TARGET_HAS_rotv_vec;
2414     case INDEX_op_ssadd_vec:
2415     case INDEX_op_usadd_vec:
2416     case INDEX_op_sssub_vec:
2417     case INDEX_op_ussub_vec:
2418         return has_type && TCG_TARGET_HAS_sat_vec;
2419     case INDEX_op_smin_vec:
2420     case INDEX_op_umin_vec:
2421     case INDEX_op_smax_vec:
2422     case INDEX_op_umax_vec:
2423         return has_type && TCG_TARGET_HAS_minmax_vec;
2424     case INDEX_op_bitsel_vec:
2425         return has_type && TCG_TARGET_HAS_bitsel_vec;
2426     case INDEX_op_cmpsel_vec:
2427         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2428 
2429     default:
2430         if (op < INDEX_op_last_generic) {
2431             const TCGOutOp *outop;
2432             TCGConstraintSetIndex con_set;
2433 
2434             if (!has_type) {
2435                 return false;
2436             }
2437 
2438             outop = all_outop[op];
2439             tcg_debug_assert(outop != NULL);
2440 
2441             con_set = outop->static_constraint;
2442             if (con_set == C_Dynamic) {
2443                 con_set = outop->dynamic_constraint(type, flags);
2444             }
2445             if (con_set >= 0) {
2446                 return true;
2447             }
2448             tcg_debug_assert(con_set == C_NotImplemented);
2449             return false;
2450         }
2451         tcg_debug_assert(op < NB_OPS);
2452         return true;
2453 
2454     case INDEX_op_last_generic:
2455         g_assert_not_reached();
2456     }
2457 }
2458 
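/*
 * Example of the contract below (a sketch): a query with ofs = 8,
 * len = 8 on TCG_TYPE_I32 asks whether the backend can insert one byte
 * at bit 8; if TCG_TARGET_deposit_valid() refuses, the expansion falls
 * back to a shift-and-mask sequence elsewhere.
 */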
2459 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2460 {
2461     unsigned width;
2462 
2463     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2464     width = (type == TCG_TYPE_I32 ? 32 : 64);
2465 
2466     tcg_debug_assert(ofs < width);
2467     tcg_debug_assert(len > 0);
2468     tcg_debug_assert(len <= width - ofs);
2469 
2470     return TCG_TARGET_deposit_valid(type, ofs, len);
2471 }
2472 
2473 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2474 
2475 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2476                           TCGTemp *ret, TCGTemp **args)
2477 {
2478     TCGv_i64 extend_free[MAX_CALL_IARGS];
2479     int n_extend = 0;
2480     TCGOp *op;
2481     int i, n, pi = 0, total_args;
2482 
2483     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2484         init_call_layout(info);
2485         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2486     }
2487 
2488     total_args = info->nr_out + info->nr_in + 2;
2489     op = tcg_op_alloc(INDEX_op_call, total_args);
2490 
2491 #ifdef CONFIG_PLUGIN
2492     /* Flag helpers that may affect guest state */
2493     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2494         tcg_ctx->plugin_insn->calls_helpers = true;
2495     }
2496 #endif
2497 
2498     TCGOP_CALLO(op) = n = info->nr_out;
2499     switch (n) {
2500     case 0:
2501         tcg_debug_assert(ret == NULL);
2502         break;
2503     case 1:
2504         tcg_debug_assert(ret != NULL);
2505         op->args[pi++] = temp_arg(ret);
2506         break;
2507     case 2:
2508     case 4:
2509         tcg_debug_assert(ret != NULL);
2510         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2511         tcg_debug_assert(ret->temp_subindex == 0);
2512         for (i = 0; i < n; ++i) {
2513             op->args[pi++] = temp_arg(ret + i);
2514         }
2515         break;
2516     default:
2517         g_assert_not_reached();
2518     }
2519 
2520     TCGOP_CALLI(op) = n = info->nr_in;
2521     for (i = 0; i < n; i++) {
2522         const TCGCallArgumentLoc *loc = &info->in[i];
2523         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2524 
2525         switch (loc->kind) {
2526         case TCG_CALL_ARG_NORMAL:
2527         case TCG_CALL_ARG_BY_REF:
2528         case TCG_CALL_ARG_BY_REF_N:
2529             op->args[pi++] = temp_arg(ts);
2530             break;
2531 
2532         case TCG_CALL_ARG_EXTEND_U:
2533         case TCG_CALL_ARG_EXTEND_S:
2534             {
2535                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2536                 TCGv_i32 orig = temp_tcgv_i32(ts);
2537 
2538                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2539                     tcg_gen_ext_i32_i64(temp, orig);
2540                 } else {
2541                     tcg_gen_extu_i32_i64(temp, orig);
2542                 }
2543                 op->args[pi++] = tcgv_i64_arg(temp);
2544                 extend_free[n_extend++] = temp;
2545             }
2546             break;
2547 
2548         default:
2549             g_assert_not_reached();
2550         }
2551     }
2552     op->args[pi++] = (uintptr_t)func;
2553     op->args[pi++] = (uintptr_t)info;
2554     tcg_debug_assert(pi == total_args);
2555 
2556     if (tcg_ctx->emit_before_op) {
2557         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2558     } else {
2559         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2560     }
2561 
2562     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2563     for (i = 0; i < n_extend; ++i) {
2564         tcg_temp_free_i64(extend_free[i]);
2565     }
2566 }
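/*
 * The fixed-arity wrappers below simply marshal their operands into a
 * stack array and defer to tcg_gen_callN; e.g. tcg_gen_call2(func, info,
 * ret, a, b) packs { a, b }.
 */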
2567 
2568 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2569 {
2570     tcg_gen_callN(func, info, ret, NULL);
2571 }
2572 
2573 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2574 {
2575     tcg_gen_callN(func, info, ret, &t1);
2576 }
2577 
2578 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2579                    TCGTemp *t1, TCGTemp *t2)
2580 {
2581     TCGTemp *args[2] = { t1, t2 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2587 {
2588     TCGTemp *args[3] = { t1, t2, t3 };
2589     tcg_gen_callN(func, info, ret, args);
2590 }
2591 
2592 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2593                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2594 {
2595     TCGTemp *args[4] = { t1, t2, t3, t4 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2600                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2601 {
2602     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2607                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2608                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2609 {
2610     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
2614 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2615                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2616                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2617 {
2618     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2619     tcg_gen_callN(func, info, ret, args);
2620 }
2621 
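/*
 * Initial value locations per temp kind (a summary of the switch below):
 * constants are TEMP_VAL_CONST, fixed temps live in their register,
 * memory-backed globals start in memory, EBB temps start dead, and TB
 * temps are treated as in-memory with their slot not yet allocated.
 */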
2622 static void tcg_reg_alloc_start(TCGContext *s)
2623 {
2624     int i, n;
2625 
2626     for (i = 0, n = s->nb_temps; i < n; i++) {
2627         TCGTemp *ts = &s->temps[i];
2628         TCGTempVal val = TEMP_VAL_MEM;
2629 
2630         switch (ts->kind) {
2631         case TEMP_CONST:
2632             val = TEMP_VAL_CONST;
2633             break;
2634         case TEMP_FIXED:
2635             val = TEMP_VAL_REG;
2636             break;
2637         case TEMP_GLOBAL:
2638             break;
2639         case TEMP_EBB:
2640             val = TEMP_VAL_DEAD;
2641             /* fall through */
2642         case TEMP_TB:
2643             ts->mem_allocated = 0;
2644             break;
2645         default:
2646             g_assert_not_reached();
2647         }
2648         ts->val_type = val;
2649     }
2650 
2651     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2652 }
2653 
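/*
 * Dump naming scheme (as implemented below): fixed and global temps
 * print their given name, TB temps print as locN, EBB temps as tmpN,
 * and constants as $0x... (vector constants as v<bits>$0x...).
 */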
2654 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2655                                  TCGTemp *ts)
2656 {
2657     int idx = temp_idx(ts);
2658 
2659     switch (ts->kind) {
2660     case TEMP_FIXED:
2661     case TEMP_GLOBAL:
2662         pstrcpy(buf, buf_size, ts->name);
2663         break;
2664     case TEMP_TB:
2665         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2666         break;
2667     case TEMP_EBB:
2668         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2669         break;
2670     case TEMP_CONST:
2671         switch (ts->type) {
2672         case TCG_TYPE_I32:
2673             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2674             break;
2675 #if TCG_TARGET_REG_BITS > 32
2676         case TCG_TYPE_I64:
2677             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2678             break;
2679 #endif
2680         case TCG_TYPE_V64:
2681         case TCG_TYPE_V128:
2682         case TCG_TYPE_V256:
2683             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2684                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2685             break;
2686         default:
2687             g_assert_not_reached();
2688         }
2689         break;
2690     }
2691     return buf;
2692 }
2693 
2694 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2695                              int buf_size, TCGArg arg)
2696 {
2697     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2698 }
2699 
2700 static const char * const cond_name[] =
2701 {
2702     [TCG_COND_NEVER] = "never",
2703     [TCG_COND_ALWAYS] = "always",
2704     [TCG_COND_EQ] = "eq",
2705     [TCG_COND_NE] = "ne",
2706     [TCG_COND_LT] = "lt",
2707     [TCG_COND_GE] = "ge",
2708     [TCG_COND_LE] = "le",
2709     [TCG_COND_GT] = "gt",
2710     [TCG_COND_LTU] = "ltu",
2711     [TCG_COND_GEU] = "geu",
2712     [TCG_COND_LEU] = "leu",
2713     [TCG_COND_GTU] = "gtu",
2714     [TCG_COND_TSTEQ] = "tsteq",
2715     [TCG_COND_TSTNE] = "tstne",
2716 };
2717 
2718 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2719 {
2720     [MO_UB]   = "ub",
2721     [MO_SB]   = "sb",
2722     [MO_LEUW] = "leuw",
2723     [MO_LESW] = "lesw",
2724     [MO_LEUL] = "leul",
2725     [MO_LESL] = "lesl",
2726     [MO_LEUQ] = "leq",
2727     [MO_BEUW] = "beuw",
2728     [MO_BESW] = "besw",
2729     [MO_BEUL] = "beul",
2730     [MO_BESL] = "besl",
2731     [MO_BEUQ] = "beq",
2732     [MO_128 + MO_BE] = "beo",
2733     [MO_128 + MO_LE] = "leo",
2734 };
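/*
 * When a memory op is printed, an entry from ldst_name is prefixed by
 * the atomicity and alignment strings defined below: e.g. an aligned
 * little-endian unsigned 32-bit access dumps as "al+leul".
 */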
2735 
2736 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2737     [MO_UNALN >> MO_ASHIFT]    = "un+",
2738     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2739     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2740     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2741     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2742     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2743     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2744     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2745 };
2746 
2747 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2748     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2749     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2750     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2751     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2752     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2753     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2754 };
2755 
2756 static const char bswap_flag_name[][6] = {
2757     [TCG_BSWAP_IZ] = "iz",
2758     [TCG_BSWAP_OZ] = "oz",
2759     [TCG_BSWAP_OS] = "os",
2760     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2761     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2762 };
2763 
2764 #ifdef CONFIG_PLUGIN
2765 static const char * const plugin_from_name[] = {
2766     "from-tb",
2767     "from-insn",
2768     "after-insn",
2769     "after-tb",
2770 };
2771 #endif
2772 
2773 static inline bool tcg_regset_single(TCGRegSet d)
2774 {
2775     return (d & (d - 1)) == 0;
2776 }
2777 
2778 static inline TCGReg tcg_regset_first(TCGRegSet d)
2779 {
2780     if (TCG_TARGET_NB_REGS <= 32) {
2781         return ctz32(d);
2782     } else {
2783         return ctz64(d);
2784     }
2785 }
2786 
2787 /* Return only the number of characters output -- no error return. */
2788 #define ne_fprintf(...) \
2789     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2790 
2791 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2792 {
2793     char buf[128];
2794     TCGOp *op;
2795 
2796     QTAILQ_FOREACH(op, &s->ops, link) {
2797         int i, k, nb_oargs, nb_iargs, nb_cargs;
2798         const TCGOpDef *def;
2799         TCGOpcode c;
2800         int col = 0;
2801 
2802         c = op->opc;
2803         def = &tcg_op_defs[c];
2804 
2805         if (c == INDEX_op_insn_start) {
2806             nb_oargs = 0;
2807             col += ne_fprintf(f, "\n ----");
2808 
2809             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2810                 col += ne_fprintf(f, " %016" PRIx64,
2811                                   tcg_get_insn_start_param(op, i));
2812             }
2813         } else if (c == INDEX_op_call) {
2814             const TCGHelperInfo *info = tcg_call_info(op);
2815             void *func = tcg_call_func(op);
2816 
2817             /* variable number of arguments */
2818             nb_oargs = TCGOP_CALLO(op);
2819             nb_iargs = TCGOP_CALLI(op);
2820             nb_cargs = def->nb_cargs;
2821 
2822             col += ne_fprintf(f, " %s ", def->name);
2823 
2824             /*
2825              * Print the function name from TCGHelperInfo, if available.
2826              * Note that plugins have a template function for the info,
2827              * but the actual function pointer comes from the plugin.
2828              */
2829             if (func == info->func) {
2830                 col += ne_fprintf(f, "%s", info->name);
2831             } else {
2832                 col += ne_fprintf(f, "plugin(%p)", func);
2833             }
2834 
2835             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2836             for (i = 0; i < nb_oargs; i++) {
2837                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2838                                                             op->args[i]));
2839             }
2840             for (i = 0; i < nb_iargs; i++) {
2841                 TCGArg arg = op->args[nb_oargs + i];
2842                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2843                 col += ne_fprintf(f, ",%s", t);
2844             }
2845         } else {
2846             if (def->flags & TCG_OPF_INT) {
2847                 col += ne_fprintf(f, " %s_i%d ",
2848                                   def->name,
2849                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2850             } else if (def->flags & TCG_OPF_VECTOR) {
2851                 col += ne_fprintf(f, "%s v%d,e%d,",
2852                                   def->name,
2853                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2854                                   8 << TCGOP_VECE(op));
2855             } else {
2856                 col += ne_fprintf(f, " %s ", def->name);
2857             }
2858 
2859             nb_oargs = def->nb_oargs;
2860             nb_iargs = def->nb_iargs;
2861             nb_cargs = def->nb_cargs;
2862 
2863             k = 0;
2864             for (i = 0; i < nb_oargs; i++) {
2865                 const char *sep =  k ? "," : "";
2866                 col += ne_fprintf(f, "%s%s", sep,
2867                                   tcg_get_arg_str(s, buf, sizeof(buf),
2868                                                   op->args[k++]));
2869             }
2870             for (i = 0; i < nb_iargs; i++) {
2871                 const char *sep =  k ? "," : "";
2872                 col += ne_fprintf(f, "%s%s", sep,
2873                                   tcg_get_arg_str(s, buf, sizeof(buf),
2874                                                   op->args[k++]));
2875             }
2876             switch (c) {
2877             case INDEX_op_brcond_i32:
2878             case INDEX_op_setcond_i32:
2879             case INDEX_op_negsetcond_i32:
2880             case INDEX_op_movcond_i32:
2881             case INDEX_op_brcond2_i32:
2882             case INDEX_op_setcond2_i32:
2883             case INDEX_op_brcond_i64:
2884             case INDEX_op_setcond_i64:
2885             case INDEX_op_negsetcond_i64:
2886             case INDEX_op_movcond_i64:
2887             case INDEX_op_cmp_vec:
2888             case INDEX_op_cmpsel_vec:
2889                 if (op->args[k] < ARRAY_SIZE(cond_name)
2890                     && cond_name[op->args[k]]) {
2891                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2892                 } else {
2893                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2894                 }
2895                 i = 1;
2896                 break;
2897             case INDEX_op_qemu_ld_i32:
2898             case INDEX_op_qemu_st_i32:
2899             case INDEX_op_qemu_st8_i32:
2900             case INDEX_op_qemu_ld_i64:
2901             case INDEX_op_qemu_st_i64:
2902             case INDEX_op_qemu_ld_i128:
2903             case INDEX_op_qemu_st_i128:
2904                 {
2905                     const char *s_al, *s_op, *s_at;
2906                     MemOpIdx oi = op->args[k++];
2907                     MemOp mop = get_memop(oi);
2908                     unsigned ix = get_mmuidx(oi);
2909 
2910                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2911                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2912                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2913                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2914 
2915                     /* If all fields are accounted for, print symbolically. */
2916                     if (!mop && s_al && s_op && s_at) {
2917                         col += ne_fprintf(f, ",%s%s%s,%u",
2918                                           s_at, s_al, s_op, ix);
2919                     } else {
2920                         mop = get_memop(oi);
2921                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2922                     }
2923                     i = 1;
2924                 }
2925                 break;
2926             case INDEX_op_bswap16_i32:
2927             case INDEX_op_bswap16_i64:
2928             case INDEX_op_bswap32_i32:
2929             case INDEX_op_bswap32_i64:
2930             case INDEX_op_bswap64_i64:
2931                 {
2932                     TCGArg flags = op->args[k];
2933                     const char *name = NULL;
2934 
2935                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2936                         name = bswap_flag_name[flags];
2937                     }
2938                     if (name) {
2939                         col += ne_fprintf(f, ",%s", name);
2940                     } else {
2941                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2942                     }
2943                     i = k = 1;
2944                 }
2945                 break;
2946 #ifdef CONFIG_PLUGIN
2947             case INDEX_op_plugin_cb:
2948                 {
2949                     TCGArg from = op->args[k++];
2950                     const char *name = NULL;
2951 
2952                     if (from < ARRAY_SIZE(plugin_from_name)) {
2953                         name = plugin_from_name[from];
2954                     }
2955                     if (name) {
2956                         col += ne_fprintf(f, "%s", name);
2957                     } else {
2958                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2959                     }
2960                     i = 1;
2961                 }
2962                 break;
2963 #endif
2964             default:
2965                 i = 0;
2966                 break;
2967             }
2968             switch (c) {
2969             case INDEX_op_set_label:
2970             case INDEX_op_br:
2971             case INDEX_op_brcond_i32:
2972             case INDEX_op_brcond_i64:
2973             case INDEX_op_brcond2_i32:
2974                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2975                                   arg_label(op->args[k])->id);
2976                 i++, k++;
2977                 break;
2978             case INDEX_op_mb:
2979                 {
2980                     TCGBar membar = op->args[k];
2981                     const char *b_op, *m_op;
2982 
2983                     switch (membar & TCG_BAR_SC) {
2984                     case 0:
2985                         b_op = "none";
2986                         break;
2987                     case TCG_BAR_LDAQ:
2988                         b_op = "acq";
2989                         break;
2990                     case TCG_BAR_STRL:
2991                         b_op = "rel";
2992                         break;
2993                     case TCG_BAR_SC:
2994                         b_op = "seq";
2995                         break;
2996                     default:
2997                         g_assert_not_reached();
2998                     }
2999 
3000                     switch (membar & TCG_MO_ALL) {
3001                     case 0:
3002                         m_op = "none";
3003                         break;
3004                     case TCG_MO_LD_LD:
3005                         m_op = "rr";
3006                         break;
3007                     case TCG_MO_LD_ST:
3008                         m_op = "rw";
3009                         break;
3010                     case TCG_MO_ST_LD:
3011                         m_op = "wr";
3012                         break;
3013                     case TCG_MO_ST_ST:
3014                         m_op = "ww";
3015                         break;
3016                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3017                         m_op = "rr+rw";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3020                         m_op = "rr+wr";
3021                         break;
3022                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3023                         m_op = "rr+ww";
3024                         break;
3025                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3026                         m_op = "rw+wr";
3027                         break;
3028                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3029                         m_op = "rw+ww";
3030                         break;
3031                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3032                         m_op = "wr+ww";
3033                         break;
3034                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3035                         m_op = "rr+rw+wr";
3036                         break;
3037                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3038                         m_op = "rr+rw+ww";
3039                         break;
3040                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3041                         m_op = "rr+wr+ww";
3042                         break;
3043                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3044                         m_op = "rw+wr+ww";
3045                         break;
3046                     case TCG_MO_ALL:
3047                         m_op = "all";
3048                         break;
3049                     default:
3050                         g_assert_not_reached();
3051                     }
3052 
3053                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3054                     i++, k++;
3055                 }
3056                 break;
3057             default:
3058                 break;
3059             }
3060             for (; i < nb_cargs; i++, k++) {
3061                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3062                                   op->args[k]);
3063             }
3064         }
3065 
3066         if (have_prefs || op->life) {
3067             for (; col < 40; ++col) {
3068                 putc(' ', f);
3069             }
3070         }
3071 
3072         if (op->life) {
3073             unsigned life = op->life;
3074 
3075             if (life & (SYNC_ARG * 3)) {
3076                 ne_fprintf(f, "  sync:");
3077                 for (i = 0; i < 2; ++i) {
3078                     if (life & (SYNC_ARG << i)) {
3079                         ne_fprintf(f, " %d", i);
3080                     }
3081                 }
3082             }
3083             life /= DEAD_ARG;
3084             if (life) {
3085                 ne_fprintf(f, "  dead:");
3086                 for (i = 0; life; ++i, life >>= 1) {
3087                     if (life & 1) {
3088                         ne_fprintf(f, " %d", i);
3089                     }
3090                 }
3091             }
3092         }
3093 
3094         if (have_prefs) {
3095             for (i = 0; i < nb_oargs; ++i) {
3096                 TCGRegSet set = output_pref(op, i);
3097 
3098                 if (i == 0) {
3099                     ne_fprintf(f, "  pref=");
3100                 } else {
3101                     ne_fprintf(f, ",");
3102                 }
3103                 if (set == 0) {
3104                     ne_fprintf(f, "none");
3105                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3106                     ne_fprintf(f, "all");
3107 #ifdef CONFIG_DEBUG_TCG
3108                 } else if (tcg_regset_single(set)) {
3109                     TCGReg reg = tcg_regset_first(set);
3110                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3111 #endif
3112                 } else if (TCG_TARGET_NB_REGS <= 32) {
3113                     ne_fprintf(f, "0x%x", (uint32_t)set);
3114                 } else {
3115                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3116                 }
3117             }
3118         }
3119 
3120         putc('\n', f);
3121     }
3122 }
3123 
3124 /* We give more priority to constraints with fewer registers. */
3125 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3126 {
3127     int n;
3128 
3129     arg_ct += k;
3130     n = ctpop64(arg_ct->regs);
3131 
3132     /*
3133      * Sort constraints of a single register first, which includes output
3134      * aliases (which must exactly match the input already allocated).
3135      */
3136     if (n == 1 || arg_ct->oalias) {
3137         return INT_MAX;
3138     }
3139 
3140     /*
3141      * Sort register pairs next, first then second immediately after.
3142      * Arbitrarily sort multiple pairs by the index of the first reg;
3143      * there shouldn't be many pairs.
3144      */
3145     switch (arg_ct->pair) {
3146     case 1:
3147     case 3:
3148         return (k + 1) * 2;
3149     case 2:
3150         return (arg_ct->pair_index + 1) * 2 - 1;
3151     }
3152 
3153     /* Finally, sort by decreasing register count. */
3154     assert(n > 1);
3155     return -n;
3156 }
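/*
 * Priority examples for the sort below (a sketch): a single-register or
 * output-alias constraint returns INT_MAX and sorts first; the halves of
 * a register pair at first-half index k sort adjacently via (k + 1) * 2
 * and (k + 1) * 2 - 1; everything else sorts by decreasing register
 * count, so a 4-register class (-4) is preferred to a 16-register
 * class (-16).
 */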
3157 
3158 /* Sort from highest priority to lowest. */
3159 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3160 {
3161     int i, j;
3162 
3163     for (i = 0; i < n; i++) {
3164         a[start + i].sort_index = start + i;
3165     }
3166     if (n <= 1) {
3167         return;
3168     }
3169     for (i = 0; i < n - 1; i++) {
3170         for (j = i + 1; j < n; j++) {
3171             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3172             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3173             if (p1 < p2) {
3174                 int tmp = a[start + i].sort_index;
3175                 a[start + i].sort_index = a[start + j].sort_index;
3176                 a[start + j].sort_index = tmp;
3177             }
3178         }
3179     }
3180 }
3181 
3182 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3183 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3184 
3185 static void process_constraint_sets(void)
3186 {
3187     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3188         const TCGConstraintSet *tdefs = &constraint_sets[c];
3189         TCGArgConstraint *args_ct = all_cts[c];
3190         int nb_oargs = tdefs->nb_oargs;
3191         int nb_iargs = tdefs->nb_iargs;
3192         int nb_args = nb_oargs + nb_iargs;
3193         bool saw_alias_pair = false;
3194 
3195         for (int i = 0; i < nb_args; i++) {
3196             const char *ct_str = tdefs->args_ct_str[i];
3197             bool input_p = i >= nb_oargs;
3198             int o;
3199 
3200             switch (*ct_str) {
3201             case '0' ... '9':
3202                 o = *ct_str - '0';
3203                 tcg_debug_assert(input_p);
3204                 tcg_debug_assert(o < nb_oargs);
3205                 tcg_debug_assert(args_ct[o].regs != 0);
3206                 tcg_debug_assert(!args_ct[o].oalias);
3207                 args_ct[i] = args_ct[o];
3208                 /* The output sets oalias.  */
3209                 args_ct[o].oalias = 1;
3210                 args_ct[o].alias_index = i;
3211                 /* The input sets ialias. */
3212                 args_ct[i].ialias = 1;
3213                 args_ct[i].alias_index = o;
3214                 if (args_ct[i].pair) {
3215                     saw_alias_pair = true;
3216                 }
3217                 tcg_debug_assert(ct_str[1] == '\0');
3218                 continue;
3219 
3220             case '&':
3221                 tcg_debug_assert(!input_p);
3222                 args_ct[i].newreg = true;
3223                 ct_str++;
3224                 break;
3225 
3226             case 'p': /* plus */
3227                 /* Allocate to the register after the previous. */
3228                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3229                 o = i - 1;
3230                 tcg_debug_assert(!args_ct[o].pair);
3231                 tcg_debug_assert(!args_ct[o].ct);
3232                 args_ct[i] = (TCGArgConstraint){
3233                     .pair = 2,
3234                     .pair_index = o,
3235                     .regs = args_ct[o].regs << 1,
3236                     .newreg = args_ct[o].newreg,
3237                 };
3238                 args_ct[o].pair = 1;
3239                 args_ct[o].pair_index = i;
3240                 tcg_debug_assert(ct_str[1] == '\0');
3241                 continue;
3242 
3243             case 'm': /* minus */
3244                 /* Allocate to the register before the previous. */
3245                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3246                 o = i - 1;
3247                 tcg_debug_assert(!args_ct[o].pair);
3248                 tcg_debug_assert(!args_ct[o].ct);
3249                 args_ct[i] = (TCGArgConstraint){
3250                     .pair = 1,
3251                     .pair_index = o,
3252                     .regs = args_ct[o].regs >> 1,
3253                     .newreg = args_ct[o].newreg,
3254                 };
3255                 args_ct[o].pair = 2;
3256                 args_ct[o].pair_index = i;
3257                 tcg_debug_assert(ct_str[1] == '\0');
3258                 continue;
3259             }
3260 
3261             do {
3262                 switch (*ct_str) {
3263                 case 'i':
3264                     args_ct[i].ct |= TCG_CT_CONST;
3265                     break;
3266 #ifdef TCG_REG_ZERO
3267                 case 'z':
3268                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3269                     break;
3270 #endif
3271 
3272                 /* Include all of the target-specific constraints. */
3273 
3274 #undef CONST
3275 #define CONST(CASE, MASK) \
3276     case CASE: args_ct[i].ct |= MASK; break;
3277 #define REGS(CASE, MASK) \
3278     case CASE: args_ct[i].regs |= MASK; break;
3279 
3280 #include "tcg-target-con-str.h"
3281 
3282 #undef REGS
3283 #undef CONST
3284                 default:
3285                 case '0' ... '9':
3286                 case '&':
3287                 case 'p':
3288                 case 'm':
3289                     /* Typo in TCGConstraintSet constraint. */
3290                     g_assert_not_reached();
3291                 }
3292             } while (*++ct_str != '\0');
3293         }
3294 
3295         /*
3296          * Fix up output pairs that are aliased with inputs.
3297          * When we created the alias, we copied pair from the output.
3298          * There are three cases:
3299          *    (1a) Pairs of inputs alias pairs of outputs.
3300          *    (1b) One input aliases the first of a pair of outputs.
3301          *    (2)  One input aliases the second of a pair of outputs.
3302          *
3303          * Case 1a is handled by making sure that the pair_index'es are
3304          * properly updated so that they appear the same as a pair of inputs.
3305          *
3306          * Case 1b is handled by setting the pair_index of the input to
3307          * itself, simply so it doesn't point to an unrelated argument.
3308          * Since we don't encounter the "second" during the input allocation
3309          * phase, nothing happens with the second half of the input pair.
3310          *
3311          * Case 2 is handled by setting the second input to pair=3, the
3312          * first output to pair=3, and the pair_index'es to match.
3313          */
3314         if (saw_alias_pair) {
3315             for (int i = nb_oargs; i < nb_args; i++) {
3316                 int o, o2, i2;
3317 
3318                 /*
3319                  * Since [0-9pm] must be alone in the constraint string,
3320                  * the only way they can both be set is if the pair comes
3321                  * from the output alias.
3322                  */
3323                 if (!args_ct[i].ialias) {
3324                     continue;
3325                 }
3326                 switch (args_ct[i].pair) {
3327                 case 0:
3328                     break;
3329                 case 1:
3330                     o = args_ct[i].alias_index;
3331                     o2 = args_ct[o].pair_index;
3332                     tcg_debug_assert(args_ct[o].pair == 1);
3333                     tcg_debug_assert(args_ct[o2].pair == 2);
3334                     if (args_ct[o2].oalias) {
3335                         /* Case 1a */
3336                         i2 = args_ct[o2].alias_index;
3337                         tcg_debug_assert(args_ct[i2].pair == 2);
3338                         args_ct[i2].pair_index = i;
3339                         args_ct[i].pair_index = i2;
3340                     } else {
3341                         /* Case 1b */
3342                         args_ct[i].pair_index = i;
3343                     }
3344                     break;
3345                 case 2:
3346                     o = args_ct[i].alias_index;
3347                     o2 = args_ct[o].pair_index;
3348                     tcg_debug_assert(args_ct[o].pair == 2);
3349                     tcg_debug_assert(args_ct[o2].pair == 1);
3350                     if (args_ct[o2].oalias) {
3351                         /* Case 1a */
3352                         i2 = args_ct[o2].alias_index;
3353                         tcg_debug_assert(args_ct[i2].pair == 1);
3354                         args_ct[i2].pair_index = i;
3355                         args_ct[i].pair_index = i2;
3356                     } else {
3357                         /* Case 2 */
3358                         args_ct[i].pair = 3;
3359                         args_ct[o2].pair = 3;
3360                         args_ct[i].pair_index = o2;
3361                         args_ct[o2].pair_index = i;
3362                     }
3363                     break;
3364                 default:
3365                     g_assert_not_reached();
3366                 }
3367             }
3368         }
3369 
3370         /* sort the constraints (XXX: this is just a heuristic) */
3371         sort_constraints(args_ct, 0, nb_oargs);
3372         sort_constraints(args_ct, nb_oargs, nb_iargs);
3373     }
3374 }
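
/*
 * As a sketch (using a made-up constraint set, not one from
 * tcg-target-con-set.h): { "r", "0" } for a two-operand op decodes
 * as output 0 in any register and input 1 aliased to output 0
 * (ialias/oalias with matching alias_index).  Likewise "p" and "m"
 * chain an operand to the register just above or below the previous
 * operand, forming a pair for tcg_reg_alloc_pair() below.
 */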
3375 
3376 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3377 {
3378     TCGOpcode opc = op->opc;
3379     TCGType type = TCGOP_TYPE(op);
3380     unsigned flags = TCGOP_FLAGS(op);
3381     const TCGOpDef *def = &tcg_op_defs[opc];
3382     const TCGOutOp *outop = all_outop[opc];
3383     TCGConstraintSetIndex con_set;
3384 
3385     if (def->flags & TCG_OPF_NOT_PRESENT) {
3386         return empty_cts;
3387     }
3388 
3389     if (outop) {
3390         con_set = outop->static_constraint;
3391         if (con_set == C_Dynamic) {
3392             con_set = outop->dynamic_constraint(type, flags);
3393         }
3394     } else {
3395         con_set = tcg_target_op_def(opc, type, flags);
3396     }
3397     tcg_debug_assert(con_set >= 0);
3398     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3399 
3400     /* The constraint arguments must match TCGOpcode arguments. */
3401     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3402     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3403 
3404     return all_cts[con_set];
3405 }
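
/*
 * So every op resolves to one precomputed row of all_cts[], filled in
 * by process_constraint_sets() above; ops marked TCG_OPF_NOT_PRESENT
 * get the all-zero empty_cts row instead.
 */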
3406 
3407 static void remove_label_use(TCGOp *op, int idx)
3408 {
3409     TCGLabel *label = arg_label(op->args[idx]);
3410     TCGLabelUse *use;
3411 
3412     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3413         if (use->op == op) {
3414             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3415             return;
3416         }
3417     }
3418     g_assert_not_reached();
3419 }
3420 
3421 void tcg_op_remove(TCGContext *s, TCGOp *op)
3422 {
3423     switch (op->opc) {
3424     case INDEX_op_br:
3425         remove_label_use(op, 0);
3426         break;
3427     case INDEX_op_brcond_i32:
3428     case INDEX_op_brcond_i64:
3429         remove_label_use(op, 3);
3430         break;
3431     case INDEX_op_brcond2_i32:
3432         remove_label_use(op, 5);
3433         break;
3434     default:
3435         break;
3436     }
3437 
3438     QTAILQ_REMOVE(&s->ops, op, link);
3439     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3440     s->nb_ops--;
3441 }
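
/*
 * Note that ops are recycled via free_ops rather than freed: label
 * uses must be unlinked first so that label->branches stays accurate
 * for reachable_code_pass() below.
 */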
3442 
3443 void tcg_remove_ops_after(TCGOp *op)
3444 {
3445     TCGContext *s = tcg_ctx;
3446 
3447     while (true) {
3448         TCGOp *last = tcg_last_op();
3449         if (last == op) {
3450             return;
3451         }
3452         tcg_op_remove(s, last);
3453     }
3454 }
3455 
3456 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3457 {
3458     TCGContext *s = tcg_ctx;
3459     TCGOp *op = NULL;
3460 
3461     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3462         QTAILQ_FOREACH(op, &s->free_ops, link) {
3463             if (nargs <= op->nargs) {
3464                 QTAILQ_REMOVE(&s->free_ops, op, link);
3465                 nargs = op->nargs;
3466                 goto found;
3467             }
3468         }
3469     }
3470 
3471     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3472     nargs = MAX(4, nargs);
3473     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3474 
3475  found:
3476     memset(op, 0, offsetof(TCGOp, link));
3477     op->opc = opc;
3478     op->nargs = nargs;
3479 
3480     /* Check for bitfield overflow. */
3481     tcg_debug_assert(op->nargs == nargs);
3482 
3483     s->nb_ops++;
3484     return op;
3485 }
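
/*
 * Reuse from free_ops is first-fit, and a recycled op keeps its
 * (possibly larger) nargs.  The memset clears only the header fields
 * that precede 'link'; the queue linkage is rewritten on insertion.
 */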
3486 
3487 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3488 {
3489     TCGOp *op = tcg_op_alloc(opc, nargs);
3490 
3491     if (tcg_ctx->emit_before_op) {
3492         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3493     } else {
3494         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3495     }
3496     return op;
3497 }
3498 
3499 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3500                             TCGOpcode opc, TCGType type, unsigned nargs)
3501 {
3502     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3503 
3504     TCGOP_TYPE(new_op) = type;
3505     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3506     return new_op;
3507 }
3508 
3509 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3510                            TCGOpcode opc, TCGType type, unsigned nargs)
3511 {
3512     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3513 
3514     TCGOP_TYPE(new_op) = type;
3515     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3516     return new_op;
3517 }
3518 
3519 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3520 {
3521     TCGLabelUse *u;
3522 
3523     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3524         TCGOp *op = u->op;
3525         switch (op->opc) {
3526         case INDEX_op_br:
3527             op->args[0] = label_arg(to);
3528             break;
3529         case INDEX_op_brcond_i32:
3530         case INDEX_op_brcond_i64:
3531             op->args[3] = label_arg(to);
3532             break;
3533         case INDEX_op_brcond2_i32:
3534             op->args[5] = label_arg(to);
3535             break;
3536         default:
3537             g_assert_not_reached();
3538         }
3539     }
3540 
3541     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3542 }
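
/*
 * The label argument indices here mirror remove_label_use() above;
 * both must be updated together if a new branching opcode is added.
 */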
3543 
3544 /* Reachability analysis: remove unreachable code.  */
3545 static void __attribute__((noinline))
3546 reachable_code_pass(TCGContext *s)
3547 {
3548     TCGOp *op, *op_next, *op_prev;
3549     bool dead = false;
3550 
3551     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3552         bool remove = dead;
3553         TCGLabel *label;
3554 
3555         switch (op->opc) {
3556         case INDEX_op_set_label:
3557             label = arg_label(op->args[0]);
3558 
3559             /*
3560              * Note that the first op in the TB is always a load,
3561              * so there is always something before a label.
3562              */
3563             op_prev = QTAILQ_PREV(op, link);
3564 
3565             /*
3566              * If we find two sequential labels, move all branches to
3567              * reference the second label and remove the first label.
3568              * Do this before branch to next optimization, so that the
3569              * middle label is out of the way.
3570              */
3571             if (op_prev->opc == INDEX_op_set_label) {
3572                 move_label_uses(label, arg_label(op_prev->args[0]));
3573                 tcg_op_remove(s, op_prev);
3574                 op_prev = QTAILQ_PREV(op, link);
3575             }
3576 
3577             /*
3578              * Optimization can fold conditional branches to unconditional.
3579              * If we find a label which is preceded by an unconditional
3580              * branch to next, remove the branch.  We couldn't do this when
3581              * processing the branch because any dead code between the branch
3582              * and label had not yet been removed.
3583              */
3584             if (op_prev->opc == INDEX_op_br &&
3585                 label == arg_label(op_prev->args[0])) {
3586                 tcg_op_remove(s, op_prev);
3587                 /* Fall through means insns become live again.  */
3588                 dead = false;
3589             }
3590 
3591             if (QSIMPLEQ_EMPTY(&label->branches)) {
3592                 /*
3593                  * While there is an occasional backward branch, virtually
3594                  * all branches generated by the translators are forward.
3595                  * This means that generally we will have already removed
3596                  * every reference the label will ever get, and there is
3597                  * little to be gained by iterating.
3598                  */
3599                 remove = true;
3600             } else {
3601                 /* Once we see a label, insns become live again.  */
3602                 dead = false;
3603                 remove = false;
3604             }
3605             break;
3606 
3607         case INDEX_op_br:
3608         case INDEX_op_exit_tb:
3609         case INDEX_op_goto_ptr:
3610             /* Unconditional branches; everything following is dead.  */
3611             dead = true;
3612             break;
3613 
3614         case INDEX_op_call:
3615             /* Notice noreturn helper calls, raising exceptions.  */
3616             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3617                 dead = true;
3618             }
3619             break;
3620 
3621         case INDEX_op_insn_start:
3622             /* Never remove -- we need to keep these for unwind.  */
3623             remove = false;
3624             break;
3625 
3626         default:
3627             break;
3628         }
3629 
3630         if (remove) {
3631             tcg_op_remove(s, op);
3632         }
3633     }
3634 }
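
/*
 * A sketch of the net effect (not literal TCG output): after optimize
 * folds "brcond x, y, eq, $L1" to "br $L1", the ops between the branch
 * and "set_label $L1" are removed as dead here, then the now-adjacent
 * branch-to-next and the unreferenced label are removed as well.
 */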
3635 
3636 #define TS_DEAD  1
3637 #define TS_MEM   2
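
/*
 * Temp state bits for the liveness passes: TS_DEAD means the value has
 * no further use before being redefined; TS_MEM means the value is
 * (also) up to date in its canonical memory slot.
 */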
3638 
3639 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3640 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3641 
3642 /* For liveness_pass_1, the register preferences for a given temp.  */
3643 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3644 {
3645     return ts->state_ptr;
3646 }
3647 
3648 /* For liveness_pass_1, reset the preferences for a given temp to the
3649  * maximal regset for its type.
3650  */
3651 static inline void la_reset_pref(TCGTemp *ts)
3652 {
3653     *la_temp_pref(ts)
3654         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3655 }
3656 
3657 /* liveness analysis: end of function: all temps are dead, and globals
3658    should be in memory. */
3659 static void la_func_end(TCGContext *s, int ng, int nt)
3660 {
3661     int i;
3662 
3663     for (i = 0; i < ng; ++i) {
3664         s->temps[i].state = TS_DEAD | TS_MEM;
3665         la_reset_pref(&s->temps[i]);
3666     }
3667     for (i = ng; i < nt; ++i) {
3668         s->temps[i].state = TS_DEAD;
3669         la_reset_pref(&s->temps[i]);
3670     }
3671 }
3672 
3673 /* liveness analysis: end of basic block: all temps are dead, globals
3674    and local temps should be in memory. */
3675 static void la_bb_end(TCGContext *s, int ng, int nt)
3676 {
3677     int i;
3678 
3679     for (i = 0; i < nt; ++i) {
3680         TCGTemp *ts = &s->temps[i];
3681         int state;
3682 
3683         switch (ts->kind) {
3684         case TEMP_FIXED:
3685         case TEMP_GLOBAL:
3686         case TEMP_TB:
3687             state = TS_DEAD | TS_MEM;
3688             break;
3689         case TEMP_EBB:
3690         case TEMP_CONST:
3691             state = TS_DEAD;
3692             break;
3693         default:
3694             g_assert_not_reached();
3695         }
3696         ts->state = state;
3697         la_reset_pref(ts);
3698     }
3699 }
3700 
3701 /* liveness analysis: sync globals back to memory.  */
3702 static void la_global_sync(TCGContext *s, int ng)
3703 {
3704     int i;
3705 
3706     for (i = 0; i < ng; ++i) {
3707         int state = s->temps[i].state;
3708         s->temps[i].state = state | TS_MEM;
3709         if (state == TS_DEAD) {
3710             /* If the global was previously dead, reset prefs.  */
3711             la_reset_pref(&s->temps[i]);
3712         }
3713     }
3714 }
3715 
3716 /*
3717  * liveness analysis: conditional branch: all temps are dead unless
3718  * explicitly live-across-conditional-branch, globals and local temps
3719  * should be synced.
3720  */
3721 static void la_bb_sync(TCGContext *s, int ng, int nt)
3722 {
3723     la_global_sync(s, ng);
3724 
3725     for (int i = ng; i < nt; ++i) {
3726         TCGTemp *ts = &s->temps[i];
3727         int state;
3728 
3729         switch (ts->kind) {
3730         case TEMP_TB:
3731             state = ts->state;
3732             ts->state = state | TS_MEM;
3733             if (state != TS_DEAD) {
3734                 continue;
3735             }
3736             break;
3737         case TEMP_EBB:
3738         case TEMP_CONST:
3739             continue;
3740         default:
3741             g_assert_not_reached();
3742         }
3743         la_reset_pref(&s->temps[i]);
3744     }
3745 }
3746 
3747 /* liveness analysis: sync globals back to memory and kill.  */
3748 static void la_global_kill(TCGContext *s, int ng)
3749 {
3750     int i;
3751 
3752     for (i = 0; i < ng; i++) {
3753         s->temps[i].state = TS_DEAD | TS_MEM;
3754         la_reset_pref(&s->temps[i]);
3755     }
3756 }
3757 
3758 /* liveness analysis: note live globals crossing calls.  */
3759 static void la_cross_call(TCGContext *s, int nt)
3760 {
3761     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3762     int i;
3763 
3764     for (i = 0; i < nt; i++) {
3765         TCGTemp *ts = &s->temps[i];
3766         if (!(ts->state & TS_DEAD)) {
3767             TCGRegSet *pset = la_temp_pref(ts);
3768             TCGRegSet set = *pset;
3769 
3770             set &= mask;
3771             /* If the combination is not possible, restart.  */
3772             if (set == 0) {
3773                 set = tcg_target_available_regs[ts->type] & mask;
3774             }
3775             *pset = set;
3776         }
3777     }
3778 }
3779 
3780 /*
3781  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3782  * to TEMP_EBB, if possible.
3783  */
3784 static void __attribute__((noinline))
3785 liveness_pass_0(TCGContext *s)
3786 {
3787     void * const multiple_ebb = (void *)(uintptr_t)-1;
3788     int nb_temps = s->nb_temps;
3789     TCGOp *op, *ebb;
3790 
3791     for (int i = s->nb_globals; i < nb_temps; ++i) {
3792         s->temps[i].state_ptr = NULL;
3793     }
3794 
3795     /*
3796      * Represent each EBB by the op at which it begins.  In the case of
3797      * the first EBB, this is the first op, otherwise it is a label.
3798      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3799      * within a single EBB, else MULTIPLE_EBB.
3800      */
3801     ebb = QTAILQ_FIRST(&s->ops);
3802     QTAILQ_FOREACH(op, &s->ops, link) {
3803         const TCGOpDef *def;
3804         int nb_oargs, nb_iargs;
3805 
3806         switch (op->opc) {
3807         case INDEX_op_set_label:
3808             ebb = op;
3809             continue;
3810         case INDEX_op_discard:
3811             continue;
3812         case INDEX_op_call:
3813             nb_oargs = TCGOP_CALLO(op);
3814             nb_iargs = TCGOP_CALLI(op);
3815             break;
3816         default:
3817             def = &tcg_op_defs[op->opc];
3818             nb_oargs = def->nb_oargs;
3819             nb_iargs = def->nb_iargs;
3820             break;
3821         }
3822 
3823         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3824             TCGTemp *ts = arg_temp(op->args[i]);
3825 
3826             if (ts->kind != TEMP_TB) {
3827                 continue;
3828             }
3829             if (ts->state_ptr == NULL) {
3830                 ts->state_ptr = ebb;
3831             } else if (ts->state_ptr != ebb) {
3832                 ts->state_ptr = multiple_ebb;
3833             }
3834         }
3835     }
3836 
3837     /*
3838      * For TEMP_TB that turned out not to be used beyond one EBB,
3839      * reduce the liveness to TEMP_EBB.
3840      */
3841     for (int i = s->nb_globals; i < nb_temps; ++i) {
3842         TCGTemp *ts = &s->temps[i];
3843         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3844             ts->kind = TEMP_EBB;
3845         }
3846     }
3847 }
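
/*
 * E.g. a TEMP_TB that is only touched between one label and the next
 * still has state_ptr equal to that single EBB's first op, and is
 * safely demoted; anything used across labels hits the MULTIPLE_EBB
 * marker and keeps its TEMP_TB lifetime.
 */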
3848 
3849 /* Liveness analysis: update the opc_arg_life array to tell if a
3850    given input argument is dead.  Instructions updating dead
3851    temporaries are removed. */
3852 static void __attribute__((noinline))
3853 liveness_pass_1(TCGContext *s)
3854 {
3855     int nb_globals = s->nb_globals;
3856     int nb_temps = s->nb_temps;
3857     TCGOp *op, *op_prev;
3858     TCGRegSet *prefs;
3859     int i;
3860 
3861     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3862     for (i = 0; i < nb_temps; ++i) {
3863         s->temps[i].state_ptr = prefs + i;
3864     }
3865 
3866     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3867     la_func_end(s, nb_globals, nb_temps);
3868 
3869     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3870         int nb_iargs, nb_oargs;
3871         TCGOpcode opc_new, opc_new2;
3872         bool have_opc_new2;
3873         TCGLifeData arg_life = 0;
3874         TCGTemp *ts;
3875         TCGOpcode opc = op->opc;
3876         const TCGOpDef *def = &tcg_op_defs[opc];
3877         const TCGArgConstraint *args_ct;
3878 
3879         switch (opc) {
3880         case INDEX_op_call:
3881             {
3882                 const TCGHelperInfo *info = tcg_call_info(op);
3883                 int call_flags = tcg_call_flags(op);
3884 
3885                 nb_oargs = TCGOP_CALLO(op);
3886                 nb_iargs = TCGOP_CALLI(op);
3887 
3888                 /* pure functions can be removed if their result is unused */
3889                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3890                     for (i = 0; i < nb_oargs; i++) {
3891                         ts = arg_temp(op->args[i]);
3892                         if (ts->state != TS_DEAD) {
3893                             goto do_not_remove_call;
3894                         }
3895                     }
3896                     goto do_remove;
3897                 }
3898             do_not_remove_call:
3899 
3900                 /* Output args are dead.  */
3901                 for (i = 0; i < nb_oargs; i++) {
3902                     ts = arg_temp(op->args[i]);
3903                     if (ts->state & TS_DEAD) {
3904                         arg_life |= DEAD_ARG << i;
3905                     }
3906                     if (ts->state & TS_MEM) {
3907                         arg_life |= SYNC_ARG << i;
3908                     }
3909                     ts->state = TS_DEAD;
3910                     la_reset_pref(ts);
3911                 }
3912 
3913                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3914                 memset(op->output_pref, 0, sizeof(op->output_pref));
3915 
3916                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3917                                     TCG_CALL_NO_READ_GLOBALS))) {
3918                     la_global_kill(s, nb_globals);
3919                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3920                     la_global_sync(s, nb_globals);
3921                 }
3922 
3923                 /* Record arguments that die in this helper.  */
3924                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3925                     ts = arg_temp(op->args[i]);
3926                     if (ts->state & TS_DEAD) {
3927                         arg_life |= DEAD_ARG << i;
3928                     }
3929                 }
3930 
3931                 /* For all live registers, remove call-clobbered prefs.  */
3932                 la_cross_call(s, nb_temps);
3933 
3934                 /*
3935                  * Input arguments are live for preceding opcodes.
3936                  *
3937                  * For those arguments that die, and will be allocated in
3938                  * registers, clear the register set for that arg, to be
3939                  * filled in below.  For args that will be on the stack,
3940                  * reset to any available reg.  Process arguments in reverse
3941                  * order so that if a temp is used more than once, the stack
3942                  * reset to max happens before the register reset to 0.
3943                  */
3944                 for (i = nb_iargs - 1; i >= 0; i--) {
3945                     const TCGCallArgumentLoc *loc = &info->in[i];
3946                     ts = arg_temp(op->args[nb_oargs + i]);
3947 
3948                     if (ts->state & TS_DEAD) {
3949                         switch (loc->kind) {
3950                         case TCG_CALL_ARG_NORMAL:
3951                         case TCG_CALL_ARG_EXTEND_U:
3952                         case TCG_CALL_ARG_EXTEND_S:
3953                             if (arg_slot_reg_p(loc->arg_slot)) {
3954                                 *la_temp_pref(ts) = 0;
3955                                 break;
3956                             }
3957                             /* fall through */
3958                         default:
3959                             *la_temp_pref(ts) =
3960                                 tcg_target_available_regs[ts->type];
3961                             break;
3962                         }
3963                         ts->state &= ~TS_DEAD;
3964                     }
3965                 }
3966 
3967                 /*
3968                  * For each input argument, add its input register to prefs.
3969                  * If a temp is used once, this produces a single set bit;
3970                  * if a temp is used multiple times, this produces a set.
3971                  */
3972                 for (i = 0; i < nb_iargs; i++) {
3973                     const TCGCallArgumentLoc *loc = &info->in[i];
3974                     ts = arg_temp(op->args[nb_oargs + i]);
3975 
3976                     switch (loc->kind) {
3977                     case TCG_CALL_ARG_NORMAL:
3978                     case TCG_CALL_ARG_EXTEND_U:
3979                     case TCG_CALL_ARG_EXTEND_S:
3980                         if (arg_slot_reg_p(loc->arg_slot)) {
3981                             tcg_regset_set_reg(*la_temp_pref(ts),
3982                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3983                         }
3984                         break;
3985                     default:
3986                         break;
3987                     }
3988                 }
3989             }
3990             break;
3991         case INDEX_op_insn_start:
3992             break;
3993         case INDEX_op_discard:
3994             /* mark the temporary as dead */
3995             ts = arg_temp(op->args[0]);
3996             ts->state = TS_DEAD;
3997             la_reset_pref(ts);
3998             break;
3999 
4000         case INDEX_op_add2_i32:
4001         case INDEX_op_add2_i64:
4002             opc_new = INDEX_op_add;
4003             goto do_addsub2;
4004         case INDEX_op_sub2_i32:
4005             opc_new = INDEX_op_sub_i32;
4006             goto do_addsub2;
4007         case INDEX_op_sub2_i64:
4008             opc_new = INDEX_op_sub_i64;
4009         do_addsub2:
4010             nb_iargs = 4;
4011             nb_oargs = 2;
4012             /* Test if the high part of the operation is dead, but not
4013                the low part.  The result can be optimized to a simple
4014                add or sub.  This happens often for an x86_64 guest when
4015                the cpu mode is set to 32-bit.  */
4016             if (arg_temp(op->args[1])->state == TS_DEAD) {
4017                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4018                     goto do_remove;
4019                 }
4020                 /* Replace the opcode and adjust the args in place,
4021                    leaving 3 unused args at the end.  */
4022                 op->opc = opc = opc_new;
4023                 op->args[1] = op->args[2];
4024                 op->args[2] = op->args[4];
4025                 /* Fall through and mark the single-word operation live.  */
4026                 nb_iargs = 2;
4027                 nb_oargs = 1;
4028             }
4029             goto do_not_remove;
4030 
4031         case INDEX_op_mulu2_i32:
4032             opc_new = INDEX_op_mul_i32;
4033             opc_new2 = INDEX_op_muluh_i32;
4034             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4035             goto do_mul2;
4036         case INDEX_op_muls2_i32:
4037             opc_new = INDEX_op_mul_i32;
4038             opc_new2 = INDEX_op_mulsh_i32;
4039             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4040             goto do_mul2;
4041         case INDEX_op_mulu2_i64:
4042             opc_new = INDEX_op_mul_i64;
4043             opc_new2 = INDEX_op_muluh_i64;
4044             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4045             goto do_mul2;
4046         case INDEX_op_muls2_i64:
4047             opc_new = INDEX_op_mul_i64;
4048             opc_new2 = INDEX_op_mulsh_i64;
4049             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4050             goto do_mul2;
4051         do_mul2:
4052             nb_iargs = 2;
4053             nb_oargs = 2;
4054             if (arg_temp(op->args[1])->state == TS_DEAD) {
4055                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4056                     /* Both parts of the operation are dead.  */
4057                     goto do_remove;
4058                 }
4059                 /* The high part of the operation is dead; generate the low. */
4060                 op->opc = opc = opc_new;
4061                 op->args[1] = op->args[2];
4062                 op->args[2] = op->args[3];
4063             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4064                 /* The low part of the operation is dead; generate the high. */
4065                 op->opc = opc = opc_new2;
4066                 op->args[0] = op->args[1];
4067                 op->args[1] = op->args[2];
4068                 op->args[2] = op->args[3];
4069             } else {
4070                 goto do_not_remove;
4071             }
4072             /* Mark the single-word operation live.  */
4073             nb_oargs = 1;
4074             goto do_not_remove;
4075 
4076         default:
4077             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4078             nb_iargs = def->nb_iargs;
4079             nb_oargs = def->nb_oargs;
4080 
4081             /* Test if the operation can be removed because all
4082                its outputs are dead. We assume that nb_oargs == 0
4083                implies side effects.  */
4084             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4085                 for (i = 0; i < nb_oargs; i++) {
4086                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4087                         goto do_not_remove;
4088                     }
4089                 }
4090                 goto do_remove;
4091             }
4092             goto do_not_remove;
4093 
4094         do_remove:
4095             tcg_op_remove(s, op);
4096             break;
4097 
4098         do_not_remove:
4099             for (i = 0; i < nb_oargs; i++) {
4100                 ts = arg_temp(op->args[i]);
4101 
4102                 /* Remember the preference of the uses that followed.  */
4103                 if (i < ARRAY_SIZE(op->output_pref)) {
4104                     op->output_pref[i] = *la_temp_pref(ts);
4105                 }
4106 
4107                 /* Output args are dead.  */
4108                 if (ts->state & TS_DEAD) {
4109                     arg_life |= DEAD_ARG << i;
4110                 }
4111                 if (ts->state & TS_MEM) {
4112                     arg_life |= SYNC_ARG << i;
4113                 }
4114                 ts->state = TS_DEAD;
4115                 la_reset_pref(ts);
4116             }
4117 
4118             /* If end of basic block, update.  */
4119             if (def->flags & TCG_OPF_BB_EXIT) {
4120                 la_func_end(s, nb_globals, nb_temps);
4121             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4122                 la_bb_sync(s, nb_globals, nb_temps);
4123             } else if (def->flags & TCG_OPF_BB_END) {
4124                 la_bb_end(s, nb_globals, nb_temps);
4125             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4126                 la_global_sync(s, nb_globals);
4127                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4128                     la_cross_call(s, nb_temps);
4129                 }
4130             }
4131 
4132             /* Record arguments that die in this opcode.  */
4133             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4134                 ts = arg_temp(op->args[i]);
4135                 if (ts->state & TS_DEAD) {
4136                     arg_life |= DEAD_ARG << i;
4137                 }
4138             }
4139 
4140             /* Input arguments are live for preceding opcodes.  */
4141             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4142                 ts = arg_temp(op->args[i]);
4143                 if (ts->state & TS_DEAD) {
4144                     /* For operands that were dead, initially allow
4145                        all regs for the type.  */
4146                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4147                     ts->state &= ~TS_DEAD;
4148                 }
4149             }
4150 
4151             /* Incorporate constraints for this opcode's input operands.  */
4152             switch (opc) {
4153             case INDEX_op_mov:
4154                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4155                    have proper constraints.  That said, special case
4156                    moves to propagate preferences backward.  */
4157                 if (IS_DEAD_ARG(1)) {
4158                     *la_temp_pref(arg_temp(op->args[0]))
4159                         = *la_temp_pref(arg_temp(op->args[1]));
4160                 }
4161                 break;
4162 
4163             default:
4164                 args_ct = opcode_args_ct(op);
4165                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4166                     const TCGArgConstraint *ct = &args_ct[i];
4167                     TCGRegSet set, *pset;
4168 
4169                     ts = arg_temp(op->args[i]);
4170                     pset = la_temp_pref(ts);
4171                     set = *pset;
4172 
4173                     set &= ct->regs;
4174                     if (ct->ialias) {
4175                         set &= output_pref(op, ct->alias_index);
4176                     }
4177                     /* If the combination is not possible, restart.  */
4178                     if (set == 0) {
4179                         set = ct->regs;
4180                     }
4181                     *pset = set;
4182                 }
4183                 break;
4184             }
4185             break;
4186         }
4187         op->life = arg_life;
4188     }
4189 }
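
/*
 * The computed op->life packs one DEAD_ARG and one SYNC_ARG bit per
 * operand; the register allocator below consumes them through the
 * IS_DEAD_ARG() and NEED_SYNC_ARG() macros.
 */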
4190 
4191 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4192 static bool __attribute__((noinline))
4193 liveness_pass_2(TCGContext *s)
4194 {
4195     int nb_globals = s->nb_globals;
4196     int nb_temps, i;
4197     bool changes = false;
4198     TCGOp *op, *op_next;
4199 
4200     /* Create a temporary for each indirect global.  */
4201     for (i = 0; i < nb_globals; ++i) {
4202         TCGTemp *its = &s->temps[i];
4203         if (its->indirect_reg) {
4204             TCGTemp *dts = tcg_temp_alloc(s);
4205             dts->type = its->type;
4206             dts->base_type = its->base_type;
4207             dts->temp_subindex = its->temp_subindex;
4208             dts->kind = TEMP_EBB;
4209             its->state_ptr = dts;
4210         } else {
4211             its->state_ptr = NULL;
4212         }
4213         /* All globals begin dead.  */
4214         its->state = TS_DEAD;
4215     }
4216     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4217         TCGTemp *its = &s->temps[i];
4218         its->state_ptr = NULL;
4219         its->state = TS_DEAD;
4220     }
4221 
4222     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4223         TCGOpcode opc = op->opc;
4224         const TCGOpDef *def = &tcg_op_defs[opc];
4225         TCGLifeData arg_life = op->life;
4226         int nb_iargs, nb_oargs, call_flags;
4227         TCGTemp *arg_ts, *dir_ts;
4228 
4229         if (opc == INDEX_op_call) {
4230             nb_oargs = TCGOP_CALLO(op);
4231             nb_iargs = TCGOP_CALLI(op);
4232             call_flags = tcg_call_flags(op);
4233         } else {
4234             nb_iargs = def->nb_iargs;
4235             nb_oargs = def->nb_oargs;
4236 
4237             /* Set flags similar to how calls require.  */
4238             if (def->flags & TCG_OPF_COND_BRANCH) {
4239                 /* Like reading globals: sync_globals */
4240                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4241             } else if (def->flags & TCG_OPF_BB_END) {
4242                 /* Like writing globals: save_globals */
4243                 call_flags = 0;
4244             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4245                 /* Like reading globals: sync_globals */
4246                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4247             } else {
4248                 /* No effect on globals.  */
4249                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4250                               TCG_CALL_NO_WRITE_GLOBALS);
4251             }
4252         }
4253 
4254         /* Make sure that input arguments are available.  */
4255         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4256             arg_ts = arg_temp(op->args[i]);
4257             dir_ts = arg_ts->state_ptr;
4258             if (dir_ts && arg_ts->state == TS_DEAD) {
4259                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4260                                   ? INDEX_op_ld_i32
4261                                   : INDEX_op_ld_i64);
4262                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4263                                                   arg_ts->type, 3);
4264 
4265                 lop->args[0] = temp_arg(dir_ts);
4266                 lop->args[1] = temp_arg(arg_ts->mem_base);
4267                 lop->args[2] = arg_ts->mem_offset;
4268 
4269                 /* Loaded, but synced with memory.  */
4270                 arg_ts->state = TS_MEM;
4271             }
4272         }
4273 
4274         /* Perform input replacement, and mark inputs that became dead.
4275            No action is required except keeping temp_state up to date
4276            so that we reload when needed.  */
4277         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4278             arg_ts = arg_temp(op->args[i]);
4279             dir_ts = arg_ts->state_ptr;
4280             if (dir_ts) {
4281                 op->args[i] = temp_arg(dir_ts);
4282                 changes = true;
4283                 if (IS_DEAD_ARG(i)) {
4284                     arg_ts->state = TS_DEAD;
4285                 }
4286             }
4287         }
4288 
4289         /* Liveness analysis should ensure that the following are
4290            all correct, for call sites and basic block end points.  */
4291         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4292             /* Nothing to do */
4293         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4294             for (i = 0; i < nb_globals; ++i) {
4295                 /* Liveness should see that globals are synced back,
4296                    that is, either TS_DEAD or TS_MEM.  */
4297                 arg_ts = &s->temps[i];
4298                 tcg_debug_assert(arg_ts->state_ptr == 0
4299                                  || arg_ts->state != 0);
4300             }
4301         } else {
4302             for (i = 0; i < nb_globals; ++i) {
4303                 /* Liveness should see that globals are saved back,
4304                    that is, TS_DEAD, waiting to be reloaded.  */
4305                 arg_ts = &s->temps[i];
4306                 tcg_debug_assert(arg_ts->state_ptr == 0
4307                                  || arg_ts->state == TS_DEAD);
4308             }
4309         }
4310 
4311         /* Outputs become available.  */
4312         if (opc == INDEX_op_mov) {
4313             arg_ts = arg_temp(op->args[0]);
4314             dir_ts = arg_ts->state_ptr;
4315             if (dir_ts) {
4316                 op->args[0] = temp_arg(dir_ts);
4317                 changes = true;
4318 
4319                 /* The output is now live and modified.  */
4320                 arg_ts->state = 0;
4321 
4322                 if (NEED_SYNC_ARG(0)) {
4323                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4324                                       ? INDEX_op_st_i32
4325                                       : INDEX_op_st_i64);
4326                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4327                                                      arg_ts->type, 3);
4328                     TCGTemp *out_ts = dir_ts;
4329 
4330                     if (IS_DEAD_ARG(0)) {
4331                         out_ts = arg_temp(op->args[1]);
4332                         arg_ts->state = TS_DEAD;
4333                         tcg_op_remove(s, op);
4334                     } else {
4335                         arg_ts->state = TS_MEM;
4336                     }
4337 
4338                     sop->args[0] = temp_arg(out_ts);
4339                     sop->args[1] = temp_arg(arg_ts->mem_base);
4340                     sop->args[2] = arg_ts->mem_offset;
4341                 } else {
4342                     tcg_debug_assert(!IS_DEAD_ARG(0));
4343                 }
4344             }
4345         } else {
4346             for (i = 0; i < nb_oargs; i++) {
4347                 arg_ts = arg_temp(op->args[i]);
4348                 dir_ts = arg_ts->state_ptr;
4349                 if (!dir_ts) {
4350                     continue;
4351                 }
4352                 op->args[i] = temp_arg(dir_ts);
4353                 changes = true;
4354 
4355                 /* The output is now live and modified.  */
4356                 arg_ts->state = 0;
4357 
4358                 /* Sync outputs upon their last write.  */
4359                 if (NEED_SYNC_ARG(i)) {
4360                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4361                                       ? INDEX_op_st_i32
4362                                       : INDEX_op_st_i64);
4363                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4364                                                      arg_ts->type, 3);
4365 
4366                     sop->args[0] = temp_arg(dir_ts);
4367                     sop->args[1] = temp_arg(arg_ts->mem_base);
4368                     sop->args[2] = arg_ts->mem_offset;
4369 
4370                     arg_ts->state = TS_MEM;
4371                 }
4372                 /* Drop outputs that are dead.  */
4373                 if (IS_DEAD_ARG(i)) {
4374                     arg_ts->state = TS_DEAD;
4375                 }
4376             }
4377         }
4378     }
4379 
4380     return changes;
4381 }
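
/*
 * Sketch for an indirect global G with shadow temp D (= state_ptr):
 *
 *     add_i32 t0, G, t1    becomes    ld_i32  D, base(G), ofs(G)
 *                                     add_i32 t0, D, t1
 *
 * with the load elided while D is still valid, and a matching st_i32
 * inserted after the last write wherever liveness requested a sync.
 */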
4382 
4383 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4384 {
4385     intptr_t off;
4386     int size, align;
4387 
4388     /* When allocating an object, look at the full type. */
4389     size = tcg_type_size(ts->base_type);
4390     switch (ts->base_type) {
4391     case TCG_TYPE_I32:
4392         align = 4;
4393         break;
4394     case TCG_TYPE_I64:
4395     case TCG_TYPE_V64:
4396         align = 8;
4397         break;
4398     case TCG_TYPE_I128:
4399     case TCG_TYPE_V128:
4400     case TCG_TYPE_V256:
4401         /*
4402          * Note that we do not require aligned storage for V256,
4403          * and that we provide alignment for I128 to match V128,
4404          * even if that's above what the host ABI requires.
4405          */
4406         align = 16;
4407         break;
4408     default:
4409         g_assert_not_reached();
4410     }
4411 
4412     /*
4413      * Assume the stack is sufficiently aligned.
4414      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4415      * and do not require 16 byte vector alignment.  This seems slightly
4416      * easier than fully parameterizing the above switch statement.
4417      */
4418     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4419     off = ROUND_UP(s->current_frame_offset, align);
4420 
4421     /* If we've exhausted the stack frame, restart with a smaller TB. */
4422     if (off + size > s->frame_end) {
4423         tcg_raise_tb_overflow(s);
4424     }
4425     s->current_frame_offset = off + size;
4426 #if defined(__sparc__)
4427     off += TCG_TARGET_STACK_BIAS;
4428 #endif
4429 
4430     /* If the object was subdivided, assign memory to all the parts. */
4431     if (ts->base_type != ts->type) {
4432         int part_size = tcg_type_size(ts->type);
4433         int part_count = size / part_size;
4434 
4435         /*
4436          * Each part is allocated sequentially in tcg_temp_new_internal.
4437          * Jump back to the first part by subtracting the current index.
4438          */
4439         ts -= ts->temp_subindex;
4440         for (int i = 0; i < part_count; ++i) {
4441             ts[i].mem_offset = off + i * part_size;
4442             ts[i].mem_base = s->frame_temp;
4443             ts[i].mem_allocated = 1;
4444         }
4445     } else {
4446         ts->mem_offset = off;
4447         ts->mem_base = s->frame_temp;
4448         ts->mem_allocated = 1;
4449     }
4450 }
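
/*
 * Worked example: with current_frame_offset == 20, a TCG_TYPE_I64 temp
 * rounds up to off == 24, occupies [24, 32), and advances the frame
 * offset to 32 (assuming TCG_TARGET_STACK_ALIGN >= 8).
 */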
4451 
4452 /* Assign @reg to @ts, and update reg_to_temp[]. */
4453 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4454 {
4455     if (ts->val_type == TEMP_VAL_REG) {
4456         TCGReg old = ts->reg;
4457         tcg_debug_assert(s->reg_to_temp[old] == ts);
4458         if (old == reg) {
4459             return;
4460         }
4461         s->reg_to_temp[old] = NULL;
4462     }
4463     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4464     s->reg_to_temp[reg] = ts;
4465     ts->val_type = TEMP_VAL_REG;
4466     ts->reg = reg;
4467 }
4468 
4469 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4470 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4471 {
4472     tcg_debug_assert(type != TEMP_VAL_REG);
4473     if (ts->val_type == TEMP_VAL_REG) {
4474         TCGReg reg = ts->reg;
4475         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4476         s->reg_to_temp[reg] = NULL;
4477     }
4478     ts->val_type = type;
4479 }
4480 
4481 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4482 
4483 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4484    mark it free; otherwise mark it dead.  */
4485 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4486 {
4487     TCGTempVal new_type;
4488 
4489     switch (ts->kind) {
4490     case TEMP_FIXED:
4491         return;
4492     case TEMP_GLOBAL:
4493     case TEMP_TB:
4494         new_type = TEMP_VAL_MEM;
4495         break;
4496     case TEMP_EBB:
4497         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4498         break;
4499     case TEMP_CONST:
4500         new_type = TEMP_VAL_CONST;
4501         break;
4502     default:
4503         g_assert_not_reached();
4504     }
4505     set_temp_val_nonreg(s, ts, new_type);
4506 }
4507 
4508 /* Mark a temporary as dead.  */
4509 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4510 {
4511     temp_free_or_dead(s, ts, 1);
4512 }
4513 
4514 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4515    register needs to be allocated to store a constant.  If 'free_or_dead'
4516    is non-zero, subsequently release the temporary; if it is positive, the
4517    temp is dead; if it is negative, the temp is free.  */
4518 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4519                       TCGRegSet preferred_regs, int free_or_dead)
4520 {
4521     if (!temp_readonly(ts) && !ts->mem_coherent) {
4522         if (!ts->mem_allocated) {
4523             temp_allocate_frame(s, ts);
4524         }
4525         switch (ts->val_type) {
4526         case TEMP_VAL_CONST:
4527             /* If we're going to free the temp immediately, then we won't
4528                require it later in a register, so attempt to store the
4529                constant to memory directly.  */
4530             if (free_or_dead
4531                 && tcg_out_sti(s, ts->type, ts->val,
4532                                ts->mem_base->reg, ts->mem_offset)) {
4533                 break;
4534             }
4535             temp_load(s, ts, tcg_target_available_regs[ts->type],
4536                       allocated_regs, preferred_regs);
4537             /* fallthrough */
4538 
4539         case TEMP_VAL_REG:
4540             tcg_out_st(s, ts->type, ts->reg,
4541                        ts->mem_base->reg, ts->mem_offset);
4542             break;
4543 
4544         case TEMP_VAL_MEM:
4545             break;
4546 
4547         case TEMP_VAL_DEAD:
4548         default:
4549             g_assert_not_reached();
4550         }
4551         ts->mem_coherent = 1;
4552     }
4553     if (free_or_dead) {
4554         temp_free_or_dead(s, ts, free_or_dead);
4555     }
4556 }
4557 
4558 /* free register 'reg' by spilling the corresponding temporary if necessary */
4559 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4560 {
4561     TCGTemp *ts = s->reg_to_temp[reg];
4562     if (ts != NULL) {
4563         temp_sync(s, ts, allocated_regs, 0, -1);
4564     }
4565 }
4566 
4567 /**
4568  * tcg_reg_alloc:
4569  * @required_regs: Set of registers in which we must allocate.
4570  * @allocated_regs: Set of registers which must be avoided.
4571  * @preferred_regs: Set of registers we should prefer.
4572  * @rev: True if we search the registers in "indirect" order.
4573  *
4574  * The allocated register must be in @required_regs & ~@allocated_regs,
4575  * but if we can put it in @preferred_regs we may save a move later.
4576  */
4577 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4578                             TCGRegSet allocated_regs,
4579                             TCGRegSet preferred_regs, bool rev)
4580 {
4581     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4582     TCGRegSet reg_ct[2];
4583     const int *order;
4584 
4585     reg_ct[1] = required_regs & ~allocated_regs;
4586     tcg_debug_assert(reg_ct[1] != 0);
4587     reg_ct[0] = reg_ct[1] & preferred_regs;
4588 
4589     /* Skip the preferred_regs option if it cannot be satisfied,
4590        or if the preference made no difference.  */
4591     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4592 
4593     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4594 
4595     /* Try free registers, preferences first.  */
4596     for (j = f; j < 2; j++) {
4597         TCGRegSet set = reg_ct[j];
4598 
4599         if (tcg_regset_single(set)) {
4600             /* One register in the set.  */
4601             TCGReg reg = tcg_regset_first(set);
4602             if (s->reg_to_temp[reg] == NULL) {
4603                 return reg;
4604             }
4605         } else {
4606             for (i = 0; i < n; i++) {
4607                 TCGReg reg = order[i];
4608                 if (s->reg_to_temp[reg] == NULL &&
4609                     tcg_regset_test_reg(set, reg)) {
4610                     return reg;
4611                 }
4612             }
4613         }
4614     }
4615 
4616     /* We must spill something.  */
4617     for (j = f; j < 2; j++) {
4618         TCGRegSet set = reg_ct[j];
4619 
4620         if (tcg_regset_single(set)) {
4621             /* One register in the set.  */
4622             TCGReg reg = tcg_regset_first(set);
4623             tcg_reg_free(s, reg, allocated_regs);
4624             return reg;
4625         } else {
4626             for (i = 0; i < n; i++) {
4627                 TCGReg reg = order[i];
4628                 if (tcg_regset_test_reg(set, reg)) {
4629                     tcg_reg_free(s, reg, allocated_regs);
4630                     return reg;
4631                 }
4632             }
4633         }
4634     }
4635 
4636     g_assert_not_reached();
4637 }
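
/*
 * Note the two phases: first scan for a free register in allocation
 * order (preferred subset first), and only if none exists spill the
 * first eligible victim with tcg_reg_free().
 */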
4638 
4639 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4640                                  TCGRegSet allocated_regs,
4641                                  TCGRegSet preferred_regs, bool rev)
4642 {
4643     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4644     TCGRegSet reg_ct[2];
4645     const int *order;
4646 
4647     /* Keep reg I as a candidate only if neither I nor I+1 is allocated. */
4648     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4649     tcg_debug_assert(reg_ct[1] != 0);
4650     reg_ct[0] = reg_ct[1] & preferred_regs;
4651 
4652     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4653 
4654     /*
4655      * Skip the preferred_regs option if it cannot be satisfied,
4656      * or if the preference made no difference.
4657      */
4658     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4659 
4660     /*
4661      * Minimize the number of flushes by looking for 2 free registers first,
4662      * then a single flush, then two flushes.
4663      */
4664     for (fmin = 2; fmin >= 0; fmin--) {
4665         for (j = k; j < 2; j++) {
4666             TCGRegSet set = reg_ct[j];
4667 
4668             for (i = 0; i < n; i++) {
4669                 TCGReg reg = order[i];
4670 
4671                 if (tcg_regset_test_reg(set, reg)) {
4672                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4673                     if (f >= fmin) {
4674                         tcg_reg_free(s, reg, allocated_regs);
4675                         tcg_reg_free(s, reg + 1, allocated_regs);
4676                         return reg;
4677                     }
4678                 }
4679             }
4680         }
4681     }
4682     g_assert_not_reached();
4683 }
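
/*
 * A sketch of the cost scan above, with hypothetical contents: if
 * reg_ct[1] contains R0 and R2, and only R1 currently holds a temp,
 * then for the pair based at R2, f = !live(R2) + !live(R3) = 2 and
 * the fmin == 2 pass returns it with no spill at all (tcg_reg_free()
 * on a free register is a no-op).  The pair at R0 has f == 1 and
 * would only be chosen once fmin drops to 1, i.e. after no fully
 * free pair was found anywhere in the allocation order.
 */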
4684 
4685 /* Make sure the temporary is in a register.  If needed, allocate the register
4686    from DESIRED while avoiding ALLOCATED.  */
4687 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4688                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4689 {
4690     TCGReg reg;
4691 
4692     switch (ts->val_type) {
4693     case TEMP_VAL_REG:
4694         return;
4695     case TEMP_VAL_CONST:
4696         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4697                             preferred_regs, ts->indirect_base);
4698         if (ts->type <= TCG_TYPE_I64) {
4699             tcg_out_movi(s, ts->type, reg, ts->val);
4700         } else {
4701             uint64_t val = ts->val;
4702             MemOp vece = MO_64;
4703 
4704             /*
4705              * Find the minimal vector element that matches the constant.
4706              * The targets will, in general, have to do this search anyway,
4707              * so do it generically here.
4708              */
4709             if (val == dup_const(MO_8, val)) {
4710                 vece = MO_8;
4711             } else if (val == dup_const(MO_16, val)) {
4712                 vece = MO_16;
4713             } else if (val == dup_const(MO_32, val)) {
4714                 vece = MO_32;
4715             }
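            /*
             * For example (illustrative constants): 0x7f7f7f7f7f7f7f7f
             * matches dup_const(MO_8, val) and becomes a byte splat,
             * while 0x0001000200010002 fails MO_8 and MO_16 and first
             * matches at MO_32.
             */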
4716 
4717             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4718         }
4719         ts->mem_coherent = 0;
4720         break;
4721     case TEMP_VAL_MEM:
4722         if (!ts->mem_allocated) {
4723             temp_allocate_frame(s, ts);
4724         }
4725         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4726                             preferred_regs, ts->indirect_base);
4727         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4728         ts->mem_coherent = 1;
4729         break;
4730     case TEMP_VAL_DEAD:
4731     default:
4732         g_assert_not_reached();
4733     }
4734     set_temp_val_reg(s, ts, reg);
4735 }
4736 
4737 /* Save a temporary to memory. 'allocated_regs' is used in case a
4738    temporary register needs to be allocated to store a constant.  */
4739 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4740 {
4741     /* The liveness analysis already ensures that globals are back
4742        in memory. Keep a tcg_debug_assert for safety. */
4743     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4744 }
4745 
4746 /* save globals to their canonical location and assume they can be
4747    modified by the following code. 'allocated_regs' is used in case a
4748    temporary register needs to be allocated to store a constant. */
4749 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4750 {
4751     int i, n;
4752 
4753     for (i = 0, n = s->nb_globals; i < n; i++) {
4754         temp_save(s, &s->temps[i], allocated_regs);
4755     }
4756 }
4757 
4758 /* sync globals to their canonical location and assume they can be
4759    read by the following code. 'allocated_regs' is used in case a
4760    temporary register needs to be allocated to store a constant. */
4761 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4762 {
4763     int i, n;
4764 
4765     for (i = 0, n = s->nb_globals; i < n; i++) {
4766         TCGTemp *ts = &s->temps[i];
4767         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4768                          || ts->kind == TEMP_FIXED
4769                          || ts->mem_coherent);
4770     }
4771 }
4772 
4773 /* at the end of a basic block, we assume all temporaries are dead and
4774    all globals are stored at their canonical location. */
4775 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4776 {
4777     int i;
4778 
4779     for (i = s->nb_globals; i < s->nb_temps; i++) {
4780         TCGTemp *ts = &s->temps[i];
4781 
4782         switch (ts->kind) {
4783         case TEMP_TB:
4784             temp_save(s, ts, allocated_regs);
4785             break;
4786         case TEMP_EBB:
4787             /* The liveness analysis already ensures that temps are dead.
4788                Keep a tcg_debug_assert for safety. */
4789             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4790             break;
4791         case TEMP_CONST:
4792             /* Similarly, we should have freed any allocated register. */
4793             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4794             break;
4795         default:
4796             g_assert_not_reached();
4797         }
4798     }
4799 
4800     save_globals(s, allocated_regs);
4801 }
4802 
4803 /*
4804  * At a conditional branch, we assume all temporaries are dead unless
4805  * explicitly live-across-conditional-branch; all globals and local
4806  * temps are synced to their location.
4807  */
4808 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4809 {
4810     sync_globals(s, allocated_regs);
4811 
4812     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4813         TCGTemp *ts = &s->temps[i];
4814         /*
4815          * The liveness analysis already ensures that temps are dead.
4816          * Keep tcg_debug_asserts for safety.
4817          */
4818         switch (ts->kind) {
4819         case TEMP_TB:
4820             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4821             break;
4822         case TEMP_EBB:
4823         case TEMP_CONST:
4824             break;
4825         default:
4826             g_assert_not_reached();
4827         }
4828     }
4829 }
4830 
4831 /*
4832  * Specialized code generation for INDEX_op_mov_* with a constant.
4833  */
4834 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4835                                   tcg_target_ulong val, TCGLifeData arg_life,
4836                                   TCGRegSet preferred_regs)
4837 {
4838     /* ENV should not be modified.  */
4839     tcg_debug_assert(!temp_readonly(ots));
4840 
4841     /* The movi is not explicitly generated here.  */
4842     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4843     ots->val = val;
4844     ots->mem_coherent = 0;
4845     if (NEED_SYNC_ARG(0)) {
4846         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4847     } else if (IS_DEAD_ARG(0)) {
4848         temp_dead(s, ots);
4849     }
4850 }
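
/*
 * E.g. (illustrative): for "mov_i32 t0, $0x1234" where t0 needs no
 * sync, no host code is emitted at all; the constant is merely
 * recorded and will be materialized later, if ever, by temp_load().
 */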
4851 
4852 /*
4853  * Specialized code generation for INDEX_op_mov_*.
4854  */
4855 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4856 {
4857     const TCGLifeData arg_life = op->life;
4858     TCGRegSet allocated_regs, preferred_regs;
4859     TCGTemp *ts, *ots;
4860     TCGType otype, itype;
4861     TCGReg oreg, ireg;
4862 
4863     allocated_regs = s->reserved_regs;
4864     preferred_regs = output_pref(op, 0);
4865     ots = arg_temp(op->args[0]);
4866     ts = arg_temp(op->args[1]);
4867 
4868     /* ENV should not be modified.  */
4869     tcg_debug_assert(!temp_readonly(ots));
4870 
4871     /* Note that otype != itype for no-op truncation.  */
4872     otype = ots->type;
4873     itype = ts->type;
4874 
4875     if (ts->val_type == TEMP_VAL_CONST) {
4876         /* propagate constant or generate sti */
4877         tcg_target_ulong val = ts->val;
4878         if (IS_DEAD_ARG(1)) {
4879             temp_dead(s, ts);
4880         }
4881         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4882         return;
4883     }
4884 
4885     /* If the source value is in memory we're going to be forced
4886        to have it in a register in order to perform the copy.  Copy
4887        the SOURCE value into its own register first, that way we
4888        don't have to reload SOURCE the next time it is used. */
4889     if (ts->val_type == TEMP_VAL_MEM) {
4890         temp_load(s, ts, tcg_target_available_regs[itype],
4891                   allocated_regs, preferred_regs);
4892     }
4893     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4894     ireg = ts->reg;
4895 
4896     if (IS_DEAD_ARG(0)) {
4897         /* mov to a non-saved dead register makes no sense (even with
4898            liveness analysis disabled). */
4899         tcg_debug_assert(NEED_SYNC_ARG(0));
4900         if (!ots->mem_allocated) {
4901             temp_allocate_frame(s, ots);
4902         }
4903         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4904         if (IS_DEAD_ARG(1)) {
4905             temp_dead(s, ts);
4906         }
4907         temp_dead(s, ots);
4908         return;
4909     }
4910 
4911     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4912         /*
4913          * The mov can be suppressed.  Kill input first, so that it
4914          * is unlinked from reg_to_temp, then set the output to the
4915          * reg that we saved from the input.
4916          */
4917         temp_dead(s, ts);
4918         oreg = ireg;
4919     } else {
4920         if (ots->val_type == TEMP_VAL_REG) {
4921             oreg = ots->reg;
4922         } else {
4923             /* Make sure to not spill the input register during allocation. */
4924             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4925                                  allocated_regs | ((TCGRegSet)1 << ireg),
4926                                  preferred_regs, ots->indirect_base);
4927         }
4928         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4929             /*
4930              * Cross register class move not supported.
4931              * Store the source register into the destination slot
4932              * and leave the destination temp as TEMP_VAL_MEM.
4933              */
4934             assert(!temp_readonly(ots));
4935             if (!ots->mem_allocated) {
4936                 temp_allocate_frame(s, ots);
4937             }
4938             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4939             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4940             ots->mem_coherent = 1;
4941             return;
4942         }
4943     }
4944     set_temp_val_reg(s, ots, oreg);
4945     ots->mem_coherent = 0;
4946 
4947     if (NEED_SYNC_ARG(0)) {
4948         temp_sync(s, ots, allocated_regs, 0, 0);
4949     }
4950 }
4951 
4952 /*
4953  * Specialized code generation for INDEX_op_dup_vec.
4954  */
4955 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4956 {
4957     const TCGLifeData arg_life = op->life;
4958     TCGRegSet dup_out_regs, dup_in_regs;
4959     const TCGArgConstraint *dup_args_ct;
4960     TCGTemp *its, *ots;
4961     TCGType itype, vtype;
4962     unsigned vece;
4963     int lowpart_ofs;
4964     bool ok;
4965 
4966     ots = arg_temp(op->args[0]);
4967     its = arg_temp(op->args[1]);
4968 
4969     /* ENV should not be modified.  */
4970     tcg_debug_assert(!temp_readonly(ots));
4971 
4972     itype = its->type;
4973     vece = TCGOP_VECE(op);
4974     vtype = TCGOP_TYPE(op);
4975 
4976     if (its->val_type == TEMP_VAL_CONST) {
4977         /* Propagate constant via movi -> dupi.  */
4978         tcg_target_ulong val = its->val;
4979         if (IS_DEAD_ARG(1)) {
4980             temp_dead(s, its);
4981         }
4982         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4983         return;
4984     }
4985 
4986     dup_args_ct = opcode_args_ct(op);
4987     dup_out_regs = dup_args_ct[0].regs;
4988     dup_in_regs = dup_args_ct[1].regs;
4989 
4990     /* Allocate the output register now.  */
4991     if (ots->val_type != TEMP_VAL_REG) {
4992         TCGRegSet allocated_regs = s->reserved_regs;
4993         TCGReg oreg;
4994 
4995         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4996             /* Make sure to not spill the input register. */
4997             tcg_regset_set_reg(allocated_regs, its->reg);
4998         }
4999         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5000                              output_pref(op, 0), ots->indirect_base);
5001         set_temp_val_reg(s, ots, oreg);
5002     }
5003 
5004     switch (its->val_type) {
5005     case TEMP_VAL_REG:
5006         /*
5007          * The dup constraints must be broad, covering all possible VECE.
5008          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5009          * to fail, indicating that extra moves are required for that case.
5010          */
5011         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5012             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5013                 goto done;
5014             }
5015             /* Try again from memory or a vector input register.  */
5016         }
5017         if (!its->mem_coherent) {
5018             /*
5019              * The input register is not synced, and so an extra store
5020              * would be required to use memory.  Attempt an integer-vector
5021              * register move first.  We do not have a TCGRegSet for this.
5022              */
5023             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5024                 break;
5025             }
5026             /* Sync the temp back to its slot and load from there.  */
5027             temp_sync(s, its, s->reserved_regs, 0, 0);
5028         }
5029         /* fall through */
5030 
5031     case TEMP_VAL_MEM:
5032         lowpart_ofs = 0;
5033         if (HOST_BIG_ENDIAN) {
5034             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5035         }
5036         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5037                              its->mem_offset + lowpart_ofs)) {
5038             goto done;
5039         }
5040         /* Load the input into the destination vector register. */
5041         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5042         break;
5043 
5044     default:
5045         g_assert_not_reached();
5046     }
5047 
5048     /* We now have a vector input register, so dup must succeed. */
5049     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5050     tcg_debug_assert(ok);
5051 
5052  done:
5053     ots->mem_coherent = 0;
5054     if (IS_DEAD_ARG(1)) {
5055         temp_dead(s, its);
5056     }
5057     if (NEED_SYNC_ARG(0)) {
5058         temp_sync(s, ots, s->reserved_regs, 0, 0);
5059     }
5060     if (IS_DEAD_ARG(0)) {
5061         temp_dead(s, ots);
5062     }
5063 }
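
/*
 * Example of the lowpart_ofs adjustment above (illustrative numbers):
 * for a TCG_TYPE_I64 input with vece == MO_8 on a big-endian host,
 * tcg_type_size() is 8 and (1 << vece) is 1, giving lowpart_ofs == 7.
 * The least significant byte, which is the element to be replicated,
 * sits at the highest address of the 8-byte slot, so dupm must load
 * from mem_offset + 7 rather than mem_offset.
 */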
5064 
5065 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5066 {
5067     const TCGLifeData arg_life = op->life;
5068     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5069     TCGRegSet i_allocated_regs;
5070     TCGRegSet o_allocated_regs;
5071     int i, k, nb_iargs, nb_oargs;
5072     TCGReg reg;
5073     TCGArg arg;
5074     const TCGArgConstraint *args_ct;
5075     const TCGArgConstraint *arg_ct;
5076     TCGTemp *ts;
5077     TCGArg new_args[TCG_MAX_OP_ARGS];
5078     int const_args[TCG_MAX_OP_ARGS];
5079     TCGCond op_cond;
5080 
5081     nb_oargs = def->nb_oargs;
5082     nb_iargs = def->nb_iargs;
5083 
5084     /* copy constants */
5085     memcpy(new_args + nb_oargs + nb_iargs,
5086            op->args + nb_oargs + nb_iargs,
5087            sizeof(TCGArg) * def->nb_cargs);
5088 
5089     i_allocated_regs = s->reserved_regs;
5090     o_allocated_regs = s->reserved_regs;
5091 
5092     switch (op->opc) {
5093     case INDEX_op_brcond_i32:
5094     case INDEX_op_brcond_i64:
5095         op_cond = op->args[2];
5096         break;
5097     case INDEX_op_setcond_i32:
5098     case INDEX_op_setcond_i64:
5099     case INDEX_op_negsetcond_i32:
5100     case INDEX_op_negsetcond_i64:
5101     case INDEX_op_cmp_vec:
5102         op_cond = op->args[3];
5103         break;
5104     case INDEX_op_brcond2_i32:
5105         op_cond = op->args[4];
5106         break;
5107     case INDEX_op_movcond_i32:
5108     case INDEX_op_movcond_i64:
5109     case INDEX_op_setcond2_i32:
5110     case INDEX_op_cmpsel_vec:
5111         op_cond = op->args[5];
5112         break;
5113     default:
5114         /* No condition within opcode. */
5115         op_cond = TCG_COND_ALWAYS;
5116         break;
5117     }
5118 
5119     args_ct = opcode_args_ct(op);
5120 
5121     /* satisfy input constraints */
5122     for (k = 0; k < nb_iargs; k++) {
5123         TCGRegSet i_preferred_regs, i_required_regs;
5124         bool allocate_new_reg, copyto_new_reg;
5125         TCGTemp *ts2;
5126         int i1, i2;
5127 
5128         i = args_ct[nb_oargs + k].sort_index;
5129         arg = op->args[i];
5130         arg_ct = &args_ct[i];
5131         ts = arg_temp(arg);
5132 
5133         if (ts->val_type == TEMP_VAL_CONST) {
5134 #ifdef TCG_REG_ZERO
5135             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5136                 /* Hardware zero register: indicate register via non-const. */
5137                 const_args[i] = 0;
5138                 new_args[i] = TCG_REG_ZERO;
5139                 continue;
5140             }
5141 #endif
5142 
5143             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5144                                        op_cond, TCGOP_VECE(op))) {
5145                 /* constant is OK for instruction */
5146                 const_args[i] = 1;
5147                 new_args[i] = ts->val;
5148                 continue;
5149             }
5150         }
5151 
5152         reg = ts->reg;
5153         i_preferred_regs = 0;
5154         i_required_regs = arg_ct->regs;
5155         allocate_new_reg = false;
5156         copyto_new_reg = false;
5157 
5158         switch (arg_ct->pair) {
5159         case 0: /* not paired */
5160             if (arg_ct->ialias) {
5161                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5162 
5163                 /*
5164                  * If the input is readonly, then it cannot also be an
5165                  * output and aliased to itself.  If the input is not
5166                  * dead after the instruction, we must allocate a new
5167                  * register and move it.
5168                  */
5169                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5170                     || args_ct[arg_ct->alias_index].newreg) {
5171                     allocate_new_reg = true;
5172                 } else if (ts->val_type == TEMP_VAL_REG) {
5173                     /*
5174                      * Check if the current register has already been
5175                      * allocated for another input.
5176                      */
5177                     allocate_new_reg =
5178                         tcg_regset_test_reg(i_allocated_regs, reg);
5179                 }
5180             }
5181             if (!allocate_new_reg) {
5182                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5183                           i_preferred_regs);
5184                 reg = ts->reg;
5185                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5186             }
5187             if (allocate_new_reg) {
5188                 /*
5189                  * Allocate a new register matching the constraint
5190                  * and move the temporary register into it.
5191                  */
5192                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5193                           i_allocated_regs, 0);
5194                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5195                                     i_preferred_regs, ts->indirect_base);
5196                 copyto_new_reg = true;
5197             }
5198             break;
5199 
5200         case 1:
5201             /* First of an input pair; if i1 == i2, the second is an output. */
5202             i1 = i;
5203             i2 = arg_ct->pair_index;
5204             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5205 
5206             /*
5207              * It is easier to default to allocating a new pair
5208              * and to identify a few cases where it's not required.
5209              */
5210             if (arg_ct->ialias) {
5211                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5212                 if (IS_DEAD_ARG(i1) &&
5213                     IS_DEAD_ARG(i2) &&
5214                     !temp_readonly(ts) &&
5215                     ts->val_type == TEMP_VAL_REG &&
5216                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5217                     tcg_regset_test_reg(i_required_regs, reg) &&
5218                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5219                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5220                     (ts2
5221                      ? ts2->val_type == TEMP_VAL_REG &&
5222                        ts2->reg == reg + 1 &&
5223                        !temp_readonly(ts2)
5224                      : s->reg_to_temp[reg + 1] == NULL)) {
5225                     break;
5226                 }
5227             } else {
5228                 /* Without aliasing, the pair must also be an input. */
5229                 tcg_debug_assert(ts2);
5230                 if (ts->val_type == TEMP_VAL_REG &&
5231                     ts2->val_type == TEMP_VAL_REG &&
5232                     ts2->reg == reg + 1 &&
5233                     tcg_regset_test_reg(i_required_regs, reg)) {
5234                     break;
5235                 }
5236             }
5237             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5238                                      0, ts->indirect_base);
5239             goto do_pair;
5240 
5241         case 2: /* pair second */
5242             reg = new_args[arg_ct->pair_index] + 1;
5243             goto do_pair;
5244 
5245         case 3: /* ialias with second output, no first input */
5246             tcg_debug_assert(arg_ct->ialias);
5247             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5248 
5249             if (IS_DEAD_ARG(i) &&
5250                 !temp_readonly(ts) &&
5251                 ts->val_type == TEMP_VAL_REG &&
5252                 reg > 0 &&
5253                 s->reg_to_temp[reg - 1] == NULL &&
5254                 tcg_regset_test_reg(i_required_regs, reg) &&
5255                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5256                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5257                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5258                 break;
5259             }
5260             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5261                                      i_allocated_regs, 0,
5262                                      ts->indirect_base);
5263             tcg_regset_set_reg(i_allocated_regs, reg);
5264             reg += 1;
5265             goto do_pair;
5266 
5267         do_pair:
5268             /*
5269              * If an aliased input is not dead after the instruction,
5270              * we must allocate a new register and move it.
5271              */
5272             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5273                 TCGRegSet t_allocated_regs = i_allocated_regs;
5274 
5275                 /*
5276                  * Because of the alias, and the continued life, make sure
5277                  * that the temp is somewhere *other* than the reg pair,
5278                  * and we get a copy in reg.
5279                  */
5280                 tcg_regset_set_reg(t_allocated_regs, reg);
5281                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5282                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5283                     /* If ts was already in reg, copy it somewhere else. */
5284                     TCGReg nr;
5285                     bool ok;
5286 
5287                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5288                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5289                                        t_allocated_regs, 0, ts->indirect_base);
5290                     ok = tcg_out_mov(s, ts->type, nr, reg);
5291                     tcg_debug_assert(ok);
5292 
5293                     set_temp_val_reg(s, ts, nr);
5294                 } else {
5295                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5296                               t_allocated_regs, 0);
5297                     copyto_new_reg = true;
5298                 }
5299             } else {
5300                 /* Preferably allocate to reg, otherwise copy. */
5301                 i_required_regs = (TCGRegSet)1 << reg;
5302                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5303                           i_preferred_regs);
5304                 copyto_new_reg = ts->reg != reg;
5305             }
5306             break;
5307 
5308         default:
5309             g_assert_not_reached();
5310         }
5311 
5312         if (copyto_new_reg) {
5313             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5314                 /*
5315                  * Cross register class move not supported.  Sync the
5316                  * temp back to its slot and load from there.
5317                  */
5318                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5319                 tcg_out_ld(s, ts->type, reg,
5320                            ts->mem_base->reg, ts->mem_offset);
5321             }
5322         }
5323         new_args[i] = reg;
5324         const_args[i] = 0;
5325         tcg_regset_set_reg(i_allocated_regs, reg);
5326     }
5327 
5328     /* mark dead temporaries and free the associated registers */
5329     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5330         if (IS_DEAD_ARG(i)) {
5331             temp_dead(s, arg_temp(op->args[i]));
5332         }
5333     }
5334 
5335     if (def->flags & TCG_OPF_COND_BRANCH) {
5336         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5337     } else if (def->flags & TCG_OPF_BB_END) {
5338         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5339     } else {
5340         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5341             /* XXX: permit generic clobber register list ? */
5342             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5343                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5344                     tcg_reg_free(s, i, i_allocated_regs);
5345                 }
5346             }
5347         }
5348         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5349             /* sync globals if the op has side effects and might trigger
5350                an exception. */
5351             sync_globals(s, i_allocated_regs);
5352         }
5353 
5354         /* satisfy the output constraints */
5355         for (k = 0; k < nb_oargs; k++) {
5356             i = args_ct[k].sort_index;
5357             arg = op->args[i];
5358             arg_ct = &args_ct[i];
5359             ts = arg_temp(arg);
5360 
5361             /* ENV should not be modified.  */
5362             tcg_debug_assert(!temp_readonly(ts));
5363 
5364             switch (arg_ct->pair) {
5365             case 0: /* not paired */
5366                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5367                     reg = new_args[arg_ct->alias_index];
5368                 } else if (arg_ct->newreg) {
5369                     reg = tcg_reg_alloc(s, arg_ct->regs,
5370                                         i_allocated_regs | o_allocated_regs,
5371                                         output_pref(op, k), ts->indirect_base);
5372                 } else {
5373                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5374                                         output_pref(op, k), ts->indirect_base);
5375                 }
5376                 break;
5377 
5378             case 1: /* first of pair */
5379                 if (arg_ct->oalias) {
5380                     reg = new_args[arg_ct->alias_index];
5381                 } else if (arg_ct->newreg) {
5382                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5383                                              i_allocated_regs | o_allocated_regs,
5384                                              output_pref(op, k),
5385                                              ts->indirect_base);
5386                 } else {
5387                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5388                                              output_pref(op, k),
5389                                              ts->indirect_base);
5390                 }
5391                 break;
5392 
5393             case 2: /* second of pair */
5394                 if (arg_ct->oalias) {
5395                     reg = new_args[arg_ct->alias_index];
5396                 } else {
5397                     reg = new_args[arg_ct->pair_index] + 1;
5398                 }
5399                 break;
5400 
5401             case 3: /* first of pair, aliasing with a second input */
5402                 tcg_debug_assert(!arg_ct->newreg);
5403                 reg = new_args[arg_ct->pair_index] - 1;
5404                 break;
5405 
5406             default:
5407                 g_assert_not_reached();
5408             }
5409             tcg_regset_set_reg(o_allocated_regs, reg);
5410             set_temp_val_reg(s, ts, reg);
5411             ts->mem_coherent = 0;
5412             new_args[i] = reg;
5413         }
5414     }
5415 
5416     /* emit instruction */
5417     TCGType type = TCGOP_TYPE(op);
5418     switch (op->opc) {
5419     case INDEX_op_ext_i32_i64:
5420         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5421         break;
5422     case INDEX_op_extu_i32_i64:
5423         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5424         break;
5425     case INDEX_op_extrl_i64_i32:
5426         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5427         break;
5428 
5429     case INDEX_op_add:
5430     case INDEX_op_and:
5431     case INDEX_op_andc:
5432     case INDEX_op_eqv:
5433     case INDEX_op_nand:
5434     case INDEX_op_nor:
5435     case INDEX_op_or:
5436     case INDEX_op_orc:
5437     case INDEX_op_xor:
5438         {
5439             const TCGOutOpBinary *out =
5440                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5441 
5442             /* Constants should never appear in the first source operand. */
5443             tcg_debug_assert(!const_args[1]);
5444             if (const_args[2]) {
5445                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5446             } else {
5447                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5448             }
5449         }
5450         break;
5451 
5452     default:
5453         if (def->flags & TCG_OPF_VECTOR) {
5454             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5455                            TCGOP_VECE(op), new_args, const_args);
5456         } else {
5457             tcg_out_op(s, op->opc, type, new_args, const_args);
5458         }
5459         break;
5460     }
5461 
5462     /* move the outputs in the correct register if needed */
5463     for (i = 0; i < nb_oargs; i++) {
5464         ts = arg_temp(op->args[i]);
5465 
5466         /* ENV should not be modified.  */
5467         tcg_debug_assert(!temp_readonly(ts));
5468 
5469         if (NEED_SYNC_ARG(i)) {
5470             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5471         } else if (IS_DEAD_ARG(i)) {
5472             temp_dead(s, ts);
5473         }
5474     }
5475 }
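
/*
 * A worked example of the register-pair constraints handled above,
 * with hypothetical registers: on a 32-bit host, an op producing a
 * 64-bit result in the pair (R0, R1) from an aliased input that
 * already sits in (R0, R1), with both input halves dead, hits the
 * early break in input case 1.  The outputs then resolve via cases
 * 1 and 2 to the new_args of the inputs, so no moves are emitted
 * at all.
 */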
5476 
5477 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5478 {
5479     const TCGLifeData arg_life = op->life;
5480     TCGTemp *ots, *itsl, *itsh;
5481     TCGType vtype = TCGOP_TYPE(op);
5482 
5483     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5484     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5485     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5486 
5487     ots = arg_temp(op->args[0]);
5488     itsl = arg_temp(op->args[1]);
5489     itsh = arg_temp(op->args[2]);
5490 
5491     /* ENV should not be modified.  */
5492     tcg_debug_assert(!temp_readonly(ots));
5493 
5494     /* Allocate the output register now.  */
5495     if (ots->val_type != TEMP_VAL_REG) {
5496         TCGRegSet allocated_regs = s->reserved_regs;
5497         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5498         TCGReg oreg;
5499 
5500         /* Make sure to not spill the input registers. */
5501         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5502             tcg_regset_set_reg(allocated_regs, itsl->reg);
5503         }
5504         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5505             tcg_regset_set_reg(allocated_regs, itsh->reg);
5506         }
5507 
5508         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5509                              output_pref(op, 0), ots->indirect_base);
5510         set_temp_val_reg(s, ots, oreg);
5511     }
5512 
5513     /* Promote dup2 of immediates to dupi_vec. */
5514     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5515         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5516         MemOp vece = MO_64;
5517 
5518         if (val == dup_const(MO_8, val)) {
5519             vece = MO_8;
5520         } else if (val == dup_const(MO_16, val)) {
5521             vece = MO_16;
5522         } else if (val == dup_const(MO_32, val)) {
5523             vece = MO_32;
5524         }
5525 
5526         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5527         goto done;
5528     }
5529 
5530     /* If the two inputs form one 64-bit value, try dupm_vec. */
5531     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5532         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5533         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5534         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5535 
5536         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5537         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5538 
5539         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5540                              its->mem_base->reg, its->mem_offset)) {
5541             goto done;
5542         }
5543     }
5544 
5545     /* Fall back to generic expansion. */
5546     return false;
5547 
5548  done:
5549     ots->mem_coherent = 0;
5550     if (IS_DEAD_ARG(1)) {
5551         temp_dead(s, itsl);
5552     }
5553     if (IS_DEAD_ARG(2)) {
5554         temp_dead(s, itsh);
5555     }
5556     if (NEED_SYNC_ARG(0)) {
5557         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5558     } else if (IS_DEAD_ARG(0)) {
5559         temp_dead(s, ots);
5560     }
5561     return true;
5562 }
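
/*
 * Illustrative example of the immediate promotion above: with
 * itsl->val == 0xdeadbeef and itsh->val == 0xdeadbeef, deposit64()
 * yields 0xdeadbeefdeadbeef, which fails dup_const() at MO_8 and
 * MO_16 but matches at MO_32, so a single 32-bit dupi replaces the
 * two-register dup2.
 */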
5563 
5564 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5565                          TCGRegSet allocated_regs)
5566 {
5567     if (ts->val_type == TEMP_VAL_REG) {
5568         if (ts->reg != reg) {
5569             tcg_reg_free(s, reg, allocated_regs);
5570             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5571                 /*
5572                  * Cross register class move not supported.  Sync the
5573                  * temp back to its slot and load from there.
5574                  */
5575                 temp_sync(s, ts, allocated_regs, 0, 0);
5576                 tcg_out_ld(s, ts->type, reg,
5577                            ts->mem_base->reg, ts->mem_offset);
5578             }
5579         }
5580     } else {
5581         TCGRegSet arg_set = 0;
5582 
5583         tcg_reg_free(s, reg, allocated_regs);
5584         tcg_regset_set_reg(arg_set, reg);
5585         temp_load(s, ts, arg_set, allocated_regs, 0);
5586     }
5587 }
5588 
5589 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5590                          TCGRegSet allocated_regs)
5591 {
5592     /*
5593      * When the destination is on the stack, load up the temp and store.
5594      * If there are many call-saved registers, the temp might live to
5595      * see another use; otherwise it'll be discarded.
5596      */
5597     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5598     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5599                arg_slot_stk_ofs(arg_slot));
5600 }
5601 
5602 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5603                             TCGTemp *ts, TCGRegSet *allocated_regs)
5604 {
5605     if (arg_slot_reg_p(l->arg_slot)) {
5606         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5607         load_arg_reg(s, reg, ts, *allocated_regs);
5608         tcg_regset_set_reg(*allocated_regs, reg);
5609     } else {
5610         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5611     }
5612 }
5613 
5614 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5615                          intptr_t ref_off, TCGRegSet *allocated_regs)
5616 {
5617     TCGReg reg;
5618 
5619     if (arg_slot_reg_p(arg_slot)) {
5620         reg = tcg_target_call_iarg_regs[arg_slot];
5621         tcg_reg_free(s, reg, *allocated_regs);
5622         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5623         tcg_regset_set_reg(*allocated_regs, reg);
5624     } else {
5625         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5626                             *allocated_regs, 0, false);
5627         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5628         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5629                    arg_slot_stk_ofs(arg_slot));
5630     }
5631 }
5632 
5633 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5634 {
5635     const int nb_oargs = TCGOP_CALLO(op);
5636     const int nb_iargs = TCGOP_CALLI(op);
5637     const TCGLifeData arg_life = op->life;
5638     const TCGHelperInfo *info = tcg_call_info(op);
5639     TCGRegSet allocated_regs = s->reserved_regs;
5640     int i;
5641 
5642     /*
5643      * Move inputs into place in reverse order,
5644      * so that we place stacked arguments first.
5645      */
5646     for (i = nb_iargs - 1; i >= 0; --i) {
5647         const TCGCallArgumentLoc *loc = &info->in[i];
5648         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5649 
5650         switch (loc->kind) {
5651         case TCG_CALL_ARG_NORMAL:
5652         case TCG_CALL_ARG_EXTEND_U:
5653         case TCG_CALL_ARG_EXTEND_S:
5654             load_arg_normal(s, loc, ts, &allocated_regs);
5655             break;
5656         case TCG_CALL_ARG_BY_REF:
5657             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5658             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5659                          arg_slot_stk_ofs(loc->ref_slot),
5660                          &allocated_regs);
5661             break;
5662         case TCG_CALL_ARG_BY_REF_N:
5663             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5664             break;
5665         default:
5666             g_assert_not_reached();
5667         }
5668     }
5669 
5670     /* Mark dead temporaries and free the associated registers.  */
5671     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5672         if (IS_DEAD_ARG(i)) {
5673             temp_dead(s, arg_temp(op->args[i]));
5674         }
5675     }
5676 
5677     /* Clobber call registers.  */
5678     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5679         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5680             tcg_reg_free(s, i, allocated_regs);
5681         }
5682     }
5683 
5684     /*
5685      * Save globals if they might be written by the helper,
5686      * sync them if they might be read.
5687      */
5688     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5689         /* Nothing to do */
5690     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5691         sync_globals(s, allocated_regs);
5692     } else {
5693         save_globals(s, allocated_regs);
5694     }
5695 
5696     /*
5697      * If the ABI passes a pointer to the returned struct as the first
5698      * argument, load that now.  Pass a pointer to the output home slot.
5699      */
5700     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5701         TCGTemp *ts = arg_temp(op->args[0]);
5702 
5703         if (!ts->mem_allocated) {
5704             temp_allocate_frame(s, ts);
5705         }
5706         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5707     }
5708 
5709     tcg_out_call(s, tcg_call_func(op), info);
5710 
5711     /* Assign output registers and emit moves if needed.  */
5712     switch (info->out_kind) {
5713     case TCG_CALL_RET_NORMAL:
5714         for (i = 0; i < nb_oargs; i++) {
5715             TCGTemp *ts = arg_temp(op->args[i]);
5716             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5717 
5718             /* ENV should not be modified.  */
5719             tcg_debug_assert(!temp_readonly(ts));
5720 
5721             set_temp_val_reg(s, ts, reg);
5722             ts->mem_coherent = 0;
5723         }
5724         break;
5725 
5726     case TCG_CALL_RET_BY_VEC:
5727         {
5728             TCGTemp *ts = arg_temp(op->args[0]);
5729 
5730             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5731             tcg_debug_assert(ts->temp_subindex == 0);
5732             if (!ts->mem_allocated) {
5733                 temp_allocate_frame(s, ts);
5734             }
5735             tcg_out_st(s, TCG_TYPE_V128,
5736                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5737                        ts->mem_base->reg, ts->mem_offset);
5738         }
5739         /* fall through to mark all parts in memory */
5740 
5741     case TCG_CALL_RET_BY_REF:
5742         /* The callee has performed a write through the reference. */
5743         for (i = 0; i < nb_oargs; i++) {
5744             TCGTemp *ts = arg_temp(op->args[i]);
5745             ts->val_type = TEMP_VAL_MEM;
5746         }
5747         break;
5748 
5749     default:
5750         g_assert_not_reached();
5751     }
5752 
5753     /* Flush or discard output registers as needed. */
5754     for (i = 0; i < nb_oargs; i++) {
5755         TCGTemp *ts = arg_temp(op->args[i]);
5756         if (NEED_SYNC_ARG(i)) {
5757             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5758         } else if (IS_DEAD_ARG(i)) {
5759             temp_dead(s, ts);
5760         }
5761     }
5762 }
5763 
5764 /**
5765  * atom_and_align_for_opc:
5766  * @s: tcg context
5767  * @opc: memory operation code
5768  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5769  * @allow_two_ops: true if we are prepared to issue two operations
5770  *
5771  * Return the alignment and atomicity to use for the inline fast path
5772  * for the given memory operation.  The alignment may be larger than
5773  * that specified in @opc, and the correct alignment will be diagnosed
5774  * by the slow path helper.
5775  *
5776  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5777  * and issue two loads or stores for subalignment.
5778  */
5779 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5780                                            MemOp host_atom, bool allow_two_ops)
5781 {
5782     MemOp align = memop_alignment_bits(opc);
5783     MemOp size = opc & MO_SIZE;
5784     MemOp half = size ? size - 1 : 0;
5785     MemOp atom = opc & MO_ATOM_MASK;
5786     MemOp atmax;
5787 
5788     switch (atom) {
5789     case MO_ATOM_NONE:
5790         /* The operation requires no specific atomicity. */
5791         atmax = MO_8;
5792         break;
5793 
5794     case MO_ATOM_IFALIGN:
5795         atmax = size;
5796         break;
5797 
5798     case MO_ATOM_IFALIGN_PAIR:
5799         atmax = half;
5800         break;
5801 
5802     case MO_ATOM_WITHIN16:
5803         atmax = size;
5804         if (size == MO_128) {
5805             /* Misalignment implies !within16, and therefore no atomicity. */
5806         } else if (host_atom != MO_ATOM_WITHIN16) {
5807             /* The host does not implement within16, so require alignment. */
5808             align = MAX(align, size);
5809         }
5810         break;
5811 
5812     case MO_ATOM_WITHIN16_PAIR:
5813         atmax = size;
5814         /*
5815          * Misalignment implies !within16, and therefore half atomicity.
5816          * Any host prepared for two operations can implement this with
5817          * half alignment.
5818          */
5819         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5820             align = MAX(align, half);
5821         }
5822         break;
5823 
5824     case MO_ATOM_SUBALIGN:
5825         atmax = size;
5826         if (host_atom != MO_ATOM_SUBALIGN) {
5827             /* If unaligned but not odd, there are subobjects up to half. */
5828             if (allow_two_ops) {
5829                 align = MAX(align, half);
5830             } else {
5831                 align = MAX(align, size);
5832             }
5833         }
5834         break;
5835 
5836     default:
5837         g_assert_not_reached();
5838     }
5839 
5840     return (TCGAtomAlign){ .atom = atmax, .align = align };
5841 }
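
/*
 * Example (illustrative): for opc == MO_64 | MO_ATOM_WITHIN16 on a
 * host whose accesses are only atomic when aligned (MO_ATOM_IFALIGN),
 * the WITHIN16 case raises align to MO_64.  The inline fast path then
 * handles only naturally aligned accesses, everything else funnels to
 * the slow path, and atmax remains MO_64.
 */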
5842 
5843 /*
5844  * Similarly for qemu_ld/st slow path helpers.
5845  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5846  * using only the provided backend tcg_out_* functions.
5847  */
5848 
5849 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5850 {
5851     int ofs = arg_slot_stk_ofs(slot);
5852 
5853     /*
5854      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5855      * require extension to uint64_t, adjust the address for uint32_t.
5856      */
5857     if (HOST_BIG_ENDIAN &&
5858         TCG_TARGET_REG_BITS == 64 &&
5859         type == TCG_TYPE_I32) {
5860         ofs += 4;
5861     }
5862     return ofs;
5863 }
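
/*
 * For example (hypothetical layout): on a big-endian 64-bit host with
 * 8-byte stack slots, a TCG_TYPE_I32 argument assigned to slot 2
 * starts at arg_slot_stk_ofs(2) but must be stored 4 bytes in, so
 * that the 32-bit value occupies the low-order (highest-addressed)
 * half of the 64-bit slot where the callee expects it.
 */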
5864 
5865 static void tcg_out_helper_load_slots(TCGContext *s,
5866                                       unsigned nmov, TCGMovExtend *mov,
5867                                       const TCGLdstHelperParam *parm)
5868 {
5869     unsigned i;
5870     TCGReg dst3;
5871 
5872     /*
5873      * Start from the end, storing to the stack first.
5874      * This frees those registers, so we need not consider overlap.
5875      */
5876     for (i = nmov; i-- > 0; ) {
5877         unsigned slot = mov[i].dst;
5878 
5879         if (arg_slot_reg_p(slot)) {
5880             goto found_reg;
5881         }
5882 
5883         TCGReg src = mov[i].src;
5884         TCGType dst_type = mov[i].dst_type;
5885         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5886 
5887         /* The argument is going onto the stack; extend into scratch. */
5888         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5889             tcg_debug_assert(parm->ntmp != 0);
5890             mov[i].dst = src = parm->tmp[0];
5891             tcg_out_movext1(s, &mov[i]);
5892         }
5893 
5894         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5895                    tcg_out_helper_stk_ofs(dst_type, slot));
5896     }
5897     return;
5898 
5899  found_reg:
5900     /*
5901      * The remaining arguments are in registers.
5902      * Convert slot numbers to argument registers.
5903      */
5904     nmov = i + 1;
5905     for (i = 0; i < nmov; ++i) {
5906         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5907     }
5908 
5909     switch (nmov) {
5910     case 4:
5911         /* The backend must have provided enough temps for the worst case. */
5912         tcg_debug_assert(parm->ntmp >= 2);
5913 
5914         dst3 = mov[3].dst;
5915         for (unsigned j = 0; j < 3; ++j) {
5916             if (dst3 == mov[j].src) {
5917                 /*
5918                  * Conflict. Copy the source to a temporary, perform the
5919                  * remaining moves, then the extension from our scratch
5920                  * on the way out.
5921                  */
5922                 TCGReg scratch = parm->tmp[1];
5923 
5924                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5925                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5926                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5927                 return;
5928             }
5929         }
5930 
5931         /* No conflicts: perform this move and continue. */
5932         tcg_out_movext1(s, &mov[3]);
5933         /* fall through */
5934 
5935     case 3:
5936         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5937                         parm->ntmp ? parm->tmp[0] : -1);
5938         break;
5939     case 2:
5940         tcg_out_movext2(s, mov, mov + 1,
5941                         parm->ntmp ? parm->tmp[0] : -1);
5942         break;
5943     case 1:
5944         tcg_out_movext1(s, mov);
5945         break;
5946     default:
5947         g_assert_not_reached();
5948     }
5949 }
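
/*
 * Sketch of the four-argument conflict above, with hypothetical
 * argument registers A0-A3: if mov[3].dst is A3 and mov[1].src is
 * also A3, emitting mov[3] first would clobber mov[1]'s source.
 * So mov[3].src is stashed in a scratch so that it survives the
 * other three moves, mov[0..2] are resolved by tcg_out_movext3()
 * (which does its own cycle breaking via tmp[0]), and mov[3] is
 * finally extended out of the scratch.
 */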
5950 
5951 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5952                                     TCGType type, tcg_target_long imm,
5953                                     const TCGLdstHelperParam *parm)
5954 {
5955     if (arg_slot_reg_p(slot)) {
5956         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5957     } else {
5958         int ofs = tcg_out_helper_stk_ofs(type, slot);
5959         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5960             tcg_debug_assert(parm->ntmp != 0);
5961             tcg_out_movi(s, type, parm->tmp[0], imm);
5962             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5963         }
5964     }
5965 }
5966 
5967 static void tcg_out_helper_load_common_args(TCGContext *s,
5968                                             const TCGLabelQemuLdst *ldst,
5969                                             const TCGLdstHelperParam *parm,
5970                                             const TCGHelperInfo *info,
5971                                             unsigned next_arg)
5972 {
5973     TCGMovExtend ptr_mov = {
5974         .dst_type = TCG_TYPE_PTR,
5975         .src_type = TCG_TYPE_PTR,
5976         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5977     };
5978     const TCGCallArgumentLoc *loc = &info->in[0];
5979     TCGType type;
5980     unsigned slot;
5981     tcg_target_ulong imm;
5982 
5983     /*
5984      * Handle env, which is always first.
5985      */
5986     ptr_mov.dst = loc->arg_slot;
5987     ptr_mov.src = TCG_AREG0;
5988     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5989 
5990     /*
5991      * Handle oi.
5992      */
5993     imm = ldst->oi;
5994     loc = &info->in[next_arg];
5995     type = TCG_TYPE_I32;
5996     switch (loc->kind) {
5997     case TCG_CALL_ARG_NORMAL:
5998         break;
5999     case TCG_CALL_ARG_EXTEND_U:
6000     case TCG_CALL_ARG_EXTEND_S:
6001         /* No extension required for MemOpIdx. */
6002         tcg_debug_assert(imm <= INT32_MAX);
6003         type = TCG_TYPE_REG;
6004         break;
6005     default:
6006         g_assert_not_reached();
6007     }
6008     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6009     next_arg++;
6010 
6011     /*
6012      * Handle ra.
6013      */
6014     loc = &info->in[next_arg];
6015     slot = loc->arg_slot;
6016     if (parm->ra_gen) {
6017         int arg_reg = -1;
6018         TCGReg ra_reg;
6019 
6020         if (arg_slot_reg_p(slot)) {
6021             arg_reg = tcg_target_call_iarg_regs[slot];
6022         }
6023         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6024 
6025         ptr_mov.dst = slot;
6026         ptr_mov.src = ra_reg;
6027         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6028     } else {
6029         imm = (uintptr_t)ldst->raddr;
6030         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6031     }
6032 }
6033 
6034 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6035                                        const TCGCallArgumentLoc *loc,
6036                                        TCGType dst_type, TCGType src_type,
6037                                        TCGReg lo, TCGReg hi)
6038 {
6039     MemOp reg_mo;
6040 
6041     if (dst_type <= TCG_TYPE_REG) {
6042         MemOp src_ext;
6043 
6044         switch (loc->kind) {
6045         case TCG_CALL_ARG_NORMAL:
6046             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6047             break;
6048         case TCG_CALL_ARG_EXTEND_U:
6049             dst_type = TCG_TYPE_REG;
6050             src_ext = MO_UL;
6051             break;
6052         case TCG_CALL_ARG_EXTEND_S:
6053             dst_type = TCG_TYPE_REG;
6054             src_ext = MO_SL;
6055             break;
6056         default:
6057             g_assert_not_reached();
6058         }
6059 
6060         mov[0].dst = loc->arg_slot;
6061         mov[0].dst_type = dst_type;
6062         mov[0].src = lo;
6063         mov[0].src_type = src_type;
6064         mov[0].src_ext = src_ext;
6065         return 1;
6066     }
6067 
6068     if (TCG_TARGET_REG_BITS == 32) {
6069         assert(dst_type == TCG_TYPE_I64);
6070         reg_mo = MO_32;
6071     } else {
6072         assert(dst_type == TCG_TYPE_I128);
6073         reg_mo = MO_64;
6074     }
6075 
6076     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6077     mov[0].src = lo;
6078     mov[0].dst_type = TCG_TYPE_REG;
6079     mov[0].src_type = TCG_TYPE_REG;
6080     mov[0].src_ext = reg_mo;
6081 
6082     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6083     mov[1].src = hi;
6084     mov[1].dst_type = TCG_TYPE_REG;
6085     mov[1].src_type = TCG_TYPE_REG;
6086     mov[1].src_ext = reg_mo;
6087 
6088     return 2;
6089 }
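
/*
 * Illustrative use: on a 32-bit host, a TCG_TYPE_I64 value held in
 * (lo, hi) expands to two TCG_TYPE_REG moves of MO_32 each, with
 * loc[HOST_BIG_ENDIAN] receiving the low half, so that the two
 * argument slots together form a correctly ordered 64-bit helper
 * argument regardless of endianness.
 */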
6090 
6091 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6092                                    const TCGLdstHelperParam *parm)
6093 {
6094     const TCGHelperInfo *info;
6095     const TCGCallArgumentLoc *loc;
6096     TCGMovExtend mov[2];
6097     unsigned next_arg, nmov;
6098     MemOp mop = get_memop(ldst->oi);
6099 
6100     switch (mop & MO_SIZE) {
6101     case MO_8:
6102     case MO_16:
6103     case MO_32:
6104         info = &info_helper_ld32_mmu;
6105         break;
6106     case MO_64:
6107         info = &info_helper_ld64_mmu;
6108         break;
6109     case MO_128:
6110         info = &info_helper_ld128_mmu;
6111         break;
6112     default:
6113         g_assert_not_reached();
6114     }
6115 
6116     /* Defer env argument. */
6117     next_arg = 1;
6118 
6119     loc = &info->in[next_arg];
6120     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6121         /*
6122          * 32-bit host with 32-bit guest: zero-extend the guest address
6123          * to 64-bits for the helper by storing the low part, then
6124          * load a zero for the high part.
6125          */
6126         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6127                                TCG_TYPE_I32, TCG_TYPE_I32,
6128                                ldst->addr_reg, -1);
6129         tcg_out_helper_load_slots(s, 1, mov, parm);
6130 
6131         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6132                                 TCG_TYPE_I32, 0, parm);
6133         next_arg += 2;
6134     } else {
6135         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6136                                       ldst->addr_reg, -1);
6137         tcg_out_helper_load_slots(s, nmov, mov, parm);
6138         next_arg += nmov;
6139     }
6140 
6141     switch (info->out_kind) {
6142     case TCG_CALL_RET_NORMAL:
6143     case TCG_CALL_RET_BY_VEC:
6144         break;
6145     case TCG_CALL_RET_BY_REF:
6146         /*
6147          * The return reference is in the first argument slot.
6148          * We need memory in which to return: re-use the top of stack.
6149          */
6150         {
6151             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6152 
6153             if (arg_slot_reg_p(0)) {
6154                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6155                                  TCG_REG_CALL_STACK, ofs_slot0);
6156             } else {
6157                 tcg_debug_assert(parm->ntmp != 0);
6158                 tcg_out_addi_ptr(s, parm->tmp[0],
6159                                  TCG_REG_CALL_STACK, ofs_slot0);
6160                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6161                            TCG_REG_CALL_STACK, ofs_slot0);
6162             }
6163         }
6164         break;
6165     default:
6166         g_assert_not_reached();
6167     }
6168 
6169     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6170 }
6171 
6172 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6173                                   bool load_sign,
6174                                   const TCGLdstHelperParam *parm)
6175 {
6176     MemOp mop = get_memop(ldst->oi);
6177     TCGMovExtend mov[2];
6178     int ofs_slot0;
6179 
6180     switch (ldst->type) {
6181     case TCG_TYPE_I64:
6182         if (TCG_TARGET_REG_BITS == 32) {
6183             break;
6184         }
6185         /* fall through */
6186 
6187     case TCG_TYPE_I32:
6188         mov[0].dst = ldst->datalo_reg;
6189         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6190         mov[0].dst_type = ldst->type;
6191         mov[0].src_type = TCG_TYPE_REG;
6192 
6193         /*
6194          * If load_sign, then we allowed the helper to perform the
6195          * appropriate sign extension to tcg_target_ulong, and all
6196          * we need now is a plain move.
6197          *
6198          * If not, then we expect the relevant extension
6199          * instruction to be no more expensive than a move, and
6200          * we thus save icache space etc. by using only one of
6201          * the two helper functions.
6202          */
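        /*
         * For example, a signed byte load (MO_SB) into an I32
         * destination: with load_sign the helper already returned the
         * value sign-extended, so src_ext becomes MO_32 and
         * tcg_out_movext1 emits a plain move; without load_sign,
         * src_ext becomes MO_SB and tcg_out_movext1 emits the
         * sign-extending move from the return register instead.
         */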
6203         if (load_sign || !(mop & MO_SIGN)) {
6204             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6205                 mov[0].src_ext = MO_32;
6206             } else {
6207                 mov[0].src_ext = MO_64;
6208             }
6209         } else {
6210             mov[0].src_ext = mop & MO_SSIZE;
6211         }
6212         tcg_out_movext1(s, mov);
6213         return;
6214 
6215     case TCG_TYPE_I128:
6216         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6217         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6218         switch (TCG_TARGET_CALL_RET_I128) {
6219         case TCG_CALL_RET_NORMAL:
6220             break;
6221         case TCG_CALL_RET_BY_VEC:
6222             tcg_out_st(s, TCG_TYPE_V128,
6223                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6224                        TCG_REG_CALL_STACK, ofs_slot0);
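            /*
             * The store above parks the vector return value in the
             * same stack slot that the BY_REF convention uses, so both
             * cases can share the two integer reloads below.
             */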
6225             /* fall through */
6226         case TCG_CALL_RET_BY_REF:
6227             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6228                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6229             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6230                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6231             return;
6232         default:
6233             g_assert_not_reached();
6234         }
6235         break;
6236 
6237     default:
6238         g_assert_not_reached();
6239     }
6240 
6241     mov[0].dst = ldst->datalo_reg;
6242     mov[0].src =
6243         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6244     mov[0].dst_type = TCG_TYPE_REG;
6245     mov[0].src_type = TCG_TYPE_REG;
6246     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6247 
6248     mov[1].dst = ldst->datahi_reg;
6249     mov[1].src =
6250         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6251     mov[1].dst_type = TCG_TYPE_REG;
6252     mov[1].src_type = TCG_TYPE_REG;
6253     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6254 
6255     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6256 }
6257 
6258 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6259                                    const TCGLdstHelperParam *parm)
6260 {
6261     const TCGHelperInfo *info;
6262     const TCGCallArgumentLoc *loc;
6263     TCGMovExtend mov[4];
6264     TCGType data_type;
6265     unsigned next_arg, nmov, n;
6266     MemOp mop = get_memop(ldst->oi);
6267 
6268     switch (mop & MO_SIZE) {
6269     case MO_8:
6270     case MO_16:
6271     case MO_32:
6272         info = &info_helper_st32_mmu;
6273         data_type = TCG_TYPE_I32;
6274         break;
6275     case MO_64:
6276         info = &info_helper_st64_mmu;
6277         data_type = TCG_TYPE_I64;
6278         break;
6279     case MO_128:
6280         info = &info_helper_st128_mmu;
6281         data_type = TCG_TYPE_I128;
6282         break;
6283     default:
6284         g_assert_not_reached();
6285     }
6286 
6287     /* Defer env argument. */
6288     next_arg = 1;
6289     nmov = 0;
6290 
6291     /* Handle addr argument. */
6292     loc = &info->in[next_arg];
6293     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6294     if (TCG_TARGET_REG_BITS == 32) {
6295         /*
6296          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6297          * to 64 bits for the helper by storing the low part.  Later,
6298          * after we have processed the register inputs, we will load a
6299          * zero for the high part.
6300          */
6301         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6302                                TCG_TYPE_I32, TCG_TYPE_I32,
6303                                ldst->addr_reg, -1);
6304         next_arg += 2;
6305         nmov += 1;
6306     } else {
6307         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6308                                    ldst->addr_reg, -1);
6309         next_arg += n;
6310         nmov += n;
6311     }
6312 
6313     /* Handle data argument. */
6314     loc = &info->in[next_arg];
6315     switch (loc->kind) {
6316     case TCG_CALL_ARG_NORMAL:
6317     case TCG_CALL_ARG_EXTEND_U:
6318     case TCG_CALL_ARG_EXTEND_S:
6319         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6320                                    ldst->datalo_reg, ldst->datahi_reg);
6321         next_arg += n;
6322         nmov += n;
6323         tcg_out_helper_load_slots(s, nmov, mov, parm);
6324         break;
6325 
6326     case TCG_CALL_ARG_BY_REF:
6327         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6328         tcg_debug_assert(data_type == TCG_TYPE_I128);
6329         tcg_out_st(s, TCG_TYPE_I64,
6330                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6331                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6332         tcg_out_st(s, TCG_TYPE_I64,
6333                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6334                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6335 
6336         tcg_out_helper_load_slots(s, nmov, mov, parm);
6337 
6338         if (arg_slot_reg_p(loc->arg_slot)) {
6339             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6340                              TCG_REG_CALL_STACK,
6341                              arg_slot_stk_ofs(loc->ref_slot));
6342         } else {
6343             tcg_debug_assert(parm->ntmp != 0);
6344             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6345                              arg_slot_stk_ofs(loc->ref_slot));
6346             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6347                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6348         }
6349         next_arg += 2;
6350         break;
6351 
6352     default:
6353         g_assert_not_reached();
6354     }
6355 
6356     if (TCG_TARGET_REG_BITS == 32) {
6357         /* Zero extend the address by loading a zero for the high part. */
6358         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6359         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6360     }
6361 
6362     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6363 }
6364 
6365 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6366 {
6367     int i, start_words, num_insns;
6368     TCGOp *op;
6369 
6370     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6371                  && qemu_log_in_addr_range(pc_start))) {
6372         FILE *logfile = qemu_log_trylock();
6373         if (logfile) {
6374             fprintf(logfile, "OP:\n");
6375             tcg_dump_ops(s, logfile, false);
6376             fprintf(logfile, "\n");
6377             qemu_log_unlock(logfile);
6378         }
6379     }
6380 
6381 #ifdef CONFIG_DEBUG_TCG
6382     /* Ensure that all referenced labels have been emitted.  */
6383     {
6384         TCGLabel *l;
6385         bool error = false;
6386 
6387         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6388             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6389                 qemu_log_mask(CPU_LOG_TB_OP,
6390                               "$L%d referenced but not present.\n", l->id);
6391                 error = true;
6392             }
6393         }
6394         assert(!error);
6395     }
6396 #endif
6397 
6398     /* Do not reuse any EBB that may be allocated within the TB. */
6399     tcg_temp_ebb_reset_freed(s);
6400 
6401     tcg_optimize(s);
6402 
6403     reachable_code_pass(s);
6404     liveness_pass_0(s);
6405     liveness_pass_1(s);
6406 
6407     if (s->nb_indirects > 0) {
6408         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6409                      && qemu_log_in_addr_range(pc_start))) {
6410             FILE *logfile = qemu_log_trylock();
6411             if (logfile) {
6412                 fprintf(logfile, "OP before indirect lowering:\n");
6413                 tcg_dump_ops(s, logfile, false);
6414                 fprintf(logfile, "\n");
6415                 qemu_log_unlock(logfile);
6416             }
6417         }
6418 
6419         /* Replace indirect temps with direct temps.  */
6420         if (liveness_pass_2(s)) {
6421             /* If changes were made, re-run liveness.  */
6422             liveness_pass_1(s);
6423         }
6424     }
6425 
6426     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6427                  && qemu_log_in_addr_range(pc_start))) {
6428         FILE *logfile = qemu_log_trylock();
6429         if (logfile) {
6430             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6431             tcg_dump_ops(s, logfile, true);
6432             fprintf(logfile, "\n");
6433             qemu_log_unlock(logfile);
6434         }
6435     }
6436 
6437     /* Initialize goto_tb jump offsets. */
6438     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6439     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6440     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6441     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6442 
6443     tcg_reg_alloc_start(s);
6444 
6445     /*
6446      * Reset the buffer pointers when restarting after overflow.
6447      * TODO: Move this into translate-all.c with the rest of the
6448      * buffer management.  Having only this done here is confusing.
6449      */
6450     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6451     s->code_ptr = s->code_buf;
6452     s->data_gen_ptr = NULL;
6453 
6454     QSIMPLEQ_INIT(&s->ldst_labels);
6455     s->pool_labels = NULL;
6456 
6457     start_words = s->insn_start_words;
6458     s->gen_insn_data =
6459         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
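    /*
     * gen_insn_data is a flat [icount][insn_start_words] array of
     * uint64_t.  Each INDEX_op_insn_start below fills in one row,
     * which the unwind machinery later uses to map a host PC back to
     * the guest state recorded at the start of that instruction.
     */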
6460 
6461     tcg_out_tb_start(s);
6462 
6463     num_insns = -1;
6464     QTAILQ_FOREACH(op, &s->ops, link) {
6465         TCGOpcode opc = op->opc;
6466 
6467         switch (opc) {
6468         case INDEX_op_mov:
6469         case INDEX_op_mov_vec:
6470             tcg_reg_alloc_mov(s, op);
6471             break;
6472         case INDEX_op_dup_vec:
6473             tcg_reg_alloc_dup(s, op);
6474             break;
6475         case INDEX_op_insn_start:
6476             if (num_insns >= 0) {
6477                 size_t off = tcg_current_code_size(s);
6478                 s->gen_insn_end_off[num_insns] = off;
6479                 /* Assert that we do not overflow our stored offset.  */
6480                 assert(s->gen_insn_end_off[num_insns] == off);
6481             }
6482             num_insns++;
6483             for (i = 0; i < start_words; ++i) {
6484                 s->gen_insn_data[num_insns * start_words + i] =
6485                     tcg_get_insn_start_param(op, i);
6486             }
6487             break;
6488         case INDEX_op_discard:
6489             temp_dead(s, arg_temp(op->args[0]));
6490             break;
6491         case INDEX_op_set_label:
6492             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6493             tcg_out_label(s, arg_label(op->args[0]));
6494             break;
6495         case INDEX_op_call:
6496             tcg_reg_alloc_call(s, op);
6497             break;
6498         case INDEX_op_exit_tb:
6499             tcg_out_exit_tb(s, op->args[0]);
6500             break;
6501         case INDEX_op_goto_tb:
6502             tcg_out_goto_tb(s, op->args[0]);
6503             break;
6504         case INDEX_op_dup2_vec:
6505             if (tcg_reg_alloc_dup2(s, op)) {
6506                 break;
6507             }
6508             /* fall through */
6509         default:
6510             /* Sanity check that we've not introduced any unhandled opcodes. */
6511             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6512                                               TCGOP_FLAGS(op)));
6513             /* Note: it would be much faster to have specialized
6514                register allocator functions for some common argument
6515                patterns.  */
6516             tcg_reg_alloc_op(s, op);
6517             break;
6518         }
6519         /* Test for (pending) buffer overflow.  The assumption is that any
6520            one operation beginning below the high water mark cannot overrun
6521            the buffer completely.  Thus we can test for overflow after
6522            generating code without having to check during generation.  */
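        /*
         * Concretely: the highwater mark is placed a fixed slack
         * (TCG_HIGHWATER, chosen by the region allocator) below the
         * true end of the buffer, so even an op that expands to a few
         * hundred bytes of host code cannot overrun the buffer before
         * this check fires and we return -1 to restart the TB.
         */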
6523         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6524             return -1;
6525         }
6526         /* Test for TB overflow, as seen by the 16-bit gen_insn_end_off.  */
6527         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6528             return -2;
6529         }
6530     }
6531     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6532     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6533 
6534     /* Generate TB finalization at the end of block */
6535     i = tcg_out_ldst_finalize(s);
6536     if (i < 0) {
6537         return i;
6538     }
6539     i = tcg_out_pool_finalize(s);
6540     if (i < 0) {
6541         return i;
6542     }
6543     if (!tcg_resolve_relocs(s)) {
6544         return -2;
6545     }
6546 
6547 #ifndef CONFIG_TCG_INTERPRETER
6548     /* flush instruction cache */
6549     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6550                         (uintptr_t)s->code_buf,
6551                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6552 #endif
6553 
6554     return tcg_current_code_size(s);
6555 }
6556 
6557 #ifdef ELF_HOST_MACHINE
6558 /* In order to use this feature, the backend needs to do three things:
6559 
6560    (1) Define ELF_HOST_MACHINE to indicate both what value to
6561        put into the ELF image and to indicate support for the feature.
6562 
6563    (2) Define tcg_register_jit.  This should create a buffer containing
6564        the contents of a .debug_frame section that describes the post-
6565        prologue unwind info for the tcg machine.
6566 
6567    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6568 */
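/*
 * A rough sketch of steps (2) and (3) for a hypothetical backend --
 * the register numbers and frame layout below are invented for
 * illustration, not taken from any real tcg-target.c.inc:
 *
 *     typedef struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_def_cfa[4];
 *         uint8_t fde_reg_ofs[2];
 *     } DebugFrame;
 *
 *     static const DebugFrame debug_frame = {
 *         .h.cie.len = sizeof(DebugFrameCIE) - 4,  // length after .len
 *         .h.cie.id = -1,                          // CIE marker
 *         .h.cie.version = 1,
 *         .h.cie.code_align = 1,
 *         .h.cie.data_align = 0x78,                // sleb128 -8
 *         .h.cie.return_column = 30,               // made-up link reg
 *         .h.fde.len = sizeof(DebugFrame)
 *                      - offsetof(DebugFrame, h.fde.cie_offset),
 *         .fde_def_cfa = { 12, 31, 0x80, 2 },      // DW_CFA_def_cfa r31, 256
 *         .fde_reg_ofs = { 0x9e, 1 },              // DW_CFA_offset r30 at cfa-8
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */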
6569 
6570 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6571 typedef enum {
6572     JIT_NOACTION = 0,
6573     JIT_REGISTER_FN,
6574     JIT_UNREGISTER_FN
6575 } jit_actions_t;
6576 
6577 struct jit_code_entry {
6578     struct jit_code_entry *next_entry;
6579     struct jit_code_entry *prev_entry;
6580     const void *symfile_addr;
6581     uint64_t symfile_size;
6582 };
6583 
6584 struct jit_descriptor {
6585     uint32_t version;
6586     uint32_t action_flag;
6587     struct jit_code_entry *relevant_entry;
6588     struct jit_code_entry *first_entry;
6589 };
6590 
6591 void __jit_debug_register_code(void) __attribute__((noinline));
6592 void __jit_debug_register_code(void)
6593 {
6594     asm("");
6595 }
6596 
6597 /* Must statically initialize the version, because GDB may check
6598    the version before we can set it.  */
6599 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
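/*
 * The handshake, per the GDB JIT interface documentation: the debugger
 * sets a breakpoint on __jit_debug_register_code; when we call it after
 * updating __jit_debug_descriptor (see tcg_register_jit_int below), GDB
 * wakes up, follows relevant_entry, and reads the in-memory ELF image
 * at symfile_addr.
 */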
6600 
6601 /* End GDB interface.  */
6602 
6603 static int find_string(const char *strtab, const char *str)
6604 {
6605     const char *p = strtab + 1;
6606 
6607     while (1) {
6608         if (strcmp(p, str) == 0) {
6609             return p - strtab;
6610         }
6611         p += strlen(p) + 1;
6612     }
6613 }
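/*
 * For example, with the .str table used below ("\0" ".text\0"
 * ".debug_info\0" ...), find_string(strtab, ".text") returns 1 and
 * find_string(strtab, ".debug_info") returns 7.  Note there is no
 * bounds check: every caller must pass a name known to be present.
 */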
6614 
6615 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6616                                  const void *debug_frame,
6617                                  size_t debug_frame_size)
6618 {
6619     struct __attribute__((packed)) DebugInfo {
6620         uint32_t  len;
6621         uint16_t  version;
6622         uint32_t  abbrev;
6623         uint8_t   ptr_size;
6624         uint8_t   cu_die;
6625         uint16_t  cu_lang;
6626         uintptr_t cu_low_pc;
6627         uintptr_t cu_high_pc;
6628         uint8_t   fn_die;
6629         char      fn_name[16];
6630         uintptr_t fn_low_pc;
6631         uintptr_t fn_high_pc;
6632         uint8_t   cu_eoc;
6633     };
6634 
6635     struct ElfImage {
6636         ElfW(Ehdr) ehdr;
6637         ElfW(Phdr) phdr;
6638         ElfW(Shdr) shdr[7];
6639         ElfW(Sym)  sym[2];
6640         struct DebugInfo di;
6641         uint8_t    da[24];
6642         char       str[80];
6643     };
6644 
6645     struct ElfImage *img;
6646 
6647     static const struct ElfImage img_template = {
6648         .ehdr = {
6649             .e_ident[EI_MAG0] = ELFMAG0,
6650             .e_ident[EI_MAG1] = ELFMAG1,
6651             .e_ident[EI_MAG2] = ELFMAG2,
6652             .e_ident[EI_MAG3] = ELFMAG3,
6653             .e_ident[EI_CLASS] = ELF_CLASS,
6654             .e_ident[EI_DATA] = ELF_DATA,
6655             .e_ident[EI_VERSION] = EV_CURRENT,
6656             .e_type = ET_EXEC,
6657             .e_machine = ELF_HOST_MACHINE,
6658             .e_version = EV_CURRENT,
6659             .e_phoff = offsetof(struct ElfImage, phdr),
6660             .e_shoff = offsetof(struct ElfImage, shdr),
6661             .e_ehsize = sizeof(ElfW(Ehdr)),
6662             .e_phentsize = sizeof(ElfW(Phdr)),
6663             .e_phnum = 1,
6664             .e_shentsize = sizeof(ElfW(Shdr)),
6665             .e_shnum = ARRAY_SIZE(img->shdr),
6666             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6667 #ifdef ELF_HOST_FLAGS
6668             .e_flags = ELF_HOST_FLAGS,
6669 #endif
6670 #ifdef ELF_OSABI
6671             .e_ident[EI_OSABI] = ELF_OSABI,
6672 #endif
6673         },
6674         .phdr = {
6675             .p_type = PT_LOAD,
6676             .p_flags = PF_X,
6677         },
6678         .shdr = {
6679             [0] = { .sh_type = SHT_NULL },
6680             /* Trick: The contents of code_gen_buffer are not present in
6681                this fake ELF file; that got allocated elsewhere.  Therefore
6682                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6683                will not look for contents.  We can record any address.  */
6684             [1] = { /* .text */
6685                 .sh_type = SHT_NOBITS,
6686                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6687             },
6688             [2] = { /* .debug_info */
6689                 .sh_type = SHT_PROGBITS,
6690                 .sh_offset = offsetof(struct ElfImage, di),
6691                 .sh_size = sizeof(struct DebugInfo),
6692             },
6693             [3] = { /* .debug_abbrev */
6694                 .sh_type = SHT_PROGBITS,
6695                 .sh_offset = offsetof(struct ElfImage, da),
6696                 .sh_size = sizeof(img->da),
6697             },
6698             [4] = { /* .debug_frame */
6699                 .sh_type = SHT_PROGBITS,
6700                 .sh_offset = sizeof(struct ElfImage),
6701             },
6702             [5] = { /* .symtab */
6703                 .sh_type = SHT_SYMTAB,
6704                 .sh_offset = offsetof(struct ElfImage, sym),
6705                 .sh_size = sizeof(img->sym),
6706                 .sh_info = 1,
6707                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6708                 .sh_entsize = sizeof(ElfW(Sym)),
6709             },
6710             [6] = { /* .strtab */
6711                 .sh_type = SHT_STRTAB,
6712                 .sh_offset = offsetof(struct ElfImage, str),
6713                 .sh_size = sizeof(img->str),
6714             }
6715         },
6716         .sym = {
6717             [1] = { /* code_gen_buffer */
6718                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6719                 .st_shndx = 1,
6720             }
6721         },
6722         .di = {
6723             .len = sizeof(struct DebugInfo) - 4,
6724             .version = 2,
6725             .ptr_size = sizeof(void *),
6726             .cu_die = 1,
6727             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6728             .fn_die = 2,
6729             .fn_name = "code_gen_buffer"
6730         },
6731         .da = {
6732             1,          /* abbrev number (the cu) */
6733             0x11, 1,    /* DW_TAG_compile_unit, has children */
6734             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6735             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6736             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6737             0, 0,       /* end of abbrev */
6738             2,          /* abbrev number (the fn) */
6739             0x2e, 0,    /* DW_TAG_subprogram, no children */
6740             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6741             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6742             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6743             0, 0,       /* end of abbrev */
6744             0           /* no more abbrev */
6745         },
6746         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6747                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6748     };
6749 
6750     /* We only need a single jit entry; statically allocate it.  */
6751     static struct jit_code_entry one_entry;
6752 
6753     uintptr_t buf = (uintptr_t)buf_ptr;
6754     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6755     DebugFrameHeader *dfh;
6756 
6757     img = g_malloc(img_size);
6758     *img = img_template;
6759 
6760     img->phdr.p_vaddr = buf;
6761     img->phdr.p_paddr = buf;
6762     img->phdr.p_memsz = buf_size;
6763 
6764     img->shdr[1].sh_name = find_string(img->str, ".text");
6765     img->shdr[1].sh_addr = buf;
6766     img->shdr[1].sh_size = buf_size;
6767 
6768     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6769     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6770 
6771     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6772     img->shdr[4].sh_size = debug_frame_size;
6773 
6774     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6775     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6776 
6777     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6778     img->sym[1].st_value = buf;
6779     img->sym[1].st_size = buf_size;
6780 
6781     img->di.cu_low_pc = buf;
6782     img->di.cu_high_pc = buf + buf_size;
6783     img->di.fn_low_pc = buf;
6784     img->di.fn_high_pc = buf + buf_size;
6785 
6786     dfh = (DebugFrameHeader *)(img + 1);
6787     memcpy(dfh, debug_frame, debug_frame_size);
6788     dfh->fde.func_start = buf;
6789     dfh->fde.func_len = buf_size;
6790 
6791 #ifdef DEBUG_JIT
6792     /* Enable this block to debug creation of the ELF image file.
6793        One can then inspect it with readelf, objdump, or other utilities.  */
6794     {
6795         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6796         FILE *f = fopen(jit, "w+b");
6797         if (f) {
6798             if (fwrite(img, img_size, 1, f) != 1) {
6799                 /* Avoid unused-result warning; nothing to do on failure.  */
6800             }
6801             fclose(f);
6802         }
6803     }
6804 #endif
6805 
6806     one_entry.symfile_addr = img;
6807     one_entry.symfile_size = img_size;
6808 
6809     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6810     __jit_debug_descriptor.relevant_entry = &one_entry;
6811     __jit_debug_descriptor.first_entry = &one_entry;
6812     __jit_debug_register_code();
6813 }
6814 #else
6815 /* No support for the feature.  Provide the entry point expected by exec.c,
6816    and implement the internal function we declared earlier.  */
6817 
6818 static void tcg_register_jit_int(const void *buf, size_t size,
6819                                  const void *debug_frame,
6820                                  size_t debug_frame_size)
6821 {
6822 }
6823 
6824 void tcg_register_jit(const void *buf, size_t buf_size)
6825 {
6826 }
6827 #endif /* ELF_HOST_MACHINE */
6828 
6829 #if !TCG_TARGET_MAYBE_vec
6830 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6831 {
6832     g_assert_not_reached();
6833 }
6834 #endif
6835