xref: /openbmc/qemu/tcg/tcg.c (revision 49bd751497f3b71550b152ef9da0e265a94a64c1)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
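/*
 * An illustrative sketch (all values hypothetical): a backend's
 * tcg_register_jit() would typically provide a statically initialized
 * header such as the one below, followed by host-specific CFI opcodes,
 * so that GDB's JIT interface can unwind through generated code:
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie.id = -1,            // CIE marker (0xffffffff)
 *         .cie.version = 1,
 *         .cie.code_align = 1,     // single-byte ULEB128
 *         .cie.data_align = 0x78,  // single-byte SLEB128 for -8
 *         .cie.return_column = 30, // e.g. the host link register
 *     };
 */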
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
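/*
 * Both fields are log2 values: for instance, a 4-byte access that must
 * be 4-byte aligned and single-copy atomic comes out as
 * { .atom = 2, .align = 2 }.  With @allow_two_ops (an illustration; see
 * the implementation for the precise rules), a host whose native
 * atomicity is only 32 bits may satisfy an 8-byte MO_64 access with two
 * 4-byte operations rather than one atomic 8-byte one.
 */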
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
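/*
 * A sketch of how these helpers compose (illustrative): on a byte-stream
 * host (TCG_TARGET_INSN_UNIT_SIZE == 1, e.g. x86) a jump with a 32-bit
 * displacement is built from two calls,
 *
 *     tcg_out8(s, 0xe9);      // opcode byte, advances code_ptr by 1 unit
 *     tcg_out32(s, disp);     // memcpy of 4 bytes, advances by 4 units
 *
 * while on a fixed-width host with 4-byte insn units the same tcg_out32
 * stores a single unit directly and advances code_ptr by one.
 */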
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
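/*
 * The forward-branch life cycle, sketched (R_HOST stands in for a
 * backend-specific relocation type): when emitting a branch to a label
 * whose address is not yet known, a backend records the site with
 *
 *     tcg_out_reloc(s, s->code_ptr, R_HOST, l, 0);
 *
 * and emits a placeholder insn; tcg_out_label() later captures the
 * label's read-execute address, and tcg_resolve_relocs() walks each
 * label's queue so that patch_reloc() can rewrite every recorded site.
 */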
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
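/*
 * Note that the result is negative: CPUNegativeOffsetState is allocated
 * immediately below the architectural state that the env register points
 * at, so subtracting sizeof(CPUNegativeOffsetState) converts an offset
 * within that struct into a displacement from env.
 */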
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
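/*
 * Example (illustrative): sign-extending a 32-bit value into a 64-bit
 * register reduces to a single tcg_out_exts_i32_i64,
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * while MO_UQ with 64-bit types on both sides is a plain register move.
 */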
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch or two xchg.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
611             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
626             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
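/*
 * A worked example for the "clockwise" cycle, i.e. moves A->B, B->C,
 * C->A: xchg(B, C) followed by xchg(A, B) leaves A's old value in B,
 * B's in C and C's in A, after which only the in-place extensions
 * remain.  Hosts whose tcg_out_xchg returns false use the single
 * scratch register instead.
 */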
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
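/*
 * Keeping the list sorted (larger entries first, then by descending
 * data) makes identical constants adjacent, which allows
 * tcg_out_pool_finalize() below to emit each distinct value only once
 * and point later references at the earlier copy.
 */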
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
890 typedef struct TCGConstraintSet {
891     uint8_t nb_oargs, nb_iargs;
892     const char *args_ct_str[TCG_MAX_OP_ARGS];
893 } TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
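/*
 * A worked example: an entry such as C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expanded above (for the enum) to the identifier
 * c_o1_i2_r_r_ri, and expands here to the initializer
 * { 1, 2, { "r", "r", "ri" } }: one output and two inputs, the second
 * input accepting either a register or an immediate.
 */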
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
938 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
939 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
940 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
941 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
942 
943 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
944 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
945 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
946 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
947 
948 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
949 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
950 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
951 
952 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
953 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
954 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host isa.
966  *   Otherwise:        The register allocation constraints for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
981 typedef struct TCGOutOpBinary {
982     TCGOutOp base;
983     void (*out_rrr)(TCGContext *s, TCGType type,
984                     TCGReg a0, TCGReg a1, TCGReg a2);
985     void (*out_rri)(TCGContext *s, TCGType type,
986                     TCGReg a0, TCGReg a1, tcg_target_long a2);
987 } TCGOutOpBinary;
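/*
 * A hypothetical backend definition, for illustration: each
 * tcg-target.c.inc provides one descriptor per opcode along the lines of
 *
 *     static void tgen_add(TCGContext *s, TCGType type,
 *                          TCGReg a0, TCGReg a1, TCGReg a2)
 *     {
 *         ... emit the host add instruction ...
 *     }
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, r),
 *         .out_rrr = tgen_add,
 *     };
 *
 * which the all_outop[] table below then references by name.
 */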
988 
989 #include "tcg-target.c.inc"
990 
991 #ifndef CONFIG_TCG_INTERPRETER
992 /* Validate CPUTLBDescFast placement. */
993 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
994                         sizeof(CPUNegativeOffsetState))
995                   < MIN_TLB_MASK_TABLE_OFS);
996 #endif
997 
998 /*
999  * Register V as the TCGOutOp for O.
1000  * This verifies that V is of type T, otherwise giving a nice compiler error.
1001  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1002  */
1003 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1004 
1005 /* Register allocation descriptions for every TCGOpcode. */
1006 static const TCGOutOp * const all_outop[NB_OPS] = {
1007     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1008     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1009     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1010     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1011 };
1012 
1013 #undef OUTOP
1014 
1015 /*
1016  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1017  * and registered the target's TCG globals) must register with this function
1018  * before initiating translation.
1019  *
1020  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1021  * of tcg_region_init() for the reasoning behind this.
1022  *
1023  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1024  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1025  * is not used anymore for translation once this function is called.
1026  *
1027  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1028  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1029  * modes.
1030  */
1031 #ifdef CONFIG_USER_ONLY
1032 void tcg_register_thread(void)
1033 {
1034     tcg_ctx = &tcg_init_ctx;
1035 }
1036 #else
1037 void tcg_register_thread(void)
1038 {
1039     TCGContext *s = g_malloc(sizeof(*s));
1040     unsigned int i, n;
1041 
1042     *s = tcg_init_ctx;
1043 
1044     /* Relink mem_base.  */
1045     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1046         if (tcg_init_ctx.temps[i].mem_base) {
1047             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1048             tcg_debug_assert(b >= 0 && b < n);
1049             s->temps[i].mem_base = &s->temps[b];
1050         }
1051     }
1052 
1053     /* Claim an entry in tcg_ctxs */
1054     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1055     g_assert(n < tcg_max_ctxs);
1056     qatomic_set(&tcg_ctxs[n], s);
1057 
1058     if (n > 0) {
1059         tcg_region_initial_alloc(s);
1060     }
1061 
1062     tcg_ctx = s;
1063 }
1064 #endif /* !CONFIG_USER_ONLY */
1065 
1066 /* pool based memory allocation */
1067 void *tcg_malloc_internal(TCGContext *s, int size)
1068 {
1069     TCGPool *p;
1070     int pool_size;
1071 
1072     if (size > TCG_POOL_CHUNK_SIZE) {
1073         /* big malloc: insert a new pool (XXX: could optimize) */
1074         p = g_malloc(sizeof(TCGPool) + size);
1075         p->size = size;
1076         p->next = s->pool_first_large;
1077         s->pool_first_large = p;
1078         return p->data;
1079     } else {
1080         p = s->pool_current;
1081         if (!p) {
1082             p = s->pool_first;
1083             if (!p)
1084                 goto new_pool;
1085         } else {
1086             if (!p->next) {
1087             new_pool:
1088                 pool_size = TCG_POOL_CHUNK_SIZE;
1089                 p = g_malloc(sizeof(TCGPool) + pool_size);
1090                 p->size = pool_size;
1091                 p->next = NULL;
1092                 if (s->pool_current) {
1093                     s->pool_current->next = p;
1094                 } else {
1095                     s->pool_first = p;
1096                 }
1097             } else {
1098                 p = p->next;
1099             }
1100         }
1101     }
1102     s->pool_current = p;
1103     s->pool_cur = p->data + size;
1104     s->pool_end = p->data + p->size;
1105     return p->data;
1106 }
1107 
1108 void tcg_pool_reset(TCGContext *s)
1109 {
1110     TCGPool *p, *t;
1111     for (p = s->pool_first_large; p; p = t) {
1112         t = p->next;
1113         g_free(p);
1114     }
1115     s->pool_first_large = NULL;
1116     s->pool_cur = s->pool_end = NULL;
1117     s->pool_current = NULL;
1118 }
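/*
 * Objects handed out by tcg_malloc() (labels, relocations, ldst
 * slow-path entries, ...) live only for the translation of a single TB:
 * tcg_pool_reset() frees the one-off large allocations and rewinds the
 * chunk list rather than freeing individual objects, which is what keeps
 * tcg_malloc() cheap.
 */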
1119 
1120 /*
1121  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1122  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1123  * We only use these for layout in tcg_out_ld_helper_ret and
1124  * tcg_out_st_helper_args, and share them between several of
1125  * the helpers, with the end result that they are easier to build manually.
1126  */
1127 
1128 #if TCG_TARGET_REG_BITS == 32
1129 # define dh_typecode_ttl  dh_typecode_i32
1130 #else
1131 # define dh_typecode_ttl  dh_typecode_i64
1132 #endif
1133 
1134 static TCGHelperInfo info_helper_ld32_mmu = {
1135     .flags = TCG_CALL_NO_WG,
1136     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1137               | dh_typemask(env, 1)
1138               | dh_typemask(i64, 2)  /* uint64_t addr */
1139               | dh_typemask(i32, 3)  /* unsigned oi */
1140               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1141 };
1142 
1143 static TCGHelperInfo info_helper_ld64_mmu = {
1144     .flags = TCG_CALL_NO_WG,
1145     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1146               | dh_typemask(env, 1)
1147               | dh_typemask(i64, 2)  /* uint64_t addr */
1148               | dh_typemask(i32, 3)  /* unsigned oi */
1149               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1150 };
1151 
1152 static TCGHelperInfo info_helper_ld128_mmu = {
1153     .flags = TCG_CALL_NO_WG,
1154     .typemask = dh_typemask(i128, 0) /* return Int128 */
1155               | dh_typemask(env, 1)
1156               | dh_typemask(i64, 2)  /* uint64_t addr */
1157               | dh_typemask(i32, 3)  /* unsigned oi */
1158               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1159 };
1160 
1161 static TCGHelperInfo info_helper_st32_mmu = {
1162     .flags = TCG_CALL_NO_WG,
1163     .typemask = dh_typemask(void, 0)
1164               | dh_typemask(env, 1)
1165               | dh_typemask(i64, 2)  /* uint64_t addr */
1166               | dh_typemask(i32, 3)  /* uint32_t data */
1167               | dh_typemask(i32, 4)  /* unsigned oi */
1168               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1169 };
1170 
1171 static TCGHelperInfo info_helper_st64_mmu = {
1172     .flags = TCG_CALL_NO_WG,
1173     .typemask = dh_typemask(void, 0)
1174               | dh_typemask(env, 1)
1175               | dh_typemask(i64, 2)  /* uint64_t addr */
1176               | dh_typemask(i64, 3)  /* uint64_t data */
1177               | dh_typemask(i32, 4)  /* unsigned oi */
1178               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1179 };
1180 
1181 static TCGHelperInfo info_helper_st128_mmu = {
1182     .flags = TCG_CALL_NO_WG,
1183     .typemask = dh_typemask(void, 0)
1184               | dh_typemask(env, 1)
1185               | dh_typemask(i64, 2)  /* uint64_t addr */
1186               | dh_typemask(i128, 3) /* Int128 data */
1187               | dh_typemask(i32, 4)  /* unsigned oi */
1188               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1189 };
1190 
1191 #ifdef CONFIG_TCG_INTERPRETER
1192 static ffi_type *typecode_to_ffi(int argmask)
1193 {
1194     /*
1195      * libffi does not support __int128_t, so we have forced Int128
1196      * to use the structure definition instead of the builtin type.
1197      */
1198     static ffi_type *ffi_type_i128_elements[3] = {
1199         &ffi_type_uint64,
1200         &ffi_type_uint64,
1201         NULL
1202     };
1203     static ffi_type ffi_type_i128 = {
1204         .size = 16,
1205         .alignment = __alignof__(Int128),
1206         .type = FFI_TYPE_STRUCT,
1207         .elements = ffi_type_i128_elements,
1208     };
1209 
1210     switch (argmask) {
1211     case dh_typecode_void:
1212         return &ffi_type_void;
1213     case dh_typecode_i32:
1214         return &ffi_type_uint32;
1215     case dh_typecode_s32:
1216         return &ffi_type_sint32;
1217     case dh_typecode_i64:
1218         return &ffi_type_uint64;
1219     case dh_typecode_s64:
1220         return &ffi_type_sint64;
1221     case dh_typecode_ptr:
1222         return &ffi_type_pointer;
1223     case dh_typecode_i128:
1224         return &ffi_type_i128;
1225     }
1226     g_assert_not_reached();
1227 }
1228 
1229 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1230 {
1231     unsigned typemask = info->typemask;
1232     struct {
1233         ffi_cif cif;
1234         ffi_type *args[];
1235     } *ca;
1236     ffi_status status;
1237     int nargs;
1238 
1239     /* Ignoring the return type, find the last non-zero field. */
1240     nargs = 32 - clz32(typemask >> 3);
1241     nargs = DIV_ROUND_UP(nargs, 3);
1242     assert(nargs <= MAX_CALL_IARGS);
1243 
1244     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1245     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1246     ca->cif.nargs = nargs;
1247 
1248     if (nargs != 0) {
1249         ca->cif.arg_types = ca->args;
1250         for (int j = 0; j < nargs; ++j) {
1251             int typecode = extract32(typemask, (j + 1) * 3, 3);
1252             ca->args[j] = typecode_to_ffi(typecode);
1253         }
1254     }
1255 
1256     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1257                           ca->cif.rtype, ca->cif.arg_types);
1258     assert(status == FFI_OK);
1259 
1260     return &ca->cif;
1261 }
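/*
 * A worked example of the decoding above: typemask packs one 3-bit
 * typecode per slot, the return type in bits [2:0] and argument N in
 * bits [3N+2:3N].  For info_helper_ld64_mmu (arguments env, i64, i32,
 * ptr), the highest non-zero argument field is the fourth, so
 * 32 - clz32(typemask >> 3) falls in (9, 12] and DIV_ROUND_UP(..., 3)
 * yields nargs == 4.
 */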
1262 
1263 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1264 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1265 #else
1266 #define HELPER_INFO_INIT(I)      (&(I)->init)
1267 #define HELPER_INFO_INIT_VAL(I)  1
1268 #endif /* CONFIG_TCG_INTERPRETER */
1269 
1270 static inline bool arg_slot_reg_p(unsigned arg_slot)
1271 {
1272     /*
1273      * Split the sizeof away from the comparison to avoid Werror from
1274      * "unsigned < 0 is always false", when iarg_regs is empty.
1275      */
1276     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1277     return arg_slot < nreg;
1278 }
1279 
1280 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1281 {
1282     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1283     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1284 
1285     tcg_debug_assert(stk_slot < max);
1286     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1287 }
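/*
 * Example (hypothetical ABI): with 6 integer argument registers and
 * 8-byte stack slots, arg_slot values 0..5 name registers, while slot 7
 * maps to a stack offset of TCG_TARGET_CALL_STACK_OFFSET + 1 * 8 bytes.
 */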
1288 
1289 typedef struct TCGCumulativeArgs {
1290     int arg_idx;                /* tcg_gen_callN args[] */
1291     int info_in_idx;            /* TCGHelperInfo in[] */
1292     int arg_slot;               /* regs+stack slot */
1293     int ref_slot;               /* stack slots for references */
1294 } TCGCumulativeArgs;
1295 
1296 static void layout_arg_even(TCGCumulativeArgs *cum)
1297 {
1298     cum->arg_slot += cum->arg_slot & 1;
1299 }
1300 
1301 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1302                          TCGCallArgumentKind kind)
1303 {
1304     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1305 
1306     *loc = (TCGCallArgumentLoc){
1307         .kind = kind,
1308         .arg_idx = cum->arg_idx,
1309         .arg_slot = cum->arg_slot,
1310     };
1311     cum->info_in_idx++;
1312     cum->arg_slot++;
1313 }
1314 
1315 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1316                                 TCGHelperInfo *info, int n)
1317 {
1318     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1319 
1320     for (int i = 0; i < n; ++i) {
1321         /* Layout all using the same arg_idx, adjusting the subindex. */
1322         loc[i] = (TCGCallArgumentLoc){
1323             .kind = TCG_CALL_ARG_NORMAL,
1324             .arg_idx = cum->arg_idx,
1325             .tmp_subindex = i,
1326             .arg_slot = cum->arg_slot + i,
1327         };
1328     }
1329     cum->info_in_idx += n;
1330     cum->arg_slot += n;
1331 }
1332 
1333 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1334 {
1335     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1336     int n = 128 / TCG_TARGET_REG_BITS;
1337 
1338     /* The first subindex carries the pointer. */
1339     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1340 
1341     /*
1342      * The callee is allowed to clobber memory associated with a
1343      * structure passed by reference.  Therefore we must make copies.
1344      * Allocate space from "ref_slot", which will be adjusted to
1345      * follow the parameters on the stack.
1346      */
1347     loc[0].ref_slot = cum->ref_slot;
1348 
1349     /*
1350      * Subsequent words also go into the reference slot, but
1351      * do not accumulate into the regular arguments.
1352      */
1353     for (int i = 1; i < n; ++i) {
1354         loc[i] = (TCGCallArgumentLoc){
1355             .kind = TCG_CALL_ARG_BY_REF_N,
1356             .arg_idx = cum->arg_idx,
1357             .tmp_subindex = i,
1358             .ref_slot = cum->ref_slot + i,
1359         };
1360     }
1361     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1362     cum->ref_slot += n;
1363 }
1364 
1365 static void init_call_layout(TCGHelperInfo *info)
1366 {
1367     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1368     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1369     unsigned typemask = info->typemask;
1370     unsigned typecode;
1371     TCGCumulativeArgs cum = { };
1372 
1373     /*
1374      * Parse and place any function return value.
1375      */
1376     typecode = typemask & 7;
1377     switch (typecode) {
1378     case dh_typecode_void:
1379         info->nr_out = 0;
1380         break;
1381     case dh_typecode_i32:
1382     case dh_typecode_s32:
1383     case dh_typecode_ptr:
1384         info->nr_out = 1;
1385         info->out_kind = TCG_CALL_RET_NORMAL;
1386         break;
1387     case dh_typecode_i64:
1388     case dh_typecode_s64:
1389         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1390         info->out_kind = TCG_CALL_RET_NORMAL;
1391         /* Query the last register now to trigger any assert early. */
1392         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1393         break;
1394     case dh_typecode_i128:
1395         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1396         info->out_kind = TCG_TARGET_CALL_RET_I128;
1397         switch (TCG_TARGET_CALL_RET_I128) {
1398         case TCG_CALL_RET_NORMAL:
1399             /* Query the last register now to trigger any assert early. */
1400             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1401             break;
1402         case TCG_CALL_RET_BY_VEC:
1403             /* Query the single register now to trigger any assert early. */
1404             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1405             break;
1406         case TCG_CALL_RET_BY_REF:
1407             /*
1408              * Allocate the first argument to the output.
1409              * We don't need to store this anywhere, just make it
1410              * unavailable for use in the input loop below.
1411              */
1412             cum.arg_slot = 1;
1413             break;
1414         default:
1415             qemu_build_not_reached();
1416         }
1417         break;
1418     default:
1419         g_assert_not_reached();
1420     }
1421 
1422     /*
1423      * Parse and place function arguments.
1424      */
1425     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1426         TCGCallArgumentKind kind;
1427         TCGType type;
1428 
1429         typecode = typemask & 7;
1430         switch (typecode) {
1431         case dh_typecode_i32:
1432         case dh_typecode_s32:
1433             type = TCG_TYPE_I32;
1434             break;
1435         case dh_typecode_i64:
1436         case dh_typecode_s64:
1437             type = TCG_TYPE_I64;
1438             break;
1439         case dh_typecode_ptr:
1440             type = TCG_TYPE_PTR;
1441             break;
1442         case dh_typecode_i128:
1443             type = TCG_TYPE_I128;
1444             break;
1445         default:
1446             g_assert_not_reached();
1447         }
1448 
1449         switch (type) {
1450         case TCG_TYPE_I32:
1451             switch (TCG_TARGET_CALL_ARG_I32) {
1452             case TCG_CALL_ARG_EVEN:
1453                 layout_arg_even(&cum);
1454                 /* fall through */
1455             case TCG_CALL_ARG_NORMAL:
1456                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1457                 break;
1458             case TCG_CALL_ARG_EXTEND:
1459                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1460                 layout_arg_1(&cum, info, kind);
1461                 break;
1462             default:
1463                 qemu_build_not_reached();
1464             }
1465             break;
1466 
1467         case TCG_TYPE_I64:
1468             switch (TCG_TARGET_CALL_ARG_I64) {
1469             case TCG_CALL_ARG_EVEN:
1470                 layout_arg_even(&cum);
1471                 /* fall through */
1472             case TCG_CALL_ARG_NORMAL:
1473                 if (TCG_TARGET_REG_BITS == 32) {
1474                     layout_arg_normal_n(&cum, info, 2);
1475                 } else {
1476                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1477                 }
1478                 break;
1479             default:
1480                 qemu_build_not_reached();
1481             }
1482             break;
1483 
1484         case TCG_TYPE_I128:
1485             switch (TCG_TARGET_CALL_ARG_I128) {
1486             case TCG_CALL_ARG_EVEN:
1487                 layout_arg_even(&cum);
1488                 /* fall through */
1489             case TCG_CALL_ARG_NORMAL:
1490                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1491                 break;
1492             case TCG_CALL_ARG_BY_REF:
1493                 layout_arg_by_ref(&cum, info);
1494                 break;
1495             default:
1496                 qemu_build_not_reached();
1497             }
1498             break;
1499 
1500         default:
1501             g_assert_not_reached();
1502         }
1503     }
1504     info->nr_in = cum.info_in_idx;
1505 
1506     /* Validate that we didn't overrun the input array. */
1507     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1508     /* Validate the backend has enough argument space. */
1509     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1510 
1511     /*
1512      * Relocate the "ref_slot" area to the end of the parameters.
1513      * Minimizing this stack offset helps code size for x86,
1514      * which has a signed 8-bit offset encoding.
1515      */
1516     if (cum.ref_slot != 0) {
1517         int ref_base = 0;
1518 
1519         if (cum.arg_slot > max_reg_slots) {
1520             int align = __alignof(Int128) / sizeof(tcg_target_long);
1521 
1522             ref_base = cum.arg_slot - max_reg_slots;
1523             if (align > 1) {
1524                 ref_base = ROUND_UP(ref_base, align);
1525             }
1526         }
1527         assert(ref_base + cum.ref_slot <= max_stk_slots);
1528         ref_base += max_reg_slots;
1529 
1530         if (ref_base != 0) {
1531             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1532                 TCGCallArgumentLoc *loc = &info->in[i];
1533                 switch (loc->kind) {
1534                 case TCG_CALL_ARG_BY_REF:
1535                 case TCG_CALL_ARG_BY_REF_N:
1536                     loc->ref_slot += ref_base;
1537                     break;
1538                 default:
1539                     break;
1540                 }
1541             }
1542         }
1543     }
1544 }
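/*
 * A worked example (hypothetical 64-bit host with 6 argument registers
 * and TCG_CALL_ARG_BY_REF for I128): for helper_st16_mmu with arguments
 * (env, i64 addr, Int128 data, i32 oi, ptr ra), the Int128 consumes one
 * regular slot for the pointer plus two ref_slot words for the copy of
 * the data; the relocation step above then moves those ref slots past
 * the last parameter, so the copy sits just above the outgoing argument
 * area on the stack.
 */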
1545 
1546 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1547 static void process_constraint_sets(void);
1548 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1549                                             TCGReg reg, const char *name);
1550 
1551 static void tcg_context_init(unsigned max_threads)
1552 {
1553     TCGContext *s = &tcg_init_ctx;
1554     int n, i;
1555     TCGTemp *ts;
1556 
1557     memset(s, 0, sizeof(*s));
1558     s->nb_globals = 0;
1559 
1560     init_call_layout(&info_helper_ld32_mmu);
1561     init_call_layout(&info_helper_ld64_mmu);
1562     init_call_layout(&info_helper_ld128_mmu);
1563     init_call_layout(&info_helper_st32_mmu);
1564     init_call_layout(&info_helper_st64_mmu);
1565     init_call_layout(&info_helper_st128_mmu);
1566 
1567     tcg_target_init(s);
1568     process_constraint_sets();
1569 
1570     /* Reverse the order of the saved registers, assuming they're all at
1571        the start of tcg_target_reg_alloc_order.  */
1572     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1573         int r = tcg_target_reg_alloc_order[n];
1574         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1575             break;
1576         }
1577     }
1578     for (i = 0; i < n; ++i) {
1579         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1580     }
1581     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1582         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1583     }
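    /*
     * E.g. for a hypothetical order {s0, s1, s2, c0, c1} where the
     * s-registers are call-saved, the indirect order is
     * {s2, s1, s0, c0, c1}: the call-saved prefix is reversed and the
     * call-clobbered tail is unchanged.
     */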
1584 
1585     tcg_ctx = s;
1586     /*
1587      * In user-mode we simply share the init context among threads, since we
1588      * use a single region. See the documentation of tcg_region_init()
1589      * for the reasoning behind this.
1590      * In system-mode we will have at most max_threads TCG threads.
1591      */
1592 #ifdef CONFIG_USER_ONLY
1593     tcg_ctxs = &tcg_ctx;
1594     tcg_cur_ctxs = 1;
1595     tcg_max_ctxs = 1;
1596 #else
1597     tcg_max_ctxs = max_threads;
1598     tcg_ctxs = g_new0(TCGContext *, max_threads);
1599 #endif
1600 
1601     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1602     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1603     tcg_env = temp_tcgv_ptr(ts);
1604 }
1605 
1606 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1607 {
1608     tcg_context_init(max_threads);
1609     tcg_region_init(tb_size, splitwx, max_threads);
1610 }
1611 
1612 /*
1613  * Allocate TBs right before their corresponding translated code, making
1614  * sure that TBs and code are on different cache lines.
1615  */
1616 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1617 {
1618     uintptr_t align = qemu_icache_linesize;
1619     TranslationBlock *tb;
1620     void *next;
1621 
1622  retry:
1623     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1624     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1625 
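    /*
     * The highwater mark is kept short of the true end of the region so
     * that an in-progress translation may overrun slightly before the
     * overflow is noticed.  If even the TB header does not fit below it,
     * move on to the next region; tcg_region_alloc returns non-zero only
     * when no region is left.
     */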
1626     if (unlikely(next > s->code_gen_highwater)) {
1627         if (tcg_region_alloc(s)) {
1628             return NULL;
1629         }
1630         goto retry;
1631     }
1632     qatomic_set(&s->code_gen_ptr, next);
1633     return tb;
1634 }
1635 
1636 void tcg_prologue_init(void)
1637 {
1638     TCGContext *s = tcg_ctx;
1639     size_t prologue_size;
1640 
1641     s->code_ptr = s->code_gen_ptr;
1642     s->code_buf = s->code_gen_ptr;
1643     s->data_gen_ptr = NULL;
1644 
1645 #ifndef CONFIG_TCG_INTERPRETER
1646     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1647 #endif
1648 
1649     s->pool_labels = NULL;
1650 
1651     qemu_thread_jit_write();
1652     /* Generate the prologue.  */
1653     tcg_target_qemu_prologue(s);
1654 
1655     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1656     {
1657         int result = tcg_out_pool_finalize(s);
1658         tcg_debug_assert(result == 0);
1659     }
1660 
1661     prologue_size = tcg_current_code_size(s);
1662     perf_report_prologue(s->code_gen_ptr, prologue_size);
1663 
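    /*
     * Flush the freshly written prologue from the data cache and
     * invalidate the instruction cache: the first argument is the
     * executable (RX) view, the second the writable (RW) view; with
     * split-wx these are distinct mappings of the same memory.
     */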
1664 #ifndef CONFIG_TCG_INTERPRETER
1665     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1666                         (uintptr_t)s->code_buf, prologue_size);
1667 #endif
1668 
1669     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1670         FILE *logfile = qemu_log_trylock();
1671         if (logfile) {
1672             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1673             if (s->data_gen_ptr) {
1674                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1675                 size_t data_size = prologue_size - code_size;
1676                 size_t i;
1677 
1678                 disas(logfile, s->code_gen_ptr, code_size);
1679 
1680                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1681                     if (sizeof(tcg_target_ulong) == 8) {
1682                         fprintf(logfile,
1683                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1684                                 (uintptr_t)s->data_gen_ptr + i,
1685                                 *(uint64_t *)(s->data_gen_ptr + i));
1686                     } else {
1687                         fprintf(logfile,
1688                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1689                                 (uintptr_t)s->data_gen_ptr + i,
1690                                 *(uint32_t *)(s->data_gen_ptr + i));
1691                     }
1692                 }
1693             } else {
1694                 disas(logfile, s->code_gen_ptr, prologue_size);
1695             }
1696             fprintf(logfile, "\n");
1697             qemu_log_unlock(logfile);
1698         }
1699     }
1700 
1701 #ifndef CONFIG_TCG_INTERPRETER
1702     /*
1703      * Assert that goto_ptr is implemented completely, i.e. that an epilogue was set.
1704      * For tci, we use NULL as the signal to return from the interpreter,
1705      * so skip this check.
1706      */
1707     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1708 #endif
1709 
1710     tcg_region_prologue_set(s);
1711 }
1712 
1713 void tcg_func_start(TCGContext *s)
1714 {
1715     tcg_pool_reset(s);
1716     s->nb_temps = s->nb_globals;
1717 
1718     /* No temps have been previously allocated; reset the free-temp bitmaps.  */
1719     tcg_temp_ebb_reset_freed(s);
1720 
1721     /* No constant temps have been previously allocated. */
1722     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1723         if (s->const_table[i]) {
1724             g_hash_table_remove_all(s->const_table[i]);
1725         }
1726     }
1727 
1728     s->nb_ops = 0;
1729     s->nb_labels = 0;
1730     s->current_frame_offset = s->frame_start;
1731 
1732 #ifdef CONFIG_DEBUG_TCG
1733     s->goto_tb_issue_mask = 0;
1734 #endif
1735 
1736     QTAILQ_INIT(&s->ops);
1737     QTAILQ_INIT(&s->free_ops);
1738     s->emit_before_op = NULL;
1739     QSIMPLEQ_INIT(&s->labels);
1740 
1741     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1742     tcg_debug_assert(s->insn_start_words > 0);
1743 }
1744 
1745 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1746 {
1747     int n = s->nb_temps++;
1748 
1749     if (n >= TCG_MAX_TEMPS) {
1750         tcg_raise_tb_overflow(s);
1751     }
1752     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1753 }
1754 
1755 static TCGTemp *tcg_global_alloc(TCGContext *s)
1756 {
1757     TCGTemp *ts;
1758 
1759     tcg_debug_assert(s->nb_globals == s->nb_temps);
1760     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1761     s->nb_globals++;
1762     ts = tcg_temp_alloc(s);
1763     ts->kind = TEMP_GLOBAL;
1764 
1765     return ts;
1766 }
1767 
1768 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1769                                             TCGReg reg, const char *name)
1770 {
1771     TCGTemp *ts;
1772 
1773     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1774 
1775     ts = tcg_global_alloc(s);
1776     ts->base_type = type;
1777     ts->type = type;
1778     ts->kind = TEMP_FIXED;
1779     ts->reg = reg;
1780     ts->name = name;
1781     tcg_regset_set_reg(s->reserved_regs, reg);
1782 
1783     return ts;
1784 }
1785 
1786 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1787 {
1788     s->frame_start = start;
1789     s->frame_end = start + size;
1790     s->frame_temp
1791         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1792 }
1793 
1794 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1795                                             const char *name, TCGType type)
1796 {
1797     TCGContext *s = tcg_ctx;
1798     TCGTemp *base_ts = tcgv_ptr_temp(base);
1799     TCGTemp *ts = tcg_global_alloc(s);
1800     int indirect_reg = 0;
1801 
1802     switch (base_ts->kind) {
1803     case TEMP_FIXED:
1804         break;
1805     case TEMP_GLOBAL:
1806         /* We do not support double-indirect registers.  */
1807         tcg_debug_assert(!base_ts->indirect_reg);
1808         base_ts->indirect_base = 1;
1809         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1810                             ? 2 : 1);
1811         indirect_reg = 1;
1812         break;
1813     default:
1814         g_assert_not_reached();
1815     }
1816 
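    /*
     * On a 32-bit host a 64-bit global becomes two consecutive 32-bit
     * temps, "<name>_0" covering offset and "<name>_1" covering
     * offset + 4, with temp_subindex marking the second half.
     */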
1817     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1818         TCGTemp *ts2 = tcg_global_alloc(s);
1819         char buf[64];
1820 
1821         ts->base_type = TCG_TYPE_I64;
1822         ts->type = TCG_TYPE_I32;
1823         ts->indirect_reg = indirect_reg;
1824         ts->mem_allocated = 1;
1825         ts->mem_base = base_ts;
1826         ts->mem_offset = offset;
1827         pstrcpy(buf, sizeof(buf), name);
1828         pstrcat(buf, sizeof(buf), "_0");
1829         ts->name = strdup(buf);
1830 
1831         tcg_debug_assert(ts2 == ts + 1);
1832         ts2->base_type = TCG_TYPE_I64;
1833         ts2->type = TCG_TYPE_I32;
1834         ts2->indirect_reg = indirect_reg;
1835         ts2->mem_allocated = 1;
1836         ts2->mem_base = base_ts;
1837         ts2->mem_offset = offset + 4;
1838         ts2->temp_subindex = 1;
1839         pstrcpy(buf, sizeof(buf), name);
1840         pstrcat(buf, sizeof(buf), "_1");
1841         ts2->name = strdup(buf);
1842     } else {
1843         ts->base_type = type;
1844         ts->type = type;
1845         ts->indirect_reg = indirect_reg;
1846         ts->mem_allocated = 1;
1847         ts->mem_base = base_ts;
1848         ts->mem_offset = offset;
1849         ts->name = name;
1850     }
1851     return ts;
1852 }
1853 
1854 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1855 {
1856     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1857     return temp_tcgv_i32(ts);
1858 }
1859 
1860 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1861 {
1862     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1863     return temp_tcgv_i64(ts);
1864 }
1865 
1866 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1867 {
1868     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1869     return temp_tcgv_ptr(ts);
1870 }
1871 
1872 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1873 {
1874     TCGContext *s = tcg_ctx;
1875     TCGTemp *ts;
1876     int n;
1877 
1878     if (kind == TEMP_EBB) {
1879         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1880 
1881         if (idx < TCG_MAX_TEMPS) {
1882             /* There is already an available temp with the right type.  */
1883             clear_bit(idx, s->free_temps[type].l);
1884 
1885             ts = &s->temps[idx];
1886             ts->temp_allocated = 1;
1887             tcg_debug_assert(ts->base_type == type);
1888             tcg_debug_assert(ts->kind == kind);
1889             return ts;
1890         }
1891     } else {
1892         tcg_debug_assert(kind == TEMP_TB);
1893     }
1894 
1895     switch (type) {
1896     case TCG_TYPE_I32:
1897     case TCG_TYPE_V64:
1898     case TCG_TYPE_V128:
1899     case TCG_TYPE_V256:
1900         n = 1;
1901         break;
1902     case TCG_TYPE_I64:
1903         n = 64 / TCG_TARGET_REG_BITS;
1904         break;
1905     case TCG_TYPE_I128:
1906         n = 128 / TCG_TARGET_REG_BITS;
1907         break;
1908     default:
1909         g_assert_not_reached();
1910     }
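    /* E.g. an I64 needs two host-register-sized parts on a 32-bit host;
       an I128 needs two parts on a 64-bit host and four on 32-bit. */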
1911 
1912     ts = tcg_temp_alloc(s);
1913     ts->base_type = type;
1914     ts->temp_allocated = 1;
1915     ts->kind = kind;
1916 
1917     if (n == 1) {
1918         ts->type = type;
1919     } else {
1920         ts->type = TCG_TYPE_REG;
1921 
1922         for (int i = 1; i < n; ++i) {
1923             TCGTemp *ts2 = tcg_temp_alloc(s);
1924 
1925             tcg_debug_assert(ts2 == ts + i);
1926             ts2->base_type = type;
1927             ts2->type = TCG_TYPE_REG;
1928             ts2->temp_allocated = 1;
1929             ts2->temp_subindex = i;
1930             ts2->kind = kind;
1931         }
1932     }
1933     return ts;
1934 }
1935 
1936 TCGv_i32 tcg_temp_new_i32(void)
1937 {
1938     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1939 }
1940 
1941 TCGv_i32 tcg_temp_ebb_new_i32(void)
1942 {
1943     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1944 }
1945 
1946 TCGv_i64 tcg_temp_new_i64(void)
1947 {
1948     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1949 }
1950 
1951 TCGv_i64 tcg_temp_ebb_new_i64(void)
1952 {
1953     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1954 }
1955 
1956 TCGv_ptr tcg_temp_new_ptr(void)
1957 {
1958     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1959 }
1960 
1961 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1962 {
1963     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1964 }
1965 
1966 TCGv_i128 tcg_temp_new_i128(void)
1967 {
1968     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1969 }
1970 
1971 TCGv_i128 tcg_temp_ebb_new_i128(void)
1972 {
1973     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1974 }
1975 
1976 TCGv_vec tcg_temp_new_vec(TCGType type)
1977 {
1978     TCGTemp *t;
1979 
1980 #ifdef CONFIG_DEBUG_TCG
1981     switch (type) {
1982     case TCG_TYPE_V64:
1983         assert(TCG_TARGET_HAS_v64);
1984         break;
1985     case TCG_TYPE_V128:
1986         assert(TCG_TARGET_HAS_v128);
1987         break;
1988     case TCG_TYPE_V256:
1989         assert(TCG_TARGET_HAS_v256);
1990         break;
1991     default:
1992         g_assert_not_reached();
1993     }
1994 #endif
1995 
1996     t = tcg_temp_new_internal(type, TEMP_EBB);
1997     return temp_tcgv_vec(t);
1998 }
1999 
2000 /* Create a new temp of the same type as an existing temp.  */
2001 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2002 {
2003     TCGTemp *t = tcgv_vec_temp(match);
2004 
2005     tcg_debug_assert(t->temp_allocated != 0);
2006 
2007     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2008     return temp_tcgv_vec(t);
2009 }
2010 
2011 void tcg_temp_free_internal(TCGTemp *ts)
2012 {
2013     TCGContext *s = tcg_ctx;
2014 
2015     switch (ts->kind) {
2016     case TEMP_CONST:
2017     case TEMP_TB:
2018         /* Silently ignore free. */
2019         break;
2020     case TEMP_EBB:
2021         tcg_debug_assert(ts->temp_allocated != 0);
2022         ts->temp_allocated = 0;
2023         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2024         break;
2025     default:
2026         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2027         g_assert_not_reached();
2028     }
2029 }
2030 
2031 void tcg_temp_free_i32(TCGv_i32 arg)
2032 {
2033     tcg_temp_free_internal(tcgv_i32_temp(arg));
2034 }
2035 
2036 void tcg_temp_free_i64(TCGv_i64 arg)
2037 {
2038     tcg_temp_free_internal(tcgv_i64_temp(arg));
2039 }
2040 
2041 void tcg_temp_free_i128(TCGv_i128 arg)
2042 {
2043     tcg_temp_free_internal(tcgv_i128_temp(arg));
2044 }
2045 
2046 void tcg_temp_free_ptr(TCGv_ptr arg)
2047 {
2048     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2049 }
2050 
2051 void tcg_temp_free_vec(TCGv_vec arg)
2052 {
2053     tcg_temp_free_internal(tcgv_vec_temp(arg));
2054 }
2055 
2056 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2057 {
2058     TCGContext *s = tcg_ctx;
2059     GHashTable *h = s->const_table[type];
2060     TCGTemp *ts;
2061 
2062     if (h == NULL) {
2063         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2064         s->const_table[type] = h;
2065     }
2066 
2067     ts = g_hash_table_lookup(h, &val);
2068     if (ts == NULL) {
2069         int64_t *val_ptr;
2070 
2071         ts = tcg_temp_alloc(s);
2072 
2073         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2074             TCGTemp *ts2 = tcg_temp_alloc(s);
2075 
2076             tcg_debug_assert(ts2 == ts + 1);
2077 
2078             ts->base_type = TCG_TYPE_I64;
2079             ts->type = TCG_TYPE_I32;
2080             ts->kind = TEMP_CONST;
2081             ts->temp_allocated = 1;
2082 
2083             ts2->base_type = TCG_TYPE_I64;
2084             ts2->type = TCG_TYPE_I32;
2085             ts2->kind = TEMP_CONST;
2086             ts2->temp_allocated = 1;
2087             ts2->temp_subindex = 1;
2088 
2089             /*
2090              * Retain the full value of the 64-bit constant in the low
2091              * part, so that the hash table works.  Actual uses will
2092              * truncate the value to the low part.
2093              */
2094             ts[HOST_BIG_ENDIAN].val = val;
2095             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2096             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2097         } else {
2098             ts->base_type = type;
2099             ts->type = type;
2100             ts->kind = TEMP_CONST;
2101             ts->temp_allocated = 1;
2102             ts->val = val;
2103             val_ptr = &ts->val;
2104         }
2105         g_hash_table_insert(h, val_ptr, ts);
2106     }
2107 
2108     return ts;
2109 }
2110 
2111 TCGv_i32 tcg_constant_i32(int32_t val)
2112 {
2113     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2114 }
2115 
2116 TCGv_i64 tcg_constant_i64(int64_t val)
2117 {
2118     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2119 }
2120 
2121 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2122 {
2123     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2124 }
2125 
2126 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2127 {
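    /* dup_const replicates the element across a 64-bit lane,
       e.g. (MO_8, 0xab) becomes 0xabababababababab. */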
2128     val = dup_const(vece, val);
2129     return temp_tcgv_vec(tcg_constant_internal(type, val));
2130 }
2131 
2132 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2133 {
2134     TCGTemp *t = tcgv_vec_temp(match);
2135 
2136     tcg_debug_assert(t->temp_allocated != 0);
2137     return tcg_constant_vec(t->base_type, vece, val);
2138 }
2139 
2140 #ifdef CONFIG_DEBUG_TCG
2141 size_t temp_idx(TCGTemp *ts)
2142 {
2143     ptrdiff_t n = ts - tcg_ctx->temps;
2144     assert(n >= 0 && n < tcg_ctx->nb_temps);
2145     return n;
2146 }
2147 
2148 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2149 {
2150     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2151 
2152     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2153     assert(o % sizeof(TCGTemp) == 0);
2154 
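    /* A TCGv is really the byte offset of its TCGTemp from tcg_ctx. */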
2155     return (void *)tcg_ctx + (uintptr_t)v;
2156 }
2157 #endif /* CONFIG_DEBUG_TCG */
2158 
2159 /*
2160  * Return true if OP may appear in the opcode stream with TYPE.
2161  * Test the runtime variable that controls each opcode.
2162  */
2163 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2164 {
2165     bool has_type;
2166 
2167     switch (type) {
2168     case TCG_TYPE_I32:
2169         has_type = true;
2170         break;
2171     case TCG_TYPE_I64:
2172         has_type = TCG_TARGET_REG_BITS == 64;
2173         break;
2174     case TCG_TYPE_V64:
2175         has_type = TCG_TARGET_HAS_v64;
2176         break;
2177     case TCG_TYPE_V128:
2178         has_type = TCG_TARGET_HAS_v128;
2179         break;
2180     case TCG_TYPE_V256:
2181         has_type = TCG_TARGET_HAS_v256;
2182         break;
2183     default:
2184         has_type = false;
2185         break;
2186     }
2187 
2188     switch (op) {
2189     case INDEX_op_discard:
2190     case INDEX_op_set_label:
2191     case INDEX_op_call:
2192     case INDEX_op_br:
2193     case INDEX_op_mb:
2194     case INDEX_op_insn_start:
2195     case INDEX_op_exit_tb:
2196     case INDEX_op_goto_tb:
2197     case INDEX_op_goto_ptr:
2198     case INDEX_op_qemu_ld_i32:
2199     case INDEX_op_qemu_st_i32:
2200     case INDEX_op_qemu_ld_i64:
2201     case INDEX_op_qemu_st_i64:
2202         return true;
2203 
2204     case INDEX_op_qemu_st8_i32:
2205         return TCG_TARGET_HAS_qemu_st8_i32;
2206 
2207     case INDEX_op_qemu_ld_i128:
2208     case INDEX_op_qemu_st_i128:
2209         return TCG_TARGET_HAS_qemu_ldst_i128;
2210 
2211     case INDEX_op_add:
2212     case INDEX_op_and:
2213     case INDEX_op_mov:
2214     case INDEX_op_or:
2215         return has_type;
2216 
2217     case INDEX_op_setcond_i32:
2218     case INDEX_op_brcond_i32:
2219     case INDEX_op_movcond_i32:
2220     case INDEX_op_ld8u_i32:
2221     case INDEX_op_ld8s_i32:
2222     case INDEX_op_ld16u_i32:
2223     case INDEX_op_ld16s_i32:
2224     case INDEX_op_ld_i32:
2225     case INDEX_op_st8_i32:
2226     case INDEX_op_st16_i32:
2227     case INDEX_op_st_i32:
2228     case INDEX_op_sub_i32:
2229     case INDEX_op_neg_i32:
2230     case INDEX_op_mul_i32:
2231     case INDEX_op_xor_i32:
2232     case INDEX_op_shl_i32:
2233     case INDEX_op_shr_i32:
2234     case INDEX_op_sar_i32:
2235     case INDEX_op_extract_i32:
2236     case INDEX_op_sextract_i32:
2237     case INDEX_op_deposit_i32:
2238         return true;
2239 
2240     case INDEX_op_negsetcond_i32:
2241         return TCG_TARGET_HAS_negsetcond_i32;
2242     case INDEX_op_div_i32:
2243     case INDEX_op_divu_i32:
2244         return TCG_TARGET_HAS_div_i32;
2245     case INDEX_op_rem_i32:
2246     case INDEX_op_remu_i32:
2247         return TCG_TARGET_HAS_rem_i32;
2248     case INDEX_op_div2_i32:
2249     case INDEX_op_divu2_i32:
2250         return TCG_TARGET_HAS_div2_i32;
2251     case INDEX_op_rotl_i32:
2252     case INDEX_op_rotr_i32:
2253         return TCG_TARGET_HAS_rot_i32;
2254     case INDEX_op_extract2_i32:
2255         return TCG_TARGET_HAS_extract2_i32;
2256     case INDEX_op_add2_i32:
2257         return TCG_TARGET_HAS_add2_i32;
2258     case INDEX_op_sub2_i32:
2259         return TCG_TARGET_HAS_sub2_i32;
2260     case INDEX_op_mulu2_i32:
2261         return TCG_TARGET_HAS_mulu2_i32;
2262     case INDEX_op_muls2_i32:
2263         return TCG_TARGET_HAS_muls2_i32;
2264     case INDEX_op_muluh_i32:
2265         return TCG_TARGET_HAS_muluh_i32;
2266     case INDEX_op_mulsh_i32:
2267         return TCG_TARGET_HAS_mulsh_i32;
2268     case INDEX_op_bswap16_i32:
2269         return TCG_TARGET_HAS_bswap16_i32;
2270     case INDEX_op_bswap32_i32:
2271         return TCG_TARGET_HAS_bswap32_i32;
2272     case INDEX_op_not_i32:
2273         return TCG_TARGET_HAS_not_i32;
2274     case INDEX_op_orc_i32:
2275         return TCG_TARGET_HAS_orc_i32;
2276     case INDEX_op_eqv_i32:
2277         return TCG_TARGET_HAS_eqv_i32;
2278     case INDEX_op_nand_i32:
2279         return TCG_TARGET_HAS_nand_i32;
2280     case INDEX_op_nor_i32:
2281         return TCG_TARGET_HAS_nor_i32;
2282     case INDEX_op_clz_i32:
2283         return TCG_TARGET_HAS_clz_i32;
2284     case INDEX_op_ctz_i32:
2285         return TCG_TARGET_HAS_ctz_i32;
2286     case INDEX_op_ctpop_i32:
2287         return TCG_TARGET_HAS_ctpop_i32;
2288 
2289     case INDEX_op_brcond2_i32:
2290     case INDEX_op_setcond2_i32:
2291         return TCG_TARGET_REG_BITS == 32;
2292 
2293     case INDEX_op_setcond_i64:
2294     case INDEX_op_brcond_i64:
2295     case INDEX_op_movcond_i64:
2296     case INDEX_op_ld8u_i64:
2297     case INDEX_op_ld8s_i64:
2298     case INDEX_op_ld16u_i64:
2299     case INDEX_op_ld16s_i64:
2300     case INDEX_op_ld32u_i64:
2301     case INDEX_op_ld32s_i64:
2302     case INDEX_op_ld_i64:
2303     case INDEX_op_st8_i64:
2304     case INDEX_op_st16_i64:
2305     case INDEX_op_st32_i64:
2306     case INDEX_op_st_i64:
2307     case INDEX_op_sub_i64:
2308     case INDEX_op_neg_i64:
2309     case INDEX_op_mul_i64:
2310     case INDEX_op_xor_i64:
2311     case INDEX_op_shl_i64:
2312     case INDEX_op_shr_i64:
2313     case INDEX_op_sar_i64:
2314     case INDEX_op_ext_i32_i64:
2315     case INDEX_op_extu_i32_i64:
2316     case INDEX_op_extract_i64:
2317     case INDEX_op_sextract_i64:
2318     case INDEX_op_deposit_i64:
2319         return TCG_TARGET_REG_BITS == 64;
2320 
2321     case INDEX_op_negsetcond_i64:
2322         return TCG_TARGET_HAS_negsetcond_i64;
2323     case INDEX_op_div_i64:
2324     case INDEX_op_divu_i64:
2325         return TCG_TARGET_HAS_div_i64;
2326     case INDEX_op_rem_i64:
2327     case INDEX_op_remu_i64:
2328         return TCG_TARGET_HAS_rem_i64;
2329     case INDEX_op_div2_i64:
2330     case INDEX_op_divu2_i64:
2331         return TCG_TARGET_HAS_div2_i64;
2332     case INDEX_op_rotl_i64:
2333     case INDEX_op_rotr_i64:
2334         return TCG_TARGET_HAS_rot_i64;
2335     case INDEX_op_extract2_i64:
2336         return TCG_TARGET_HAS_extract2_i64;
2337     case INDEX_op_extrl_i64_i32:
2338     case INDEX_op_extrh_i64_i32:
2339         return TCG_TARGET_HAS_extr_i64_i32;
2340     case INDEX_op_bswap16_i64:
2341         return TCG_TARGET_HAS_bswap16_i64;
2342     case INDEX_op_bswap32_i64:
2343         return TCG_TARGET_HAS_bswap32_i64;
2344     case INDEX_op_bswap64_i64:
2345         return TCG_TARGET_HAS_bswap64_i64;
2346     case INDEX_op_not_i64:
2347         return TCG_TARGET_HAS_not_i64;
2348     case INDEX_op_orc_i64:
2349         return TCG_TARGET_HAS_orc_i64;
2350     case INDEX_op_eqv_i64:
2351         return TCG_TARGET_HAS_eqv_i64;
2352     case INDEX_op_nand_i64:
2353         return TCG_TARGET_HAS_nand_i64;
2354     case INDEX_op_nor_i64:
2355         return TCG_TARGET_HAS_nor_i64;
2356     case INDEX_op_clz_i64:
2357         return TCG_TARGET_HAS_clz_i64;
2358     case INDEX_op_ctz_i64:
2359         return TCG_TARGET_HAS_ctz_i64;
2360     case INDEX_op_ctpop_i64:
2361         return TCG_TARGET_HAS_ctpop_i64;
2362     case INDEX_op_add2_i64:
2363         return TCG_TARGET_HAS_add2_i64;
2364     case INDEX_op_sub2_i64:
2365         return TCG_TARGET_HAS_sub2_i64;
2366     case INDEX_op_mulu2_i64:
2367         return TCG_TARGET_HAS_mulu2_i64;
2368     case INDEX_op_muls2_i64:
2369         return TCG_TARGET_HAS_muls2_i64;
2370     case INDEX_op_muluh_i64:
2371         return TCG_TARGET_HAS_muluh_i64;
2372     case INDEX_op_mulsh_i64:
2373         return TCG_TARGET_HAS_mulsh_i64;
2374 
2375     case INDEX_op_mov_vec:
2376     case INDEX_op_dup_vec:
2377     case INDEX_op_dupm_vec:
2378     case INDEX_op_ld_vec:
2379     case INDEX_op_st_vec:
2380     case INDEX_op_add_vec:
2381     case INDEX_op_sub_vec:
2382     case INDEX_op_and_vec:
2383     case INDEX_op_or_vec:
2384     case INDEX_op_xor_vec:
2385     case INDEX_op_cmp_vec:
2386         return has_type;
2387     case INDEX_op_dup2_vec:
2388         return has_type && TCG_TARGET_REG_BITS == 32;
2389     case INDEX_op_not_vec:
2390         return has_type && TCG_TARGET_HAS_not_vec;
2391     case INDEX_op_neg_vec:
2392         return has_type && TCG_TARGET_HAS_neg_vec;
2393     case INDEX_op_abs_vec:
2394         return has_type && TCG_TARGET_HAS_abs_vec;
2395     case INDEX_op_andc_vec:
2396         return has_type && TCG_TARGET_HAS_andc_vec;
2397     case INDEX_op_orc_vec:
2398         return has_type && TCG_TARGET_HAS_orc_vec;
2399     case INDEX_op_nand_vec:
2400         return has_type && TCG_TARGET_HAS_nand_vec;
2401     case INDEX_op_nor_vec:
2402         return has_type && TCG_TARGET_HAS_nor_vec;
2403     case INDEX_op_eqv_vec:
2404         return has_type && TCG_TARGET_HAS_eqv_vec;
2405     case INDEX_op_mul_vec:
2406         return has_type && TCG_TARGET_HAS_mul_vec;
2407     case INDEX_op_shli_vec:
2408     case INDEX_op_shri_vec:
2409     case INDEX_op_sari_vec:
2410         return has_type && TCG_TARGET_HAS_shi_vec;
2411     case INDEX_op_shls_vec:
2412     case INDEX_op_shrs_vec:
2413     case INDEX_op_sars_vec:
2414         return has_type && TCG_TARGET_HAS_shs_vec;
2415     case INDEX_op_shlv_vec:
2416     case INDEX_op_shrv_vec:
2417     case INDEX_op_sarv_vec:
2418         return has_type && TCG_TARGET_HAS_shv_vec;
2419     case INDEX_op_rotli_vec:
2420         return has_type && TCG_TARGET_HAS_roti_vec;
2421     case INDEX_op_rotls_vec:
2422         return has_type && TCG_TARGET_HAS_rots_vec;
2423     case INDEX_op_rotlv_vec:
2424     case INDEX_op_rotrv_vec:
2425         return has_type && TCG_TARGET_HAS_rotv_vec;
2426     case INDEX_op_ssadd_vec:
2427     case INDEX_op_usadd_vec:
2428     case INDEX_op_sssub_vec:
2429     case INDEX_op_ussub_vec:
2430         return has_type && TCG_TARGET_HAS_sat_vec;
2431     case INDEX_op_smin_vec:
2432     case INDEX_op_umin_vec:
2433     case INDEX_op_smax_vec:
2434     case INDEX_op_umax_vec:
2435         return has_type && TCG_TARGET_HAS_minmax_vec;
2436     case INDEX_op_bitsel_vec:
2437         return has_type && TCG_TARGET_HAS_bitsel_vec;
2438     case INDEX_op_cmpsel_vec:
2439         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2440 
2441     default:
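        /*
         * Opcodes converted to the TCGOutOp scheme advertise support via
         * their constraint set: C_NotImplemented means the backend does
         * not implement the op for this type, while C_Dynamic defers the
         * decision to a per-op hook given the type and flags.
         */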
2442         if (op < INDEX_op_last_generic) {
2443             const TCGOutOp *outop;
2444             TCGConstraintSetIndex con_set;
2445 
2446             if (!has_type) {
2447                 return false;
2448             }
2449 
2450             outop = all_outop[op];
2451             tcg_debug_assert(outop != NULL);
2452 
2453             con_set = outop->static_constraint;
2454             if (con_set == C_Dynamic) {
2455                 con_set = outop->dynamic_constraint(type, flags);
2456             }
2457             if (con_set >= 0) {
2458                 return true;
2459             }
2460             tcg_debug_assert(con_set == C_NotImplemented);
2461             return false;
2462         }
2463         tcg_debug_assert(op < NB_OPS);
2464         return true;
2465 
2466     case INDEX_op_last_generic:
2467         g_assert_not_reached();
2468     }
2469 }
2470 
2471 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2472 {
2473     unsigned width;
2474 
2475     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2476     width = (type == TCG_TYPE_I32 ? 32 : 64);
2477 
2478     tcg_debug_assert(ofs < width);
2479     tcg_debug_assert(len > 0);
2480     tcg_debug_assert(len <= width - ofs);
2481 
2482     return TCG_TARGET_deposit_valid(type, ofs, len);
2483 }
2484 
2485 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2486 
2487 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2488                           TCGTemp *ret, TCGTemp **args)
2489 {
2490     TCGv_i64 extend_free[MAX_CALL_IARGS];
2491     int n_extend = 0;
2492     TCGOp *op;
2493     int i, n, pi = 0, total_args;
2494 
2495     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2496         init_call_layout(info);
2497         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2498     }
2499 
2500     total_args = info->nr_out + info->nr_in + 2;
2501     op = tcg_op_alloc(INDEX_op_call, total_args);
2502 
2503 #ifdef CONFIG_PLUGIN
2504     /* Flag helpers that may affect guest state */
2505     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2506         tcg_ctx->plugin_insn->calls_helpers = true;
2507     }
2508 #endif
2509 
2510     TCGOP_CALLO(op) = n = info->nr_out;
2511     switch (n) {
2512     case 0:
2513         tcg_debug_assert(ret == NULL);
2514         break;
2515     case 1:
2516         tcg_debug_assert(ret != NULL);
2517         op->args[pi++] = temp_arg(ret);
2518         break;
2519     case 2:
2520     case 4:
2521         tcg_debug_assert(ret != NULL);
2522         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2523         tcg_debug_assert(ret->temp_subindex == 0);
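        /*
         * The value is returned in n consecutive parts, e.g. an I128 as
         * 2 x I64, or as 4 x I32 on a 32-bit host; ctz32(n) is the number
         * of TCG_TYPE steps between the part type and the base type.
         */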
2524         for (i = 0; i < n; ++i) {
2525             op->args[pi++] = temp_arg(ret + i);
2526         }
2527         break;
2528     default:
2529         g_assert_not_reached();
2530     }
2531 
2532     TCGOP_CALLI(op) = n = info->nr_in;
2533     for (i = 0; i < n; i++) {
2534         const TCGCallArgumentLoc *loc = &info->in[i];
2535         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2536 
2537         switch (loc->kind) {
2538         case TCG_CALL_ARG_NORMAL:
2539         case TCG_CALL_ARG_BY_REF:
2540         case TCG_CALL_ARG_BY_REF_N:
2541             op->args[pi++] = temp_arg(ts);
2542             break;
2543 
2544         case TCG_CALL_ARG_EXTEND_U:
2545         case TCG_CALL_ARG_EXTEND_S:
2546             {
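                /* Widen the 32-bit argument into a fresh 64-bit temp;
                   it is freed below, once the call op has been emitted. */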
2547                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2548                 TCGv_i32 orig = temp_tcgv_i32(ts);
2549 
2550                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2551                     tcg_gen_ext_i32_i64(temp, orig);
2552                 } else {
2553                     tcg_gen_extu_i32_i64(temp, orig);
2554                 }
2555                 op->args[pi++] = tcgv_i64_arg(temp);
2556                 extend_free[n_extend++] = temp;
2557             }
2558             break;
2559 
2560         default:
2561             g_assert_not_reached();
2562         }
2563     }
2564     op->args[pi++] = (uintptr_t)func;
2565     op->args[pi++] = (uintptr_t)info;
2566     tcg_debug_assert(pi == total_args);
2567 
2568     if (tcg_ctx->emit_before_op) {
2569         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2570     } else {
2571         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2572     }
2573 
2574     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2575     for (i = 0; i < n_extend; ++i) {
2576         tcg_temp_free_i64(extend_free[i]);
2577     }
2578 }
2579 
2580 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2581 {
2582     tcg_gen_callN(func, info, ret, NULL);
2583 }
2584 
2585 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2586 {
2587     tcg_gen_callN(func, info, ret, &t1);
2588 }
2589 
2590 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2591                    TCGTemp *t1, TCGTemp *t2)
2592 {
2593     TCGTemp *args[2] = { t1, t2 };
2594     tcg_gen_callN(func, info, ret, args);
2595 }
2596 
2597 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2598                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2599 {
2600     TCGTemp *args[3] = { t1, t2, t3 };
2601     tcg_gen_callN(func, info, ret, args);
2602 }
2603 
2604 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2605                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2606 {
2607     TCGTemp *args[4] = { t1, t2, t3, t4 };
2608     tcg_gen_callN(func, info, ret, args);
2609 }
2610 
2611 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2612                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2613 {
2614     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2615     tcg_gen_callN(func, info, ret, args);
2616 }
2617 
2618 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2619                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2620                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2621 {
2622     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2623     tcg_gen_callN(func, info, ret, args);
2624 }
2625 
2626 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2627                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2628                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2629 {
2630     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2631     tcg_gen_callN(func, info, ret, args);
2632 }
2633 
2634 static void tcg_reg_alloc_start(TCGContext *s)
2635 {
2636     int i, n;
2637 
2638     for (i = 0, n = s->nb_temps; i < n; i++) {
2639         TCGTemp *ts = &s->temps[i];
2640         TCGTempVal val = TEMP_VAL_MEM;
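        /* Globals and TB temps start in memory; fixed temps are always in
           their register, constants are constants, EBB temps start dead. */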
2641 
2642         switch (ts->kind) {
2643         case TEMP_CONST:
2644             val = TEMP_VAL_CONST;
2645             break;
2646         case TEMP_FIXED:
2647             val = TEMP_VAL_REG;
2648             break;
2649         case TEMP_GLOBAL:
2650             break;
2651         case TEMP_EBB:
2652             val = TEMP_VAL_DEAD;
2653             /* fall through */
2654         case TEMP_TB:
2655             ts->mem_allocated = 0;
2656             break;
2657         default:
2658             g_assert_not_reached();
2659         }
2660         ts->val_type = val;
2661     }
2662 
2663     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2664 }
2665 
2666 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2667                                  TCGTemp *ts)
2668 {
2669     int idx = temp_idx(ts);
2670 
2671     switch (ts->kind) {
2672     case TEMP_FIXED:
2673     case TEMP_GLOBAL:
2674         pstrcpy(buf, buf_size, ts->name);
2675         break;
2676     case TEMP_TB:
2677         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2678         break;
2679     case TEMP_EBB:
2680         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2681         break;
2682     case TEMP_CONST:
2683         switch (ts->type) {
2684         case TCG_TYPE_I32:
2685             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2686             break;
2687 #if TCG_TARGET_REG_BITS > 32
2688         case TCG_TYPE_I64:
2689             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2690             break;
2691 #endif
2692         case TCG_TYPE_V64:
2693         case TCG_TYPE_V128:
2694         case TCG_TYPE_V256:
2695             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2696                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2697             break;
2698         default:
2699             g_assert_not_reached();
2700         }
2701         break;
2702     }
2703     return buf;
2704 }
2705 
2706 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2707                              int buf_size, TCGArg arg)
2708 {
2709     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2710 }
2711 
2712 static const char * const cond_name[] =
2713 {
2714     [TCG_COND_NEVER] = "never",
2715     [TCG_COND_ALWAYS] = "always",
2716     [TCG_COND_EQ] = "eq",
2717     [TCG_COND_NE] = "ne",
2718     [TCG_COND_LT] = "lt",
2719     [TCG_COND_GE] = "ge",
2720     [TCG_COND_LE] = "le",
2721     [TCG_COND_GT] = "gt",
2722     [TCG_COND_LTU] = "ltu",
2723     [TCG_COND_GEU] = "geu",
2724     [TCG_COND_LEU] = "leu",
2725     [TCG_COND_GTU] = "gtu",
2726     [TCG_COND_TSTEQ] = "tsteq",
2727     [TCG_COND_TSTNE] = "tstne",
2728 };
2729 
2730 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2731 {
2732     [MO_UB]   = "ub",
2733     [MO_SB]   = "sb",
2734     [MO_LEUW] = "leuw",
2735     [MO_LESW] = "lesw",
2736     [MO_LEUL] = "leul",
2737     [MO_LESL] = "lesl",
2738     [MO_LEUQ] = "leq",
2739     [MO_BEUW] = "beuw",
2740     [MO_BESW] = "besw",
2741     [MO_BEUL] = "beul",
2742     [MO_BESL] = "besl",
2743     [MO_BEUQ] = "beq",
2744     [MO_128 + MO_BE] = "beo",
2745     [MO_128 + MO_LE] = "leo",
2746 };
2747 
2748 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2749     [MO_UNALN >> MO_ASHIFT]    = "un+",
2750     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2751     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2752     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2753     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2754     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2755     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2756     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2757 };
2758 
2759 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2760     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2761     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2762     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2763     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2764     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2765     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2766 };
2767 
2768 static const char bswap_flag_name[][6] = {
2769     [TCG_BSWAP_IZ] = "iz",
2770     [TCG_BSWAP_OZ] = "oz",
2771     [TCG_BSWAP_OS] = "os",
2772     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2773     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2774 };
2775 
2776 #ifdef CONFIG_PLUGIN
2777 static const char * const plugin_from_name[] = {
2778     "from-tb",
2779     "from-insn",
2780     "after-insn",
2781     "after-tb",
2782 };
2783 #endif
2784 
2785 static inline bool tcg_regset_single(TCGRegSet d)
2786 {
2787     return (d & (d - 1)) == 0;
2788 }
2789 
2790 static inline TCGReg tcg_regset_first(TCGRegSet d)
2791 {
2792     if (TCG_TARGET_NB_REGS <= 32) {
2793         return ctz32(d);
2794     } else {
2795         return ctz64(d);
2796     }
2797 }
2798 
2799 /* Return only the number of characters output -- no error return. */
2800 #define ne_fprintf(...) \
2801     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2802 
2803 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2804 {
2805     char buf[128];
2806     TCGOp *op;
2807 
2808     QTAILQ_FOREACH(op, &s->ops, link) {
2809         int i, k, nb_oargs, nb_iargs, nb_cargs;
2810         const TCGOpDef *def;
2811         TCGOpcode c;
2812         int col = 0;
2813 
2814         c = op->opc;
2815         def = &tcg_op_defs[c];
2816 
2817         if (c == INDEX_op_insn_start) {
2818             nb_oargs = 0;
2819             col += ne_fprintf(f, "\n ----");
2820 
2821             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2822                 col += ne_fprintf(f, " %016" PRIx64,
2823                                   tcg_get_insn_start_param(op, i));
2824             }
2825         } else if (c == INDEX_op_call) {
2826             const TCGHelperInfo *info = tcg_call_info(op);
2827             void *func = tcg_call_func(op);
2828 
2829             /* variable number of arguments */
2830             nb_oargs = TCGOP_CALLO(op);
2831             nb_iargs = TCGOP_CALLI(op);
2832             nb_cargs = def->nb_cargs;
2833 
2834             col += ne_fprintf(f, " %s ", def->name);
2835 
2836             /*
2837              * Print the function name from TCGHelperInfo, if available.
2838              * Note that plugins have a template function for the info,
2839              * but the actual function pointer comes from the plugin.
2840              */
2841             if (func == info->func) {
2842                 col += ne_fprintf(f, "%s", info->name);
2843             } else {
2844                 col += ne_fprintf(f, "plugin(%p)", func);
2845             }
2846 
2847             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2848             for (i = 0; i < nb_oargs; i++) {
2849                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2850                                                             op->args[i]));
2851             }
2852             for (i = 0; i < nb_iargs; i++) {
2853                 TCGArg arg = op->args[nb_oargs + i];
2854                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2855                 col += ne_fprintf(f, ",%s", t);
2856             }
2857         } else {
2858             if (def->flags & TCG_OPF_INT) {
2859                 col += ne_fprintf(f, " %s_i%d ",
2860                                   def->name,
2861                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2862             } else if (def->flags & TCG_OPF_VECTOR) {
2863                 col += ne_fprintf(f, "%s v%d,e%d,",
2864                                   def->name,
2865                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2866                                   8 << TCGOP_VECE(op));
2867             } else {
2868                 col += ne_fprintf(f, " %s ", def->name);
2869             }
2870 
2871             nb_oargs = def->nb_oargs;
2872             nb_iargs = def->nb_iargs;
2873             nb_cargs = def->nb_cargs;
2874 
2875             k = 0;
2876             for (i = 0; i < nb_oargs; i++) {
2877                 const char *sep = k ? "," : "";
2878                 col += ne_fprintf(f, "%s%s", sep,
2879                                   tcg_get_arg_str(s, buf, sizeof(buf),
2880                                                   op->args[k++]));
2881             }
2882             for (i = 0; i < nb_iargs; i++) {
2883                 const char *sep = k ? "," : "";
2884                 col += ne_fprintf(f, "%s%s", sep,
2885                                   tcg_get_arg_str(s, buf, sizeof(buf),
2886                                                   op->args[k++]));
2887             }
2888             switch (c) {
2889             case INDEX_op_brcond_i32:
2890             case INDEX_op_setcond_i32:
2891             case INDEX_op_negsetcond_i32:
2892             case INDEX_op_movcond_i32:
2893             case INDEX_op_brcond2_i32:
2894             case INDEX_op_setcond2_i32:
2895             case INDEX_op_brcond_i64:
2896             case INDEX_op_setcond_i64:
2897             case INDEX_op_negsetcond_i64:
2898             case INDEX_op_movcond_i64:
2899             case INDEX_op_cmp_vec:
2900             case INDEX_op_cmpsel_vec:
2901                 if (op->args[k] < ARRAY_SIZE(cond_name)
2902                     && cond_name[op->args[k]]) {
2903                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2904                 } else {
2905                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2906                 }
2907                 i = 1;
2908                 break;
2909             case INDEX_op_qemu_ld_i32:
2910             case INDEX_op_qemu_st_i32:
2911             case INDEX_op_qemu_st8_i32:
2912             case INDEX_op_qemu_ld_i64:
2913             case INDEX_op_qemu_st_i64:
2914             case INDEX_op_qemu_ld_i128:
2915             case INDEX_op_qemu_st_i128:
2916                 {
2917                     const char *s_al, *s_op, *s_at;
2918                     MemOpIdx oi = op->args[k++];
2919                     MemOp mop = get_memop(oi);
2920                     unsigned ix = get_mmuidx(oi);
2921 
2922                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2923                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2924                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2925                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2926 
2927                     /* If all fields are accounted for, print symbolically. */
2928                     if (!mop && s_al && s_op && s_at) {
2929                         col += ne_fprintf(f, ",%s%s%s,%u",
2930                                           s_at, s_al, s_op, ix);
2931                     } else {
2932                         mop = get_memop(oi);
2933                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2934                     }
2935                     i = 1;
2936                 }
2937                 break;
2938             case INDEX_op_bswap16_i32:
2939             case INDEX_op_bswap16_i64:
2940             case INDEX_op_bswap32_i32:
2941             case INDEX_op_bswap32_i64:
2942             case INDEX_op_bswap64_i64:
2943                 {
2944                     TCGArg flags = op->args[k];
2945                     const char *name = NULL;
2946 
2947                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2948                         name = bswap_flag_name[flags];
2949                     }
2950                     if (name) {
2951                         col += ne_fprintf(f, ",%s", name);
2952                     } else {
2953                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2954                     }
2955                     i = k = 1;
2956                 }
2957                 break;
2958 #ifdef CONFIG_PLUGIN
2959             case INDEX_op_plugin_cb:
2960                 {
2961                     TCGArg from = op->args[k++];
2962                     const char *name = NULL;
2963 
2964                     if (from < ARRAY_SIZE(plugin_from_name)) {
2965                         name = plugin_from_name[from];
2966                     }
2967                     if (name) {
2968                         col += ne_fprintf(f, "%s", name);
2969                     } else {
2970                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2971                     }
2972                     i = 1;
2973                 }
2974                 break;
2975 #endif
2976             default:
2977                 i = 0;
2978                 break;
2979             }
2980             switch (c) {
2981             case INDEX_op_set_label:
2982             case INDEX_op_br:
2983             case INDEX_op_brcond_i32:
2984             case INDEX_op_brcond_i64:
2985             case INDEX_op_brcond2_i32:
2986                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2987                                   arg_label(op->args[k])->id);
2988                 i++, k++;
2989                 break;
2990             case INDEX_op_mb:
2991                 {
2992                     TCGBar membar = op->args[k];
2993                     const char *b_op, *m_op;
2994 
2995                     switch (membar & TCG_BAR_SC) {
2996                     case 0:
2997                         b_op = "none";
2998                         break;
2999                     case TCG_BAR_LDAQ:
3000                         b_op = "acq";
3001                         break;
3002                     case TCG_BAR_STRL:
3003                         b_op = "rel";
3004                         break;
3005                     case TCG_BAR_SC:
3006                         b_op = "seq";
3007                         break;
3008                     default:
3009                         g_assert_not_reached();
3010                     }
3011 
3012                     switch (membar & TCG_MO_ALL) {
3013                     case 0:
3014                         m_op = "none";
3015                         break;
3016                     case TCG_MO_LD_LD:
3017                         m_op = "rr";
3018                         break;
3019                     case TCG_MO_LD_ST:
3020                         m_op = "rw";
3021                         break;
3022                     case TCG_MO_ST_LD:
3023                         m_op = "wr";
3024                         break;
3025                     case TCG_MO_ST_ST:
3026                         m_op = "ww";
3027                         break;
3028                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3029                         m_op = "rr+rw";
3030                         break;
3031                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3032                         m_op = "rr+wr";
3033                         break;
3034                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3035                         m_op = "rr+ww";
3036                         break;
3037                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3038                         m_op = "rw+wr";
3039                         break;
3040                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3041                         m_op = "rw+ww";
3042                         break;
3043                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3044                         m_op = "wr+ww";
3045                         break;
3046                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3047                         m_op = "rr+rw+wr";
3048                         break;
3049                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3050                         m_op = "rr+rw+ww";
3051                         break;
3052                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3053                         m_op = "rr+wr+ww";
3054                         break;
3055                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3056                         m_op = "rw+wr+ww";
3057                         break;
3058                     case TCG_MO_ALL:
3059                         m_op = "all";
3060                         break;
3061                     default:
3062                         g_assert_not_reached();
3063                     }
3064 
3065                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3066                     i++, k++;
3067                 }
3068                 break;
3069             default:
3070                 break;
3071             }
3072             for (; i < nb_cargs; i++, k++) {
3073                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3074                                   op->args[k]);
3075             }
3076         }
3077 
3078         if (have_prefs || op->life) {
3079             for (; col < 40; ++col) {
3080                 putc(' ', f);
3081             }
3082         }
3083 
3084         if (op->life) {
3085             unsigned life = op->life;
3086 
3087             if (life & (SYNC_ARG * 3)) {
3088                 ne_fprintf(f, "  sync:");
3089                 for (i = 0; i < 2; ++i) {
3090                     if (life & (SYNC_ARG << i)) {
3091                         ne_fprintf(f, " %d", i);
3092                     }
3093                 }
3094             }
3095             life /= DEAD_ARG;
3096             if (life) {
3097                 ne_fprintf(f, "  dead:");
3098                 for (i = 0; life; ++i, life >>= 1) {
3099                     if (life & 1) {
3100                         ne_fprintf(f, " %d", i);
3101                     }
3102                 }
3103             }
3104         }
3105 
3106         if (have_prefs) {
3107             for (i = 0; i < nb_oargs; ++i) {
3108                 TCGRegSet set = output_pref(op, i);
3109 
3110                 if (i == 0) {
3111                     ne_fprintf(f, "  pref=");
3112                 } else {
3113                     ne_fprintf(f, ",");
3114                 }
3115                 if (set == 0) {
3116                     ne_fprintf(f, "none");
3117                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3118                     ne_fprintf(f, "all");
3119 #ifdef CONFIG_DEBUG_TCG
3120                 } else if (tcg_regset_single(set)) {
3121                     TCGReg reg = tcg_regset_first(set);
3122                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3123 #endif
3124                 } else if (TCG_TARGET_NB_REGS <= 32) {
3125                     ne_fprintf(f, "0x%x", (uint32_t)set);
3126                 } else {
3127                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3128                 }
3129             }
3130         }
3131 
3132         putc('\n', f);
3133     }
3134 }
3135 
3136 /* We give more priority to constraints with fewer registers. */
3137 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3138 {
3139     int n;
3140 
3141     arg_ct += k;
3142     n = ctpop64(arg_ct->regs);
3143 
3144     /*
3145      * Sort constraints of a single register first, which includes output
3146      * aliases (which must exactly match the input already allocated).
3147      */
3148     if (n == 1 || arg_ct->oalias) {
3149         return INT_MAX;
3150     }
3151 
3152     /*
3153      * Sort register pairs next, first then second immediately after.
3154      * Arbitrarily sort multiple pairs by the index of the first reg;
3155      * there shouldn't be many pairs.
3156      */
3157     switch (arg_ct->pair) {
3158     case 1:
3159     case 3:
3160         return (k + 1) * 2;
3161     case 2:
3162         return (arg_ct->pair_index + 1) * 2 - 1;
3163     }
3164 
3165     /* Finally, sort by decreasing register count. */
3166     assert(n > 1);
3167     return -n;
3168 }
3169 
3170 /* sort from highest priority to lowest */
3171 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3172 {
3173     int i, j;
3174 
3175     for (i = 0; i < n; i++) {
3176         a[start + i].sort_index = start + i;
3177     }
3178     if (n <= 1) {
3179         return;
3180     }
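    /* A simple O(n^2) exchange sort on sort_index; n never exceeds
       TCG_MAX_OP_ARGS, so efficiency is irrelevant here. */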
3181     for (i = 0; i < n - 1; i++) {
3182         for (j = i + 1; j < n; j++) {
3183             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3184             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3185             if (p1 < p2) {
3186                 int tmp = a[start + i].sort_index;
3187                 a[start + i].sort_index = a[start + j].sort_index;
3188                 a[start + j].sort_index = tmp;
3189             }
3190         }
3191     }
3192 }
3193 
3194 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3195 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3196 
3197 static void process_constraint_sets(void)
3198 {
3199     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3200         const TCGConstraintSet *tdefs = &constraint_sets[c];
3201         TCGArgConstraint *args_ct = all_cts[c];
3202         int nb_oargs = tdefs->nb_oargs;
3203         int nb_iargs = tdefs->nb_iargs;
3204         int nb_args = nb_oargs + nb_iargs;
3205         bool saw_alias_pair = false;
3206 
3207         for (int i = 0; i < nb_args; i++) {
3208             const char *ct_str = tdefs->args_ct_str[i];
3209             bool input_p = i >= nb_oargs;
3210             int o;
3211 
3212             switch (*ct_str) {
3213             case '0' ... '9':
3214                 o = *ct_str - '0';
3215                 tcg_debug_assert(input_p);
3216                 tcg_debug_assert(o < nb_oargs);
3217                 tcg_debug_assert(args_ct[o].regs != 0);
3218                 tcg_debug_assert(!args_ct[o].oalias);
3219                 args_ct[i] = args_ct[o];
3220                 /* The output sets oalias.  */
3221                 args_ct[o].oalias = 1;
3222                 args_ct[o].alias_index = i;
3223                 /* The input sets ialias. */
3224                 args_ct[i].ialias = 1;
3225                 args_ct[i].alias_index = o;
3226                 if (args_ct[i].pair) {
3227                     saw_alias_pair = true;
3228                 }
3229                 tcg_debug_assert(ct_str[1] == '\0');
3230                 continue;
3231 
3232             case '&':
3233                 tcg_debug_assert(!input_p);
3234                 args_ct[i].newreg = true;
3235                 ct_str++;
3236                 break;
3237 
3238             case 'p': /* plus */
3239                 /* Allocate to the register after the previous. */
3240                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3241                 o = i - 1;
3242                 tcg_debug_assert(!args_ct[o].pair);
3243                 tcg_debug_assert(!args_ct[o].ct);
3244                 args_ct[i] = (TCGArgConstraint){
3245                     .pair = 2,
3246                     .pair_index = o,
3247                     .regs = args_ct[o].regs << 1,
3248                     .newreg = args_ct[o].newreg,
3249                 };
3250                 args_ct[o].pair = 1;
3251                 args_ct[o].pair_index = i;
3252                 tcg_debug_assert(ct_str[1] == '\0');
3253                 continue;
3254 
3255             case 'm': /* minus */
3256                 /* Allocate to the register before the previous. */
3257                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3258                 o = i - 1;
3259                 tcg_debug_assert(!args_ct[o].pair);
3260                 tcg_debug_assert(!args_ct[o].ct);
3261                 args_ct[i] = (TCGArgConstraint){
3262                     .pair = 1,
3263                     .pair_index = o,
3264                     .regs = args_ct[o].regs >> 1,
3265                     .newreg = args_ct[o].newreg,
3266                 };
3267                 args_ct[o].pair = 2;
3268                 args_ct[o].pair_index = i;
3269                 tcg_debug_assert(ct_str[1] == '\0');
3270                 continue;
3271             }
3272 
3273             do {
3274                 switch (*ct_str) {
3275                 case 'i':
3276                     args_ct[i].ct |= TCG_CT_CONST;
3277                     break;
3278 #ifdef TCG_REG_ZERO
3279                 case 'z':
3280                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3281                     break;
3282 #endif
3283 
3284                 /* Include all of the target-specific constraints. */
3285 
3286 #undef CONST
3287 #define CONST(CASE, MASK) \
3288     case CASE: args_ct[i].ct |= MASK; break;
3289 #define REGS(CASE, MASK) \
3290     case CASE: args_ct[i].regs |= MASK; break;
3291 
3292 #include "tcg-target-con-str.h"
3293 
3294 #undef REGS
3295 #undef CONST
3296                 default:
3297                 case '0' ... '9':
3298                 case '&':
3299                 case 'p':
3300                 case 'm':
3301                     /* Typo in TCGConstraintSet constraint. */
3302                     g_assert_not_reached();
3303                 }
3304             } while (*++ct_str != '\0');
3305         }
3306 
3307         /*
3308          * Fix up output pairs that are aliased with inputs.
3309          * When we created the alias, we copied pair from the output.
3310          * There are three cases:
3311          *    (1a) Pairs of inputs alias pairs of outputs.
3312          *    (1b) One input aliases the first of a pair of outputs.
3313          *    (2)  One input aliases the second of a pair of outputs.
3314          *
3315          * Case 1a is handled by making sure that the pair_index'es are
3316          * properly updated so that they appear the same as a pair of inputs.
3317          *
3318          * Case 1b is handled by setting the pair_index of the input to
3319          * itself, simply so it doesn't point to an unrelated argument.
3320          * Since we don't encounter the "second" during the input allocation
3321          * phase, nothing happens with the second half of the input pair.
3322          *
3323          * Case 2 is handled by setting the second input to pair=3, the
3324          * first output to pair=3, and the pair_index'es to match.
3325          */
3326         if (saw_alias_pair) {
3327             for (int i = nb_oargs; i < nb_args; i++) {
3328                 int o, o2, i2;
3329 
3330                 /*
3331                  * Since [0-9pm] must be alone in the constraint string,
3332                  * the only way they can both be set is if the pair comes
3333                  * from the output alias.
3334                  */
3335                 if (!args_ct[i].ialias) {
3336                     continue;
3337                 }
3338                 switch (args_ct[i].pair) {
3339                 case 0:
3340                     break;
3341                 case 1:
3342                     o = args_ct[i].alias_index;
3343                     o2 = args_ct[o].pair_index;
3344                     tcg_debug_assert(args_ct[o].pair == 1);
3345                     tcg_debug_assert(args_ct[o2].pair == 2);
3346                     if (args_ct[o2].oalias) {
3347                         /* Case 1a */
3348                         i2 = args_ct[o2].alias_index;
3349                         tcg_debug_assert(args_ct[i2].pair == 2);
3350                         args_ct[i2].pair_index = i;
3351                         args_ct[i].pair_index = i2;
3352                     } else {
3353                         /* Case 1b */
3354                         args_ct[i].pair_index = i;
3355                     }
3356                     break;
3357                 case 2:
3358                     o = args_ct[i].alias_index;
3359                     o2 = args_ct[o].pair_index;
3360                     tcg_debug_assert(args_ct[o].pair == 2);
3361                     tcg_debug_assert(args_ct[o2].pair == 1);
3362                     if (args_ct[o2].oalias) {
3363                         /* Case 1a */
3364                         i2 = args_ct[o2].alias_index;
3365                         tcg_debug_assert(args_ct[i2].pair == 1);
3366                         args_ct[i2].pair_index = i;
3367                         args_ct[i].pair_index = i2;
3368                     } else {
3369                         /* Case 2 */
3370                         args_ct[i].pair = 3;
3371                         args_ct[o2].pair = 3;
3372                         args_ct[i].pair_index = o2;
3373                         args_ct[o2].pair_index = i;
3374                     }
3375                     break;
3376                 default:
3377                     g_assert_not_reached();
3378                 }
3379             }
3380         }
3381 
3382         /* sort the constraints (XXX: this is just a heuristic) */
3383         sort_constraints(args_ct, 0, nb_oargs);
3384         sort_constraints(args_ct, nb_oargs, nb_iargs);
3385     }
3386 }
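
/*
 * Illustrative parse (hypothetical strings, not from a real backend):
 * a set with nb_oargs = 1, nb_iargs = 2 and args_ct_str = { "r", "0", "ri" }
 * yields: arg 0 (output) with the backend's "r" register set and
 * oalias = 1, alias_index = 1; arg 1 (input) a copy of arg 0 with
 * ialias = 1, alias_index = 0; arg 2 (input) with the "r" set plus
 * TCG_CT_CONST from the generic 'i' flag.
 */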
3387 
3388 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3389 {
3390     TCGOpcode opc = op->opc;
3391     TCGType type = TCGOP_TYPE(op);
3392     unsigned flags = TCGOP_FLAGS(op);
3393     const TCGOpDef *def = &tcg_op_defs[opc];
3394     const TCGOutOp *outop = all_outop[opc];
3395     TCGConstraintSetIndex con_set;
3396 
3397     if (def->flags & TCG_OPF_NOT_PRESENT) {
3398         return empty_cts;
3399     }
3400 
3401     if (outop) {
3402         con_set = outop->static_constraint;
3403         if (con_set == C_Dynamic) {
3404             con_set = outop->dynamic_constraint(type, flags);
3405         }
3406     } else {
3407         con_set = tcg_target_op_def(opc, type, flags);
3408     }
3409     tcg_debug_assert(con_set >= 0);
3410     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3411 
3412     /* The constraint arguments must match TCGOpcode arguments. */
3413     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3414     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3415 
3416     return all_cts[con_set];
3417 }
3418 
3419 static void remove_label_use(TCGOp *op, int idx)
3420 {
3421     TCGLabel *label = arg_label(op->args[idx]);
3422     TCGLabelUse *use;
3423 
3424     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3425         if (use->op == op) {
3426             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3427             return;
3428         }
3429     }
3430     g_assert_not_reached();
3431 }
3432 
3433 void tcg_op_remove(TCGContext *s, TCGOp *op)
3434 {
3435     switch (op->opc) {
3436     case INDEX_op_br:
3437         remove_label_use(op, 0);
3438         break;
3439     case INDEX_op_brcond_i32:
3440     case INDEX_op_brcond_i64:
3441         remove_label_use(op, 3);
3442         break;
3443     case INDEX_op_brcond2_i32:
3444         remove_label_use(op, 5);
3445         break;
3446     default:
3447         break;
3448     }
3449 
3450     QTAILQ_REMOVE(&s->ops, op, link);
3451     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3452     s->nb_ops--;
3453 }
3454 
3455 void tcg_remove_ops_after(TCGOp *op)
3456 {
3457     TCGContext *s = tcg_ctx;
3458 
3459     while (true) {
3460         TCGOp *last = tcg_last_op();
3461         if (last == op) {
3462             return;
3463         }
3464         tcg_op_remove(s, last);
3465     }
3466 }
3467 
3468 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3469 {
3470     TCGContext *s = tcg_ctx;
3471     TCGOp *op = NULL;
3472 
3473     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3474         QTAILQ_FOREACH(op, &s->free_ops, link) {
3475             if (nargs <= op->nargs) {
3476                 QTAILQ_REMOVE(&s->free_ops, op, link);
3477                 nargs = op->nargs;
3478                 goto found;
3479             }
3480         }
3481     }
3482 
3483     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3484     nargs = MAX(4, nargs);
3485     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3486 
3487  found:
3488     memset(op, 0, offsetof(TCGOp, link));
3489     op->opc = opc;
3490     op->nargs = nargs;
3491 
3492     /* Check for bitfield overflow. */
3493     tcg_debug_assert(op->nargs == nargs);
3494 
3495     s->nb_ops++;
3496     return op;
3497 }
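
/*
 * Note the recycling: tcg_op_remove() pushes dead ops onto s->free_ops,
 * and the loop above reuses the first entry whose capacity suffices,
 * keeping op->nargs at the larger recycled size; e.g. a removed 4-arg
 * op can later satisfy a 3-arg request without a fresh tcg_malloc().
 */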
3498 
3499 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3500 {
3501     TCGOp *op = tcg_op_alloc(opc, nargs);
3502 
3503     if (tcg_ctx->emit_before_op) {
3504         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3505     } else {
3506         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3507     }
3508     return op;
3509 }
3510 
3511 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3512                             TCGOpcode opc, TCGType type, unsigned nargs)
3513 {
3514     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3515 
3516     TCGOP_TYPE(new_op) = type;
3517     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3518     return new_op;
3519 }
3520 
3521 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3522                            TCGOpcode opc, TCGType type, unsigned nargs)
3523 {
3524     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3525 
3526     TCGOP_TYPE(new_op) = type;
3527     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3528     return new_op;
3529 }
3530 
3531 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3532 {
3533     TCGLabelUse *u;
3534 
3535     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3536         TCGOp *op = u->op;
3537         switch (op->opc) {
3538         case INDEX_op_br:
3539             op->args[0] = label_arg(to);
3540             break;
3541         case INDEX_op_brcond_i32:
3542         case INDEX_op_brcond_i64:
3543             op->args[3] = label_arg(to);
3544             break;
3545         case INDEX_op_brcond2_i32:
3546             op->args[5] = label_arg(to);
3547             break;
3548         default:
3549             g_assert_not_reached();
3550         }
3551     }
3552 
3553     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3554 }
3555 
3556 /* Reachability analysis: remove unreachable code.  */
3557 static void __attribute__((noinline))
3558 reachable_code_pass(TCGContext *s)
3559 {
3560     TCGOp *op, *op_next, *op_prev;
3561     bool dead = false;
3562 
3563     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3564         bool remove = dead;
3565         TCGLabel *label;
3566 
3567         switch (op->opc) {
3568         case INDEX_op_set_label:
3569             label = arg_label(op->args[0]);
3570 
3571             /*
3572              * Note that the first op in the TB is always a load,
3573              * so there is always something before a label.
3574              */
3575             op_prev = QTAILQ_PREV(op, link);
3576 
3577             /*
3578              * If we find two sequential labels, move all branches to
3579              * reference the second label and remove the first label.
3580              * Do this before branch to next optimization, so that the
3581              * middle label is out of the way.
3582              */
3583             if (op_prev->opc == INDEX_op_set_label) {
3584                 move_label_uses(label, arg_label(op_prev->args[0]));
3585                 tcg_op_remove(s, op_prev);
3586                 op_prev = QTAILQ_PREV(op, link);
3587             }
3588 
3589             /*
3590              * Optimization can fold conditional branches to unconditional.
3591              * If we find a label which is preceded by an unconditional
3592              * branch to next, remove the branch.  We couldn't do this when
3593              * processing the branch because any dead code between the branch
3594              * and label had not yet been removed.
3595              */
3596             if (op_prev->opc == INDEX_op_br &&
3597                 label == arg_label(op_prev->args[0])) {
3598                 tcg_op_remove(s, op_prev);
3599                 /* Fall through means insns become live again.  */
3600                 dead = false;
3601             }
3602 
3603             if (QSIMPLEQ_EMPTY(&label->branches)) {
3604                 /*
3605                  * While there is an occasional backward branch, virtually
3606                  * all branches generated by the translators are forward.
3607                  * Which means that generally we will have already removed
3608                  * all references that will ever be made to the label, and
3609                  * there is little to be gained by iterating.
3610                  */
3611                 remove = true;
3612             } else {
3613                 /* Once we see a label, insns become live again.  */
3614                 dead = false;
3615                 remove = false;
3616             }
3617             break;
3618 
3619         case INDEX_op_br:
3620         case INDEX_op_exit_tb:
3621         case INDEX_op_goto_ptr:
3622             /* Unconditional branches; everything following is dead.  */
3623             dead = true;
3624             break;
3625 
3626         case INDEX_op_call:
3627             /* Notice noreturn helper calls, raising exceptions.  */
3628             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3629                 dead = true;
3630             }
3631             break;
3632 
3633         case INDEX_op_insn_start:
3634             /* Never remove -- we need to keep these for unwind.  */
3635             remove = false;
3636             break;
3637 
3638         default:
3639             break;
3640         }
3641 
3642         if (remove) {
3643             tcg_op_remove(s, op);
3644         }
3645     }
3646 }
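
/*
 * Minimal before/after sketch (hypothetical labels and temps):
 *
 *     brcond_i32 t0, t1, lt, $L1        brcond_i32 t0, t1, lt, $L1
 *     br $L1                       -->  set_label $L1
 *     mov_i32 t2, t3
 *     set_label $L1
 *
 * The mov is dead behind the unconditional br and is removed; the br
 * then branches to the immediately following label and is removed in
 * turn, while $L1 survives because the brcond still references it.
 */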
3647 
3648 #define TS_DEAD  1
3649 #define TS_MEM   2
3650 
3651 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3652 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
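
/*
 * Sketch of the encoding (DEAD_ARG and SYNC_ARG themselves are defined
 * elsewhere): arg_life packs one DEAD bit per argument index and one
 * SYNC bit per output, so IS_DEAD_ARG(1) asks whether argument 1 dies
 * at this op and NEED_SYNC_ARG(0) whether output 0 must be written
 * back to its canonical memory slot.
 */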
3653 
3654 /* For liveness_pass_1, the register preferences for a given temp.  */
3655 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3656 {
3657     return ts->state_ptr;
3658 }
3659 
3660 /* For liveness_pass_1, reset the preferences for a given temp to the
3661  * maximal regset for its type.
3662  */
3663 static inline void la_reset_pref(TCGTemp *ts)
3664 {
3665     *la_temp_pref(ts)
3666         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3667 }
3668 
3669 /* liveness analysis: end of function: all temps are dead, and globals
3670    should be in memory. */
3671 static void la_func_end(TCGContext *s, int ng, int nt)
3672 {
3673     int i;
3674 
3675     for (i = 0; i < ng; ++i) {
3676         s->temps[i].state = TS_DEAD | TS_MEM;
3677         la_reset_pref(&s->temps[i]);
3678     }
3679     for (i = ng; i < nt; ++i) {
3680         s->temps[i].state = TS_DEAD;
3681         la_reset_pref(&s->temps[i]);
3682     }
3683 }
3684 
3685 /* liveness analysis: end of basic block: all temps are dead, globals
3686    and local temps should be in memory. */
3687 static void la_bb_end(TCGContext *s, int ng, int nt)
3688 {
3689     int i;
3690 
3691     for (i = 0; i < nt; ++i) {
3692         TCGTemp *ts = &s->temps[i];
3693         int state;
3694 
3695         switch (ts->kind) {
3696         case TEMP_FIXED:
3697         case TEMP_GLOBAL:
3698         case TEMP_TB:
3699             state = TS_DEAD | TS_MEM;
3700             break;
3701         case TEMP_EBB:
3702         case TEMP_CONST:
3703             state = TS_DEAD;
3704             break;
3705         default:
3706             g_assert_not_reached();
3707         }
3708         ts->state = state;
3709         la_reset_pref(ts);
3710     }
3711 }
3712 
3713 /* liveness analysis: sync globals back to memory.  */
3714 static void la_global_sync(TCGContext *s, int ng)
3715 {
3716     int i;
3717 
3718     for (i = 0; i < ng; ++i) {
3719         int state = s->temps[i].state;
3720         s->temps[i].state = state | TS_MEM;
3721         if (state == TS_DEAD) {
3722             /* If the global was previously dead, reset prefs.  */
3723             la_reset_pref(&s->temps[i]);
3724         }
3725     }
3726 }
3727 
3728 /*
3729  * liveness analysis: conditional branch: all temps are dead unless
3730  * explicitly live-across-conditional-branch, globals and local temps
3731  * should be synced.
3732  */
3733 static void la_bb_sync(TCGContext *s, int ng, int nt)
3734 {
3735     la_global_sync(s, ng);
3736 
3737     for (int i = ng; i < nt; ++i) {
3738         TCGTemp *ts = &s->temps[i];
3739         int state;
3740 
3741         switch (ts->kind) {
3742         case TEMP_TB:
3743             state = ts->state;
3744             ts->state = state | TS_MEM;
3745             if (state != TS_DEAD) {
3746                 continue;
3747             }
3748             break;
3749         case TEMP_EBB:
3750         case TEMP_CONST:
3751             continue;
3752         default:
3753             g_assert_not_reached();
3754         }
3755         la_reset_pref(&s->temps[i]);
3756     }
3757 }
3758 
3759 /* liveness analysis: sync globals back to memory and kill.  */
3760 static void la_global_kill(TCGContext *s, int ng)
3761 {
3762     int i;
3763 
3764     for (i = 0; i < ng; i++) {
3765         s->temps[i].state = TS_DEAD | TS_MEM;
3766         la_reset_pref(&s->temps[i]);
3767     }
3768 }
3769 
3770 /* liveness analysis: note live globals crossing calls.  */
3771 static void la_cross_call(TCGContext *s, int nt)
3772 {
3773     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3774     int i;
3775 
3776     for (i = 0; i < nt; i++) {
3777         TCGTemp *ts = &s->temps[i];
3778         if (!(ts->state & TS_DEAD)) {
3779             TCGRegSet *pset = la_temp_pref(ts);
3780             TCGRegSet set = *pset;
3781 
3782             set &= mask;
3783             /* If the combination is not possible, restart.  */
3784             if (set == 0) {
3785                 set = tcg_target_available_regs[ts->type] & mask;
3786             }
3787             *pset = set;
3788         }
3789     }
3790 }
3791 
3792 /*
3793  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3794  * to TEMP_EBB, if possible.
3795  */
3796 static void __attribute__((noinline))
3797 liveness_pass_0(TCGContext *s)
3798 {
3799     void * const multiple_ebb = (void *)(uintptr_t)-1;
3800     int nb_temps = s->nb_temps;
3801     TCGOp *op, *ebb;
3802 
3803     for (int i = s->nb_globals; i < nb_temps; ++i) {
3804         s->temps[i].state_ptr = NULL;
3805     }
3806 
3807     /*
3808      * Represent each EBB by the op at which it begins.  In the case of
3809      * the first EBB, this is the first op, otherwise it is a label.
3810      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3811      * within a single EBB, else MULTIPLE_EBB.
3812      */
3813     ebb = QTAILQ_FIRST(&s->ops);
3814     QTAILQ_FOREACH(op, &s->ops, link) {
3815         const TCGOpDef *def;
3816         int nb_oargs, nb_iargs;
3817 
3818         switch (op->opc) {
3819         case INDEX_op_set_label:
3820             ebb = op;
3821             continue;
3822         case INDEX_op_discard:
3823             continue;
3824         case INDEX_op_call:
3825             nb_oargs = TCGOP_CALLO(op);
3826             nb_iargs = TCGOP_CALLI(op);
3827             break;
3828         default:
3829             def = &tcg_op_defs[op->opc];
3830             nb_oargs = def->nb_oargs;
3831             nb_iargs = def->nb_iargs;
3832             break;
3833         }
3834 
3835         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3836             TCGTemp *ts = arg_temp(op->args[i]);
3837 
3838             if (ts->kind != TEMP_TB) {
3839                 continue;
3840             }
3841             if (ts->state_ptr == NULL) {
3842                 ts->state_ptr = ebb;
3843             } else if (ts->state_ptr != ebb) {
3844                 ts->state_ptr = multiple_ebb;
3845             }
3846         }
3847     }
3848 
3849     /*
3850      * For TEMP_TB that turned out not to be used beyond one EBB,
3851      * reduce the liveness to TEMP_EBB.
3852      */
3853     for (int i = s->nb_globals; i < nb_temps; ++i) {
3854         TCGTemp *ts = &s->temps[i];
3855         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3856             ts->kind = TEMP_EBB;
3857         }
3858     }
3859 }
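
/*
 * Demotion example (hypothetical temp): if t5 is TEMP_TB but every op
 * that reads or writes t5 lies within one label-delimited region, its
 * state_ptr settles on that single EBB and t5 becomes TEMP_EBB, which
 * lets la_bb_sync() below skip syncing it at conditional branches.
 */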
3860 
3861 /* Liveness analysis: update the opc_arg_life array to tell if a
3862    given input argument is dead. Instructions updating dead
3863    temporaries are removed. */
3864 static void __attribute__((noinline))
3865 liveness_pass_1(TCGContext *s)
3866 {
3867     int nb_globals = s->nb_globals;
3868     int nb_temps = s->nb_temps;
3869     TCGOp *op, *op_prev;
3870     TCGRegSet *prefs;
3871     int i;
3872 
3873     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3874     for (i = 0; i < nb_temps; ++i) {
3875         s->temps[i].state_ptr = prefs + i;
3876     }
3877 
3878     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3879     la_func_end(s, nb_globals, nb_temps);
3880 
3881     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3882         int nb_iargs, nb_oargs;
3883         TCGOpcode opc_new, opc_new2;
3884         bool have_opc_new2;
3885         TCGLifeData arg_life = 0;
3886         TCGTemp *ts;
3887         TCGOpcode opc = op->opc;
3888         const TCGOpDef *def = &tcg_op_defs[opc];
3889         const TCGArgConstraint *args_ct;
3890 
3891         switch (opc) {
3892         case INDEX_op_call:
3893             {
3894                 const TCGHelperInfo *info = tcg_call_info(op);
3895                 int call_flags = tcg_call_flags(op);
3896 
3897                 nb_oargs = TCGOP_CALLO(op);
3898                 nb_iargs = TCGOP_CALLI(op);
3899 
3900                 /* pure functions can be removed if their result is unused */
3901                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3902                     for (i = 0; i < nb_oargs; i++) {
3903                         ts = arg_temp(op->args[i]);
3904                         if (ts->state != TS_DEAD) {
3905                             goto do_not_remove_call;
3906                         }
3907                     }
3908                     goto do_remove;
3909                 }
3910             do_not_remove_call:
3911 
3912                 /* Output args are dead.  */
3913                 for (i = 0; i < nb_oargs; i++) {
3914                     ts = arg_temp(op->args[i]);
3915                     if (ts->state & TS_DEAD) {
3916                         arg_life |= DEAD_ARG << i;
3917                     }
3918                     if (ts->state & TS_MEM) {
3919                         arg_life |= SYNC_ARG << i;
3920                     }
3921                     ts->state = TS_DEAD;
3922                     la_reset_pref(ts);
3923                 }
3924 
3925                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3926                 memset(op->output_pref, 0, sizeof(op->output_pref));
3927 
3928                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3929                                     TCG_CALL_NO_READ_GLOBALS))) {
3930                     la_global_kill(s, nb_globals);
3931                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3932                     la_global_sync(s, nb_globals);
3933                 }
3934 
3935                 /* Record arguments that die in this helper.  */
3936                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3937                     ts = arg_temp(op->args[i]);
3938                     if (ts->state & TS_DEAD) {
3939                         arg_life |= DEAD_ARG << i;
3940                     }
3941                 }
3942 
3943                 /* For all live registers, remove call-clobbered prefs.  */
3944                 la_cross_call(s, nb_temps);
3945 
3946                 /*
3947                  * Input arguments are live for preceding opcodes.
3948                  *
3949                  * For those arguments that die, and will be allocated in
3950                  * registers, clear the register set for that arg, to be
3951                  * filled in below.  For args that will be on the stack,
3952                  * reset to any available reg.  Process arguments in reverse
3953                  * order so that if a temp is used more than once, the stack
3954                  * reset to max happens before the register reset to 0.
3955                  */
3956                 for (i = nb_iargs - 1; i >= 0; i--) {
3957                     const TCGCallArgumentLoc *loc = &info->in[i];
3958                     ts = arg_temp(op->args[nb_oargs + i]);
3959 
3960                     if (ts->state & TS_DEAD) {
3961                         switch (loc->kind) {
3962                         case TCG_CALL_ARG_NORMAL:
3963                         case TCG_CALL_ARG_EXTEND_U:
3964                         case TCG_CALL_ARG_EXTEND_S:
3965                             if (arg_slot_reg_p(loc->arg_slot)) {
3966                                 *la_temp_pref(ts) = 0;
3967                                 break;
3968                             }
3969                             /* fall through */
3970                         default:
3971                             *la_temp_pref(ts) =
3972                                 tcg_target_available_regs[ts->type];
3973                             break;
3974                         }
3975                         ts->state &= ~TS_DEAD;
3976                     }
3977                 }
3978 
3979                 /*
3980                  * For each input argument, add its input register to prefs.
3981                  * If a temp is used once, this produces a single set bit;
3982                  * if a temp is used multiple times, this produces a set.
3983                  */
3984                 for (i = 0; i < nb_iargs; i++) {
3985                     const TCGCallArgumentLoc *loc = &info->in[i];
3986                     ts = arg_temp(op->args[nb_oargs + i]);
3987 
3988                     switch (loc->kind) {
3989                     case TCG_CALL_ARG_NORMAL:
3990                     case TCG_CALL_ARG_EXTEND_U:
3991                     case TCG_CALL_ARG_EXTEND_S:
3992                         if (arg_slot_reg_p(loc->arg_slot)) {
3993                             tcg_regset_set_reg(*la_temp_pref(ts),
3994                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3995                         }
3996                         break;
3997                     default:
3998                         break;
3999                     }
4000                 }
4001             }
4002             break;
4003         case INDEX_op_insn_start:
4004             break;
4005         case INDEX_op_discard:
4006             /* mark the temporary as dead */
4007             ts = arg_temp(op->args[0]);
4008             ts->state = TS_DEAD;
4009             la_reset_pref(ts);
4010             break;
4011 
4012         case INDEX_op_add2_i32:
4013         case INDEX_op_add2_i64:
4014             opc_new = INDEX_op_add;
4015             goto do_addsub2;
4016         case INDEX_op_sub2_i32:
4017             opc_new = INDEX_op_sub_i32;
4018             goto do_addsub2;
4019         case INDEX_op_sub2_i64:
4020             opc_new = INDEX_op_sub_i64;
4021         do_addsub2:
4022             nb_iargs = 4;
4023             nb_oargs = 2;
4024             /* Test if the high part of the operation is dead, but not
4025                the low part.  The result can be optimized to a simple
4026                add or sub.  This happens often for an x86_64 guest when
4027                the cpu mode is set to 32 bit.  */
4028             if (arg_temp(op->args[1])->state == TS_DEAD) {
4029                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4030                     goto do_remove;
4031                 }
4032                 /* Replace the opcode and adjust the args in place,
4033                    leaving 3 unused args at the end.  */
4034                 op->opc = opc = opc_new;
4035                 op->args[1] = op->args[2];
4036                 op->args[2] = op->args[4];
4037                 /* Fall through and mark the single-word operation live.  */
4038                 nb_iargs = 2;
4039                 nb_oargs = 1;
4040             }
4041             goto do_not_remove;
4042 
4043         case INDEX_op_mulu2_i32:
4044             opc_new = INDEX_op_mul_i32;
4045             opc_new2 = INDEX_op_muluh_i32;
4046             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4047             goto do_mul2;
4048         case INDEX_op_muls2_i32:
4049             opc_new = INDEX_op_mul_i32;
4050             opc_new2 = INDEX_op_mulsh_i32;
4051             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4052             goto do_mul2;
4053         case INDEX_op_mulu2_i64:
4054             opc_new = INDEX_op_mul_i64;
4055             opc_new2 = INDEX_op_muluh_i64;
4056             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4057             goto do_mul2;
4058         case INDEX_op_muls2_i64:
4059             opc_new = INDEX_op_mul_i64;
4060             opc_new2 = INDEX_op_mulsh_i64;
4061             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4062             goto do_mul2;
4063         do_mul2:
4064             nb_iargs = 2;
4065             nb_oargs = 2;
4066             if (arg_temp(op->args[1])->state == TS_DEAD) {
4067                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4068                     /* Both parts of the operation are dead.  */
4069                     goto do_remove;
4070                 }
4071                 /* The high part of the operation is dead; generate the low. */
4072                 op->opc = opc = opc_new;
4073                 op->args[1] = op->args[2];
4074                 op->args[2] = op->args[3];
4075             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4076                 /* The low part of the operation is dead; generate the high. */
4077                 op->opc = opc = opc_new2;
4078                 op->args[0] = op->args[1];
4079                 op->args[1] = op->args[2];
4080                 op->args[2] = op->args[3];
4081             } else {
4082                 goto do_not_remove;
4083             }
4084             /* Mark the single-word operation live.  */
4085             nb_oargs = 1;
4086             goto do_not_remove;
4087 
4088         default:
4089             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4090             nb_iargs = def->nb_iargs;
4091             nb_oargs = def->nb_oargs;
4092 
4093             /* Test if the operation can be removed because all
4094                its outputs are dead.  We assume that nb_oargs == 0
4095                implies side effects.  */
4096             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4097                 for (i = 0; i < nb_oargs; i++) {
4098                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4099                         goto do_not_remove;
4100                     }
4101                 }
4102                 goto do_remove;
4103             }
4104             goto do_not_remove;
4105 
4106         do_remove:
4107             tcg_op_remove(s, op);
4108             break;
4109 
4110         do_not_remove:
4111             for (i = 0; i < nb_oargs; i++) {
4112                 ts = arg_temp(op->args[i]);
4113 
4114                 /* Remember the preference of the uses that followed.  */
4115                 if (i < ARRAY_SIZE(op->output_pref)) {
4116                     op->output_pref[i] = *la_temp_pref(ts);
4117                 }
4118 
4119                 /* Output args are dead.  */
4120                 if (ts->state & TS_DEAD) {
4121                     arg_life |= DEAD_ARG << i;
4122                 }
4123                 if (ts->state & TS_MEM) {
4124                     arg_life |= SYNC_ARG << i;
4125                 }
4126                 ts->state = TS_DEAD;
4127                 la_reset_pref(ts);
4128             }
4129 
4130             /* If end of basic block, update.  */
4131             if (def->flags & TCG_OPF_BB_EXIT) {
4132                 la_func_end(s, nb_globals, nb_temps);
4133             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4134                 la_bb_sync(s, nb_globals, nb_temps);
4135             } else if (def->flags & TCG_OPF_BB_END) {
4136                 la_bb_end(s, nb_globals, nb_temps);
4137             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4138                 la_global_sync(s, nb_globals);
4139                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4140                     la_cross_call(s, nb_temps);
4141                 }
4142             }
4143 
4144             /* Record arguments that die in this opcode.  */
4145             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4146                 ts = arg_temp(op->args[i]);
4147                 if (ts->state & TS_DEAD) {
4148                     arg_life |= DEAD_ARG << i;
4149                 }
4150             }
4151 
4152             /* Input arguments are live for preceding opcodes.  */
4153             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4154                 ts = arg_temp(op->args[i]);
4155                 if (ts->state & TS_DEAD) {
4156                     /* For operands that were dead, initially allow
4157                        all regs for the type.  */
4158                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4159                     ts->state &= ~TS_DEAD;
4160                 }
4161             }
4162 
4163             /* Incorporate constraints for this operand.  */
4164             switch (opc) {
4165             case INDEX_op_mov:
4166                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4167                    have proper constraints.  That said, special case
4168                    moves to propagate preferences backward.  */
4169                 if (IS_DEAD_ARG(1)) {
4170                     *la_temp_pref(arg_temp(op->args[0]))
4171                         = *la_temp_pref(arg_temp(op->args[1]));
4172                 }
4173                 break;
4174 
4175             default:
4176                 args_ct = opcode_args_ct(op);
4177                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4178                     const TCGArgConstraint *ct = &args_ct[i];
4179                     TCGRegSet set, *pset;
4180 
4181                     ts = arg_temp(op->args[i]);
4182                     pset = la_temp_pref(ts);
4183                     set = *pset;
4184 
4185                     set &= ct->regs;
4186                     if (ct->ialias) {
4187                         set &= output_pref(op, ct->alias_index);
4188                     }
4189                     /* If the combination is not possible, restart.  */
4190                     if (set == 0) {
4191                         set = ct->regs;
4192                     }
4193                     *pset = set;
4194                 }
4195                 break;
4196             }
4197             break;
4198         }
4199         op->life = arg_life;
4200     }
4201 }
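
/*
 * Example of the add2 narrowing above (hypothetical temps): for
 *     add2_i32 lo, hi, al, ah, bl, bh
 * with hi dead but lo still live, the op is rewritten in place to
 *     add lo, al, bl
 * by shifting args[2] and args[4] down, leaving the trailing args
 * unused; liveness then continues as for a single-word add.
 */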
4202 
4203 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4204 static bool __attribute__((noinline))
4205 liveness_pass_2(TCGContext *s)
4206 {
4207     int nb_globals = s->nb_globals;
4208     int nb_temps, i;
4209     bool changes = false;
4210     TCGOp *op, *op_next;
4211 
4212     /* Create a temporary for each indirect global.  */
4213     for (i = 0; i < nb_globals; ++i) {
4214         TCGTemp *its = &s->temps[i];
4215         if (its->indirect_reg) {
4216             TCGTemp *dts = tcg_temp_alloc(s);
4217             dts->type = its->type;
4218             dts->base_type = its->base_type;
4219             dts->temp_subindex = its->temp_subindex;
4220             dts->kind = TEMP_EBB;
4221             its->state_ptr = dts;
4222         } else {
4223             its->state_ptr = NULL;
4224         }
4225         /* All globals begin dead.  */
4226         its->state = TS_DEAD;
4227     }
4228     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4229         TCGTemp *its = &s->temps[i];
4230         its->state_ptr = NULL;
4231         its->state = TS_DEAD;
4232     }
4233 
4234     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4235         TCGOpcode opc = op->opc;
4236         const TCGOpDef *def = &tcg_op_defs[opc];
4237         TCGLifeData arg_life = op->life;
4238         int nb_iargs, nb_oargs, call_flags;
4239         TCGTemp *arg_ts, *dir_ts;
4240 
4241         if (opc == INDEX_op_call) {
4242             nb_oargs = TCGOP_CALLO(op);
4243             nb_iargs = TCGOP_CALLI(op);
4244             call_flags = tcg_call_flags(op);
4245         } else {
4246             nb_iargs = def->nb_iargs;
4247             nb_oargs = def->nb_oargs;
4248 
4249             /* Set flags similar to those that calls require.  */
4250             if (def->flags & TCG_OPF_COND_BRANCH) {
4251                 /* Like reading globals: sync_globals */
4252                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4253             } else if (def->flags & TCG_OPF_BB_END) {
4254                 /* Like writing globals: save_globals */
4255                 call_flags = 0;
4256             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4257                 /* Like reading globals: sync_globals */
4258                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4259             } else {
4260                 /* No effect on globals.  */
4261                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4262                               TCG_CALL_NO_WRITE_GLOBALS);
4263             }
4264         }
4265 
4266         /* Make sure that input arguments are available.  */
4267         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4268             arg_ts = arg_temp(op->args[i]);
4269             dir_ts = arg_ts->state_ptr;
4270             if (dir_ts && arg_ts->state == TS_DEAD) {
4271                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4272                                   ? INDEX_op_ld_i32
4273                                   : INDEX_op_ld_i64);
4274                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4275                                                   arg_ts->type, 3);
4276 
4277                 lop->args[0] = temp_arg(dir_ts);
4278                 lop->args[1] = temp_arg(arg_ts->mem_base);
4279                 lop->args[2] = arg_ts->mem_offset;
4280 
4281                 /* Loaded, but synced with memory.  */
4282                 arg_ts->state = TS_MEM;
4283             }
4284         }
4285 
4286         /* Perform input replacement, and mark inputs that became dead.
4287            No action is required except keeping temp_state up to date
4288            so that we reload when needed.  */
4289         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4290             arg_ts = arg_temp(op->args[i]);
4291             dir_ts = arg_ts->state_ptr;
4292             if (dir_ts) {
4293                 op->args[i] = temp_arg(dir_ts);
4294                 changes = true;
4295                 if (IS_DEAD_ARG(i)) {
4296                     arg_ts->state = TS_DEAD;
4297                 }
4298             }
4299         }
4300 
4301         /* Liveness analysis should ensure that the following are
4302            all correct, for call sites and basic block end points.  */
4303         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4304             /* Nothing to do */
4305         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4306             for (i = 0; i < nb_globals; ++i) {
4307                 /* Liveness should see that globals are synced back,
4308                    that is, either TS_DEAD or TS_MEM.  */
4309                 arg_ts = &s->temps[i];
4310                 tcg_debug_assert(arg_ts->state_ptr == 0
4311                                  || arg_ts->state != 0);
4312             }
4313         } else {
4314             for (i = 0; i < nb_globals; ++i) {
4315                 /* Liveness should see that globals are saved back,
4316                    that is, TS_DEAD, waiting to be reloaded.  */
4317                 arg_ts = &s->temps[i];
4318                 tcg_debug_assert(arg_ts->state_ptr == 0
4319                                  || arg_ts->state == TS_DEAD);
4320             }
4321         }
4322 
4323         /* Outputs become available.  */
4324         if (opc == INDEX_op_mov) {
4325             arg_ts = arg_temp(op->args[0]);
4326             dir_ts = arg_ts->state_ptr;
4327             if (dir_ts) {
4328                 op->args[0] = temp_arg(dir_ts);
4329                 changes = true;
4330 
4331                 /* The output is now live and modified.  */
4332                 arg_ts->state = 0;
4333 
4334                 if (NEED_SYNC_ARG(0)) {
4335                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4336                                       ? INDEX_op_st_i32
4337                                       : INDEX_op_st_i64);
4338                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4339                                                      arg_ts->type, 3);
4340                     TCGTemp *out_ts = dir_ts;
4341 
4342                     if (IS_DEAD_ARG(0)) {
4343                         out_ts = arg_temp(op->args[1]);
4344                         arg_ts->state = TS_DEAD;
4345                         tcg_op_remove(s, op);
4346                     } else {
4347                         arg_ts->state = TS_MEM;
4348                     }
4349 
4350                     sop->args[0] = temp_arg(out_ts);
4351                     sop->args[1] = temp_arg(arg_ts->mem_base);
4352                     sop->args[2] = arg_ts->mem_offset;
4353                 } else {
4354                     tcg_debug_assert(!IS_DEAD_ARG(0));
4355                 }
4356             }
4357         } else {
4358             for (i = 0; i < nb_oargs; i++) {
4359                 arg_ts = arg_temp(op->args[i]);
4360                 dir_ts = arg_ts->state_ptr;
4361                 if (!dir_ts) {
4362                     continue;
4363                 }
4364                 op->args[i] = temp_arg(dir_ts);
4365                 changes = true;
4366 
4367                 /* The output is now live and modified.  */
4368                 arg_ts->state = 0;
4369 
4370                 /* Sync outputs upon their last write.  */
4371                 if (NEED_SYNC_ARG(i)) {
4372                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4373                                       ? INDEX_op_st_i32
4374                                       : INDEX_op_st_i64);
4375                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4376                                                      arg_ts->type, 3);
4377 
4378                     sop->args[0] = temp_arg(dir_ts);
4379                     sop->args[1] = temp_arg(arg_ts->mem_base);
4380                     sop->args[2] = arg_ts->mem_offset;
4381 
4382                     arg_ts->state = TS_MEM;
4383                 }
4384                 /* Drop outputs that are dead.  */
4385                 if (IS_DEAD_ARG(i)) {
4386                     arg_ts->state = TS_DEAD;
4387                 }
4388             }
4389         }
4390     }
4391 
4392     return changes;
4393 }
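
/*
 * Sketch of the rewrite (hypothetical indirect global "reg" with slot
 * $off from base "env"): each use is redirected to a fresh direct
 * TEMP_EBB temp, a load is inserted before the first use while the
 * global is TS_DEAD, and a store follows the last write when
 * NEED_SYNC_ARG() is set:
 *
 *     add_i32 reg, reg, t1         ld_i32  tmp, env, $off
 *                             -->  add_i32 tmp, tmp, t1
 *                                  st_i32  tmp, env, $off
 */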
4394 
4395 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4396 {
4397     intptr_t off;
4398     int size, align;
4399 
4400     /* When allocating an object, look at the full type. */
4401     size = tcg_type_size(ts->base_type);
4402     switch (ts->base_type) {
4403     case TCG_TYPE_I32:
4404         align = 4;
4405         break;
4406     case TCG_TYPE_I64:
4407     case TCG_TYPE_V64:
4408         align = 8;
4409         break;
4410     case TCG_TYPE_I128:
4411     case TCG_TYPE_V128:
4412     case TCG_TYPE_V256:
4413         /*
4414          * Note that we do not require aligned storage for V256,
4415          * and that we provide alignment for I128 to match V128,
4416          * even if that's above what the host ABI requires.
4417          */
4418         align = 16;
4419         break;
4420     default:
4421         g_assert_not_reached();
4422     }
4423 
4424     /*
4425      * Assume the stack is sufficiently aligned.
4426      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4427      * and do not require 16 byte vector alignment.  This seems slightly
4428      * easier than fully parameterizing the above switch statement.
4429      */
4430     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4431     off = ROUND_UP(s->current_frame_offset, align);
4432 
4433     /* If we've exhausted the stack frame, restart with a smaller TB. */
4434     if (off + size > s->frame_end) {
4435         tcg_raise_tb_overflow(s);
4436     }
4437     s->current_frame_offset = off + size;
4438 #if defined(__sparc__)
4439     off += TCG_TARGET_STACK_BIAS;
4440 #endif
4441 
4442     /* If the object was subdivided, assign memory to all the parts. */
4443     if (ts->base_type != ts->type) {
4444         int part_size = tcg_type_size(ts->type);
4445         int part_count = size / part_size;
4446 
4447         /*
4448          * Each part is allocated sequentially in tcg_temp_new_internal.
4449          * Jump back to the first part by subtracting the current index.
4450          */
4451         ts -= ts->temp_subindex;
4452         for (int i = 0; i < part_count; ++i) {
4453             ts[i].mem_offset = off + i * part_size;
4454             ts[i].mem_base = s->frame_temp;
4455             ts[i].mem_allocated = 1;
4456         }
4457     } else {
4458         ts->mem_offset = off;
4459         ts->mem_base = s->frame_temp;
4460         ts->mem_allocated = 1;
4461     }
4462 }
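
/*
 * Worked example (hypothetical offsets): with current_frame_offset at
 * 0x14, a TCG_TYPE_I64 temp uses align = MIN(TCG_TARGET_STACK_ALIGN, 8),
 * so the offset rounds up to 0x18 and the temp occupies [0x18, 0x20);
 * a TCG_TYPE_I128 object split into two I64 parts would instead get
 * two adjacent 8-byte slots via the temp_subindex walk above.
 */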
4463 
4464 /* Assign @reg to @ts, and update reg_to_temp[]. */
4465 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4466 {
4467     if (ts->val_type == TEMP_VAL_REG) {
4468         TCGReg old = ts->reg;
4469         tcg_debug_assert(s->reg_to_temp[old] == ts);
4470         if (old == reg) {
4471             return;
4472         }
4473         s->reg_to_temp[old] = NULL;
4474     }
4475     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4476     s->reg_to_temp[reg] = ts;
4477     ts->val_type = TEMP_VAL_REG;
4478     ts->reg = reg;
4479 }
4480 
4481 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4482 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4483 {
4484     tcg_debug_assert(type != TEMP_VAL_REG);
4485     if (ts->val_type == TEMP_VAL_REG) {
4486         TCGReg reg = ts->reg;
4487         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4488         s->reg_to_temp[reg] = NULL;
4489     }
4490     ts->val_type = type;
4491 }
4492 
4493 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4494 
4495 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4496    mark it free; otherwise mark it dead.  */
4497 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4498 {
4499     TCGTempVal new_type;
4500 
4501     switch (ts->kind) {
4502     case TEMP_FIXED:
4503         return;
4504     case TEMP_GLOBAL:
4505     case TEMP_TB:
4506         new_type = TEMP_VAL_MEM;
4507         break;
4508     case TEMP_EBB:
4509         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4510         break;
4511     case TEMP_CONST:
4512         new_type = TEMP_VAL_CONST;
4513         break;
4514     default:
4515         g_assert_not_reached();
4516     }
4517     set_temp_val_nonreg(s, ts, new_type);
4518 }
4519 
4520 /* Mark a temporary as dead.  */
4521 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4522 {
4523     temp_free_or_dead(s, ts, 1);
4524 }
4525 
4526 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4527    register needs to be allocated to store a constant.  If 'free_or_dead'
4528    is non-zero, subsequently release the temporary; if it is positive, the
4529    temp is dead; if it is negative, the temp is free.  */
4530 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4531                       TCGRegSet preferred_regs, int free_or_dead)
4532 {
4533     if (!temp_readonly(ts) && !ts->mem_coherent) {
4534         if (!ts->mem_allocated) {
4535             temp_allocate_frame(s, ts);
4536         }
4537         switch (ts->val_type) {
4538         case TEMP_VAL_CONST:
4539             /* If we're going to free the temp immediately, then we won't
4540                require it later in a register, so attempt to store the
4541                constant to memory directly.  */
4542             if (free_or_dead
4543                 && tcg_out_sti(s, ts->type, ts->val,
4544                                ts->mem_base->reg, ts->mem_offset)) {
4545                 break;
4546             }
4547             temp_load(s, ts, tcg_target_available_regs[ts->type],
4548                       allocated_regs, preferred_regs);
4549             /* fallthrough */
4550 
4551         case TEMP_VAL_REG:
4552             tcg_out_st(s, ts->type, ts->reg,
4553                        ts->mem_base->reg, ts->mem_offset);
4554             break;
4555 
4556         case TEMP_VAL_MEM:
4557             break;
4558 
4559         case TEMP_VAL_DEAD:
4560         default:
4561             g_assert_not_reached();
4562         }
4563         ts->mem_coherent = 1;
4564     }
4565     if (free_or_dead) {
4566         temp_free_or_dead(s, ts, free_or_dead);
4567     }
4568 }
4569 
4570 /* free register 'reg' by spilling the corresponding temporary if necessary */
4571 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4572 {
4573     TCGTemp *ts = s->reg_to_temp[reg];
4574     if (ts != NULL) {
4575         temp_sync(s, ts, allocated_regs, 0, -1);
4576     }
4577 }
4578 
4579 /**
4580  * tcg_reg_alloc:
4581  * @required_regs: Set of registers in which we must allocate.
4582  * @allocated_regs: Set of registers which must be avoided.
4583  * @preferred_regs: Set of registers we should prefer.
4584  * @rev: True if we search the registers in "indirect" order.
4585  *
4586  * The allocated register must be in @required_regs & ~@allocated_regs,
4587  * but if we can put it in @preferred_regs we may save a move later.
4588  */
4589 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4590                             TCGRegSet allocated_regs,
4591                             TCGRegSet preferred_regs, bool rev)
4592 {
4593     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4594     TCGRegSet reg_ct[2];
4595     const int *order;
4596 
4597     reg_ct[1] = required_regs & ~allocated_regs;
4598     tcg_debug_assert(reg_ct[1] != 0);
4599     reg_ct[0] = reg_ct[1] & preferred_regs;
4600 
4601     /* Skip the preferred_regs option if it cannot be satisfied,
4602        or if the preference made no difference.  */
4603     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4604 
4605     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4606 
4607     /* Try free registers, preferences first.  */
4608     for (j = f; j < 2; j++) {
4609         TCGRegSet set = reg_ct[j];
4610 
4611         if (tcg_regset_single(set)) {
4612             /* One register in the set.  */
4613             TCGReg reg = tcg_regset_first(set);
4614             if (s->reg_to_temp[reg] == NULL) {
4615                 return reg;
4616             }
4617         } else {
4618             for (i = 0; i < n; i++) {
4619                 TCGReg reg = order[i];
4620                 if (s->reg_to_temp[reg] == NULL &&
4621                     tcg_regset_test_reg(set, reg)) {
4622                     return reg;
4623                 }
4624             }
4625         }
4626     }
4627 
4628     /* We must spill something.  */
4629     for (j = f; j < 2; j++) {
4630         TCGRegSet set = reg_ct[j];
4631 
4632         if (tcg_regset_single(set)) {
4633             /* One register in the set.  */
4634             TCGReg reg = tcg_regset_first(set);
4635             tcg_reg_free(s, reg, allocated_regs);
4636             return reg;
4637         } else {
4638             for (i = 0; i < n; i++) {
4639                 TCGReg reg = order[i];
4640                 if (tcg_regset_test_reg(set, reg)) {
4641                     tcg_reg_free(s, reg, allocated_regs);
4642                     return reg;
4643                 }
4644             }
4645         }
4646     }
4647 
4648     g_assert_not_reached();
4649 }
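
/*
 * Illustration of the two-phase search (hypothetical registers): with
 * required_regs = {r0..r3}, preferred_regs = {r2} and r2 unoccupied,
 * the first loop returns r2 with no spill; only if every candidate is
 * occupied does the second loop evict the first candidate in
 * allocation order via tcg_reg_free().
 */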
4650 
4651 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4652                                  TCGRegSet allocated_regs,
4653                                  TCGRegSet preferred_regs, bool rev)
4654 {
4655     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4656     TCGRegSet reg_ct[2];
4657     const int *order;
4658 
4659     /* Accept register I only if neither I nor I+1 is in allocated_regs. */
4660     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4661     tcg_debug_assert(reg_ct[1] != 0);
4662     reg_ct[0] = reg_ct[1] & preferred_regs;
4663 
4664     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4665 
4666     /*
4667      * Skip the preferred_regs option if it cannot be satisfied,
4668      * or if the preference made no difference.
4669      */
4670     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4671 
4672     /*
4673      * Minimize the number of flushes by looking for 2 free registers first,
4674      * then a single flush, then two flushes.
4675      */
4676     for (fmin = 2; fmin >= 0; fmin--) {
4677         for (j = k; j < 2; j++) {
4678             TCGRegSet set = reg_ct[j];
4679 
4680             for (i = 0; i < n; i++) {
4681                 TCGReg reg = order[i];
4682 
4683                 if (tcg_regset_test_reg(set, reg)) {
4684                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4685                     if (f >= fmin) {
4686                         tcg_reg_free(s, reg, allocated_regs);
4687                         tcg_reg_free(s, reg + 1, allocated_regs);
4688                         return reg;
4689                     }
4690                 }
4691             }
4692         }
4693     }
4694     g_assert_not_reached();
4695 }
4696 
4697 /* Make sure the temporary is in a register.  If needed, allocate the register
4698    from DESIRED while avoiding ALLOCATED.  */
4699 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4700                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4701 {
4702     TCGReg reg;
4703 
4704     switch (ts->val_type) {
4705     case TEMP_VAL_REG:
4706         return;
4707     case TEMP_VAL_CONST:
4708         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4709                             preferred_regs, ts->indirect_base);
4710         if (ts->type <= TCG_TYPE_I64) {
4711             tcg_out_movi(s, ts->type, reg, ts->val);
4712         } else {
4713             uint64_t val = ts->val;
4714             MemOp vece = MO_64;
4715 
4716             /*
4717              * Find the minimal vector element that matches the constant.
4718              * The targets will, in general, have to do this search anyway,
4719              * so do it generically here.
4720              */
4721             if (val == dup_const(MO_8, val)) {
4722                 vece = MO_8;
4723             } else if (val == dup_const(MO_16, val)) {
4724                 vece = MO_16;
4725             } else if (val == dup_const(MO_32, val)) {
4726                 vece = MO_32;
4727             }
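            /*
             * Illustrative: val = 0x00ff00ff00ff00ff matches at MO_16
             * (but not MO_8), 0xdeadbeefdeadbeef first matches at
             * MO_32, and an arbitrary 64-bit constant stays MO_64.
             */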
4728 
4729             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4730         }
4731         ts->mem_coherent = 0;
4732         break;
4733     case TEMP_VAL_MEM:
4734         if (!ts->mem_allocated) {
4735             temp_allocate_frame(s, ts);
4736         }
4737         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4738                             preferred_regs, ts->indirect_base);
4739         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4740         ts->mem_coherent = 1;
4741         break;
4742     case TEMP_VAL_DEAD:
4743     default:
4744         g_assert_not_reached();
4745     }
4746     set_temp_val_reg(s, ts, reg);
4747 }
4748 
4749 /* Save a temporary to memory. 'allocated_regs' is used in case a
4750    temporary register needs to be allocated to store a constant.  */
4751 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4752 {
4753     /* The liveness analysis already ensures that globals are back
4754        in memory. Keep a tcg_debug_assert for safety. */
4755     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4756 }
4757 
4758 /* save globals to their canonical location and assume they can be
4759    modified by the following code. 'allocated_regs' is used in case a
4760    temporary register needs to be allocated to store a constant. */
4761 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4762 {
4763     int i, n;
4764 
4765     for (i = 0, n = s->nb_globals; i < n; i++) {
4766         temp_save(s, &s->temps[i], allocated_regs);
4767     }
4768 }
4769 
4770 /* sync globals to their canonical location and assume they can be
4771    read by the following code. 'allocated_regs' is used in case a
4772    temporary register needs to be allocated to store a constant. */
4773 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4774 {
4775     int i, n;
4776 
4777     for (i = 0, n = s->nb_globals; i < n; i++) {
4778         TCGTemp *ts = &s->temps[i];
4779         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4780                          || ts->kind == TEMP_FIXED
4781                          || ts->mem_coherent);
4782     }
4783 }
4784 
4785 /* at the end of a basic block, we assume all temporaries are dead and
4786    all globals are stored at their canonical location. */
4787 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4788 {
4789     int i;
4790 
4791     for (i = s->nb_globals; i < s->nb_temps; i++) {
4792         TCGTemp *ts = &s->temps[i];
4793 
4794         switch (ts->kind) {
4795         case TEMP_TB:
4796             temp_save(s, ts, allocated_regs);
4797             break;
4798         case TEMP_EBB:
4799             /* The liveness analysis already ensures that temps are dead.
4800                Keep a tcg_debug_assert for safety. */
4801             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4802             break;
4803         case TEMP_CONST:
4804             /* Similarly, we should have freed any allocated register. */
4805             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4806             break;
4807         default:
4808             g_assert_not_reached();
4809         }
4810     }
4811 
4812     save_globals(s, allocated_regs);
4813 }
4814 
4815 /*
4816  * At a conditional branch, we assume all temporaries are dead unless
4817  * explicitly live-across-conditional-branch; all globals and local
4818  * temps are synced to their location.
4819  */
4820 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4821 {
4822     sync_globals(s, allocated_regs);
4823 
4824     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4825         TCGTemp *ts = &s->temps[i];
4826         /*
4827          * The liveness analysis already ensures that temps are dead.
4828          * Keep tcg_debug_asserts for safety.
4829          */
4830         switch (ts->kind) {
4831         case TEMP_TB:
4832             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4833             break;
4834         case TEMP_EBB:
4835         case TEMP_CONST:
4836             break;
4837         default:
4838             g_assert_not_reached();
4839         }
4840     }
4841 }
4842 
4843 /*
4844  * Specialized code generation for INDEX_op_mov_* with a constant.
4845  */
4846 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4847                                   tcg_target_ulong val, TCGLifeData arg_life,
4848                                   TCGRegSet preferred_regs)
4849 {
4850     /* ENV should not be modified.  */
4851     tcg_debug_assert(!temp_readonly(ots));
4852 
4853     /* The movi is not explicitly generated here.  */
4854     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4855     ots->val = val;
4856     ots->mem_coherent = 0;
4857     if (NEED_SYNC_ARG(0)) {
4858         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4859     } else if (IS_DEAD_ARG(0)) {
4860         temp_dead(s, ots);
4861     }
4862 }
4863 
4864 /*
4865  * Specialized code generation for INDEX_op_mov_*.
4866  */
4867 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4868 {
4869     const TCGLifeData arg_life = op->life;
4870     TCGRegSet allocated_regs, preferred_regs;
4871     TCGTemp *ts, *ots;
4872     TCGType otype, itype;
4873     TCGReg oreg, ireg;
4874 
4875     allocated_regs = s->reserved_regs;
4876     preferred_regs = output_pref(op, 0);
4877     ots = arg_temp(op->args[0]);
4878     ts = arg_temp(op->args[1]);
4879 
4880     /* ENV should not be modified.  */
4881     tcg_debug_assert(!temp_readonly(ots));
4882 
4883     /* Note that otype != itype for no-op truncation.  */
4884     otype = ots->type;
4885     itype = ts->type;
4886 
4887     if (ts->val_type == TEMP_VAL_CONST) {
4888         /* propagate constant or generate sti */
4889         tcg_target_ulong val = ts->val;
4890         if (IS_DEAD_ARG(1)) {
4891             temp_dead(s, ts);
4892         }
4893         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4894         return;
4895     }
4896 
4897     /* If the source value is in memory we're going to be forced
4898        to have it in a register in order to perform the copy.  Copy
4899        the SOURCE value into its own register first, that way we
4900        don't have to reload SOURCE the next time it is used. */
4901     if (ts->val_type == TEMP_VAL_MEM) {
4902         temp_load(s, ts, tcg_target_available_regs[itype],
4903                   allocated_regs, preferred_regs);
4904     }
4905     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4906     ireg = ts->reg;
4907 
4908     if (IS_DEAD_ARG(0)) {
4909         /* mov to a non-saved dead register makes no sense (even with
4910            liveness analysis disabled). */
4911         tcg_debug_assert(NEED_SYNC_ARG(0));
4912         if (!ots->mem_allocated) {
4913             temp_allocate_frame(s, ots);
4914         }
4915         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4916         if (IS_DEAD_ARG(1)) {
4917             temp_dead(s, ts);
4918         }
4919         temp_dead(s, ots);
4920         return;
4921     }
4922 
4923     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4924         /*
4925          * The mov can be suppressed.  Kill input first, so that it
4926          * is unlinked from reg_to_temp, then set the output to the
4927          * reg that we saved from the input.
4928          */
4929         temp_dead(s, ts);
4930         oreg = ireg;
4931     } else {
4932         if (ots->val_type == TEMP_VAL_REG) {
4933             oreg = ots->reg;
4934         } else {
4935             /* Make sure to not spill the input register during allocation. */
4936             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4937                                  allocated_regs | ((TCGRegSet)1 << ireg),
4938                                  preferred_regs, ots->indirect_base);
4939         }
4940         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4941             /*
4942              * Cross register class move not supported.
4943              * Store the source register into the destination slot
4944              * and leave the destination temp as TEMP_VAL_MEM.
4945              */
4946             assert(!temp_readonly(ots));
4947             if (!ots->mem_allocated) {
4948                 temp_allocate_frame(s, ots);
4949             }
4950             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4951             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4952             ots->mem_coherent = 1;
4953             return;
4954         }
4955     }
4956     set_temp_val_reg(s, ots, oreg);
4957     ots->mem_coherent = 0;
4958 
4959     if (NEED_SYNC_ARG(0)) {
4960         temp_sync(s, ots, allocated_regs, 0, 0);
4961     }
4962 }
4963 
4964 /*
4965  * Specialized code generation for INDEX_op_dup_vec.
4966  */
4967 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4968 {
4969     const TCGLifeData arg_life = op->life;
4970     TCGRegSet dup_out_regs, dup_in_regs;
4971     const TCGArgConstraint *dup_args_ct;
4972     TCGTemp *its, *ots;
4973     TCGType itype, vtype;
4974     unsigned vece;
4975     int lowpart_ofs;
4976     bool ok;
4977 
4978     ots = arg_temp(op->args[0]);
4979     its = arg_temp(op->args[1]);
4980 
4981     /* ENV should not be modified.  */
4982     tcg_debug_assert(!temp_readonly(ots));
4983 
4984     itype = its->type;
4985     vece = TCGOP_VECE(op);
4986     vtype = TCGOP_TYPE(op);
4987 
4988     if (its->val_type == TEMP_VAL_CONST) {
4989         /* Propagate constant via movi -> dupi.  */
4990         tcg_target_ulong val = its->val;
4991         if (IS_DEAD_ARG(1)) {
4992             temp_dead(s, its);
4993         }
4994         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4995         return;
4996     }
4997 
4998     dup_args_ct = opcode_args_ct(op);
4999     dup_out_regs = dup_args_ct[0].regs;
5000     dup_in_regs = dup_args_ct[1].regs;
5001 
5002     /* Allocate the output register now.  */
5003     if (ots->val_type != TEMP_VAL_REG) {
5004         TCGRegSet allocated_regs = s->reserved_regs;
5005         TCGReg oreg;
5006 
5007         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5008             /* Make sure to not spill the input register. */
5009             tcg_regset_set_reg(allocated_regs, its->reg);
5010         }
5011         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5012                              output_pref(op, 0), ots->indirect_base);
5013         set_temp_val_reg(s, ots, oreg);
5014     }
5015 
5016     switch (its->val_type) {
5017     case TEMP_VAL_REG:
5018         /*
5019          * The dup constraints must be broad, covering all possible VECE.
5020          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5021          * to fail, indicating that extra moves are required for that case.
5022          */
5023         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5024             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5025                 goto done;
5026             }
5027             /* Try again from memory or a vector input register.  */
5028         }
5029         if (!its->mem_coherent) {
5030             /*
5031              * The input register is not synced, and so an extra store
5032              * would be required to use memory.  Attempt an integer-vector
5033              * register move first.  We do not have a TCGRegSet for this.
5034              */
5035             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5036                 break;
5037             }
5038             /* Sync the temp back to its slot and load from there.  */
5039             temp_sync(s, its, s->reserved_regs, 0, 0);
5040         }
5041         /* fall through */
5042 
5043     case TEMP_VAL_MEM:
5044         lowpart_ofs = 0;
5045         if (HOST_BIG_ENDIAN) {
5046             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5047         }
5048         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5049                              its->mem_offset + lowpart_ofs)) {
5050             goto done;
5051         }
5052         /* Load the input into the destination vector register. */
5053         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5054         break;
5055 
5056     default:
5057         g_assert_not_reached();
5058     }
5059 
5060     /* We now have a vector input register, so dup must succeed. */
5061     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5062     tcg_debug_assert(ok);
5063 
5064  done:
5065     ots->mem_coherent = 0;
5066     if (IS_DEAD_ARG(1)) {
5067         temp_dead(s, its);
5068     }
5069     if (NEED_SYNC_ARG(0)) {
5070         temp_sync(s, ots, s->reserved_regs, 0, 0);
5071     }
5072     if (IS_DEAD_ARG(0)) {
5073         temp_dead(s, ots);
5074     }
5075 }
5076 
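/*
 * Register allocation and code emission for a single generic TCGOp:
 * satisfy the input constraints, handle clobbers and side effects,
 * allocate the outputs, emit the instruction, then sync or free the
 * output temps as directed by the liveness data.
 */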
5077 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5078 {
5079     const TCGLifeData arg_life = op->life;
5080     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5081     TCGRegSet i_allocated_regs;
5082     TCGRegSet o_allocated_regs;
5083     int i, k, nb_iargs, nb_oargs;
5084     TCGReg reg;
5085     TCGArg arg;
5086     const TCGArgConstraint *args_ct;
5087     const TCGArgConstraint *arg_ct;
5088     TCGTemp *ts;
5089     TCGArg new_args[TCG_MAX_OP_ARGS];
5090     int const_args[TCG_MAX_OP_ARGS];
5091     TCGCond op_cond;
5092 
5093     nb_oargs = def->nb_oargs;
5094     nb_iargs = def->nb_iargs;
5095 
5096     /* copy constants */
5097     memcpy(new_args + nb_oargs + nb_iargs,
5098            op->args + nb_oargs + nb_iargs,
5099            sizeof(TCGArg) * def->nb_cargs);
5100 
5101     i_allocated_regs = s->reserved_regs;
5102     o_allocated_regs = s->reserved_regs;
5103 
5104     switch (op->opc) {
5105     case INDEX_op_brcond_i32:
5106     case INDEX_op_brcond_i64:
5107         op_cond = op->args[2];
5108         break;
5109     case INDEX_op_setcond_i32:
5110     case INDEX_op_setcond_i64:
5111     case INDEX_op_negsetcond_i32:
5112     case INDEX_op_negsetcond_i64:
5113     case INDEX_op_cmp_vec:
5114         op_cond = op->args[3];
5115         break;
5116     case INDEX_op_brcond2_i32:
5117         op_cond = op->args[4];
5118         break;
5119     case INDEX_op_movcond_i32:
5120     case INDEX_op_movcond_i64:
5121     case INDEX_op_setcond2_i32:
5122     case INDEX_op_cmpsel_vec:
5123         op_cond = op->args[5];
5124         break;
5125     default:
5126         /* No condition within opcode. */
5127         op_cond = TCG_COND_ALWAYS;
5128         break;
5129     }
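    /*
     * Illustrative: setcond_i32 has args { dst, src1, src2, cond },
     * so the condition is args[3]; movcond_i32 has
     * { dst, c1, c2, v1, v2, cond }, hence args[5].
     */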
5130 
5131     args_ct = opcode_args_ct(op);
5132 
5133     /* satisfy input constraints */
5134     for (k = 0; k < nb_iargs; k++) {
5135         TCGRegSet i_preferred_regs, i_required_regs;
5136         bool allocate_new_reg, copyto_new_reg;
5137         TCGTemp *ts2;
5138         int i1, i2;
5139 
5140         i = args_ct[nb_oargs + k].sort_index;
5141         arg = op->args[i];
5142         arg_ct = &args_ct[i];
5143         ts = arg_temp(arg);
5144 
5145         if (ts->val_type == TEMP_VAL_CONST) {
5146 #ifdef TCG_REG_ZERO
5147             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5148                 /* Hardware zero register: indicate register via non-const. */
5149                 const_args[i] = 0;
5150                 new_args[i] = TCG_REG_ZERO;
5151                 continue;
5152             }
5153 #endif
5154 
5155             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5156                                        op_cond, TCGOP_VECE(op))) {
5157                 /* constant is OK for instruction */
5158                 const_args[i] = 1;
5159                 new_args[i] = ts->val;
5160                 continue;
5161             }
5162         }
5163 
5164         reg = ts->reg;
5165         i_preferred_regs = 0;
5166         i_required_regs = arg_ct->regs;
5167         allocate_new_reg = false;
5168         copyto_new_reg = false;
5169 
5170         switch (arg_ct->pair) {
5171         case 0: /* not paired */
5172             if (arg_ct->ialias) {
5173                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5174 
5175                 /*
5176                  * If the input is readonly, then it cannot also be an
5177                  * output and aliased to itself.  If the input is not
5178                  * dead after the instruction, we must allocate a new
5179                  * register and move it.
5180                  */
5181                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5182                     || args_ct[arg_ct->alias_index].newreg) {
5183                     allocate_new_reg = true;
5184                 } else if (ts->val_type == TEMP_VAL_REG) {
5185                     /*
5186                      * Check if the current register has already been
5187                      * allocated for another input.
5188                      */
5189                     allocate_new_reg =
5190                         tcg_regset_test_reg(i_allocated_regs, reg);
5191                 }
5192             }
5193             if (!allocate_new_reg) {
5194                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5195                           i_preferred_regs);
5196                 reg = ts->reg;
5197                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5198             }
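            /*
             * Note: temp_load() returns immediately when the value is
             * already live in some register, which need not satisfy
             * this constraint; the re-check above catches that case.
             */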
5199             if (allocate_new_reg) {
5200                 /*
5201                  * Allocate a new register matching the constraint
5202                  * and move the temporary register into it.
5203                  */
5204                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5205                           i_allocated_regs, 0);
5206                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5207                                     i_preferred_regs, ts->indirect_base);
5208                 copyto_new_reg = true;
5209             }
5210             break;
5211 
5212         case 1:
5213             /* First of an input pair; if i1 == i2, the second is an output. */
5214             i1 = i;
5215             i2 = arg_ct->pair_index;
5216             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5217 
5218             /*
5219              * It is easier to default to allocating a new pair
5220              * and to identify a few cases where it's not required.
5221              */
5222             if (arg_ct->ialias) {
5223                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5224                 if (IS_DEAD_ARG(i1) &&
5225                     IS_DEAD_ARG(i2) &&
5226                     !temp_readonly(ts) &&
5227                     ts->val_type == TEMP_VAL_REG &&
5228                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5229                     tcg_regset_test_reg(i_required_regs, reg) &&
5230                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5231                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5232                     (ts2
5233                      ? ts2->val_type == TEMP_VAL_REG &&
5234                        ts2->reg == reg + 1 &&
5235                        !temp_readonly(ts2)
5236                      : s->reg_to_temp[reg + 1] == NULL)) {
5237                     break;
5238                 }
5239             } else {
5240                 /* Without aliasing, the pair must also be an input. */
5241                 tcg_debug_assert(ts2);
5242                 if (ts->val_type == TEMP_VAL_REG &&
5243                     ts2->val_type == TEMP_VAL_REG &&
5244                     ts2->reg == reg + 1 &&
5245                     tcg_regset_test_reg(i_required_regs, reg)) {
5246                     break;
5247                 }
5248             }
5249             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5250                                      0, ts->indirect_base);
5251             goto do_pair;
5252 
5253         case 2: /* pair second */
5254             reg = new_args[arg_ct->pair_index] + 1;
5255             goto do_pair;
5256 
5257         case 3: /* ialias with second output, no first input */
5258             tcg_debug_assert(arg_ct->ialias);
5259             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5260 
5261             if (IS_DEAD_ARG(i) &&
5262                 !temp_readonly(ts) &&
5263                 ts->val_type == TEMP_VAL_REG &&
5264                 reg > 0 &&
5265                 s->reg_to_temp[reg - 1] == NULL &&
5266                 tcg_regset_test_reg(i_required_regs, reg) &&
5267                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5268                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5269                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5270                 break;
5271             }
5272             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5273                                      i_allocated_regs, 0,
5274                                      ts->indirect_base);
5275             tcg_regset_set_reg(i_allocated_regs, reg);
5276             reg += 1;
5277             goto do_pair;
5278 
5279         do_pair:
5280             /*
5281              * If an aliased input is not dead after the instruction,
5282              * we must allocate a new register and move it.
5283              */
5284             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5285                 TCGRegSet t_allocated_regs = i_allocated_regs;
5286 
5287                 /*
5288                  * Because of the alias, and the continued life, make sure
5289                  * that the temp is somewhere *other* than the reg pair,
5290                  * and we get a copy in reg.
5291                  */
5292                 tcg_regset_set_reg(t_allocated_regs, reg);
5293                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5294                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5295                     /* If ts was already in reg, copy it somewhere else. */
5296                     TCGReg nr;
5297                     bool ok;
5298 
5299                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5300                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5301                                        t_allocated_regs, 0, ts->indirect_base);
5302                     ok = tcg_out_mov(s, ts->type, nr, reg);
5303                     tcg_debug_assert(ok);
5304 
5305                     set_temp_val_reg(s, ts, nr);
5306                 } else {
5307                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5308                               t_allocated_regs, 0);
5309                     copyto_new_reg = true;
5310                 }
5311             } else {
5312                 /* Preferably allocate to reg, otherwise copy. */
5313                 i_required_regs = (TCGRegSet)1 << reg;
5314                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5315                           i_preferred_regs);
5316                 copyto_new_reg = ts->reg != reg;
5317             }
5318             break;
5319 
5320         default:
5321             g_assert_not_reached();
5322         }
5323 
5324         if (copyto_new_reg) {
5325             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5326                 /*
5327                  * Cross register class move not supported.  Sync the
5328                  * temp back to its slot and load from there.
5329                  */
5330                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5331                 tcg_out_ld(s, ts->type, reg,
5332                            ts->mem_base->reg, ts->mem_offset);
5333             }
5334         }
5335         new_args[i] = reg;
5336         const_args[i] = 0;
5337         tcg_regset_set_reg(i_allocated_regs, reg);
5338     }
5339 
5340     /* mark dead temporaries and free the associated registers */
5341     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5342         if (IS_DEAD_ARG(i)) {
5343             temp_dead(s, arg_temp(op->args[i]));
5344         }
5345     }
5346 
5347     if (def->flags & TCG_OPF_COND_BRANCH) {
5348         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5349     } else if (def->flags & TCG_OPF_BB_END) {
5350         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5351     } else {
5352         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5353             /* XXX: permit generic clobber register list ? */
5354             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5355                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5356                     tcg_reg_free(s, i, i_allocated_regs);
5357                 }
5358             }
5359         }
5360         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5361             /* sync globals if the op has side effects and might trigger
5362                an exception. */
5363             sync_globals(s, i_allocated_regs);
5364         }
5365 
5366         /* satisfy the output constraints */
5367         for (k = 0; k < nb_oargs; k++) {
5368             i = args_ct[k].sort_index;
5369             arg = op->args[i];
5370             arg_ct = &args_ct[i];
5371             ts = arg_temp(arg);
5372 
5373             /* ENV should not be modified.  */
5374             tcg_debug_assert(!temp_readonly(ts));
5375 
5376             switch (arg_ct->pair) {
5377             case 0: /* not paired */
5378                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5379                     reg = new_args[arg_ct->alias_index];
5380                 } else if (arg_ct->newreg) {
5381                     reg = tcg_reg_alloc(s, arg_ct->regs,
5382                                         i_allocated_regs | o_allocated_regs,
5383                                         output_pref(op, k), ts->indirect_base);
5384                 } else {
5385                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5386                                         output_pref(op, k), ts->indirect_base);
5387                 }
5388                 break;
5389 
5390             case 1: /* first of pair */
5391                 if (arg_ct->oalias) {
5392                     reg = new_args[arg_ct->alias_index];
5393                 } else if (arg_ct->newreg) {
5394                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5395                                              i_allocated_regs | o_allocated_regs,
5396                                              output_pref(op, k),
5397                                              ts->indirect_base);
5398                 } else {
5399                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5400                                              output_pref(op, k),
5401                                              ts->indirect_base);
5402                 }
5403                 break;
5404 
5405             case 2: /* second of pair */
5406                 if (arg_ct->oalias) {
5407                     reg = new_args[arg_ct->alias_index];
5408                 } else {
5409                     reg = new_args[arg_ct->pair_index] + 1;
5410                 }
5411                 break;
5412 
5413             case 3: /* first of pair, aliasing with a second input */
5414                 tcg_debug_assert(!arg_ct->newreg);
5415                 reg = new_args[arg_ct->pair_index] - 1;
5416                 break;
5417 
5418             default:
5419                 g_assert_not_reached();
5420             }
5421             tcg_regset_set_reg(o_allocated_regs, reg);
5422             set_temp_val_reg(s, ts, reg);
5423             ts->mem_coherent = 0;
5424             new_args[i] = reg;
5425         }
5426     }
5427 
5428     /* emit instruction */
5429     TCGType type = TCGOP_TYPE(op);
5430     switch (op->opc) {
5431     case INDEX_op_ext_i32_i64:
5432         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5433         break;
5434     case INDEX_op_extu_i32_i64:
5435         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5436         break;
5437     case INDEX_op_extrl_i64_i32:
5438         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5439         break;
5440 
5441     case INDEX_op_add:
5442     case INDEX_op_and:
5443     case INDEX_op_andc:
5444     case INDEX_op_or:
5445         {
5446             const TCGOutOpBinary *out =
5447                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5448 
5449             /* Constants should never appear in the first source operand. */
5450             tcg_debug_assert(!const_args[1]);
5451             if (const_args[2]) {
5452                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5453             } else {
5454                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5455             }
5456         }
5457         break;
5458 
5459     default:
5460         if (def->flags & TCG_OPF_VECTOR) {
5461             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5462                            TCGOP_VECE(op), new_args, const_args);
5463         } else {
5464             tcg_out_op(s, op->opc, type, new_args, const_args);
5465         }
5466         break;
5467     }
5468 
5469     /* move the outputs in the correct register if needed */
5470     for (i = 0; i < nb_oargs; i++) {
5471         ts = arg_temp(op->args[i]);
5472 
5473         /* ENV should not be modified.  */
5474         tcg_debug_assert(!temp_readonly(ts));
5475 
5476         if (NEED_SYNC_ARG(i)) {
5477             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5478         } else if (IS_DEAD_ARG(i)) {
5479             temp_dead(s, ts);
5480         }
5481     }
5482 }
5483 
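/*
 * Specialized code generation for INDEX_op_dup2_vec: build a 64-bit
 * vector element from two 32-bit inputs on a 32-bit host.  Returns
 * false if the generic expansion must be used instead.
 */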
5484 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5485 {
5486     const TCGLifeData arg_life = op->life;
5487     TCGTemp *ots, *itsl, *itsh;
5488     TCGType vtype = TCGOP_TYPE(op);
5489 
5490     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5491     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5492     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5493 
5494     ots = arg_temp(op->args[0]);
5495     itsl = arg_temp(op->args[1]);
5496     itsh = arg_temp(op->args[2]);
5497 
5498     /* ENV should not be modified.  */
5499     tcg_debug_assert(!temp_readonly(ots));
5500 
5501     /* Allocate the output register now.  */
5502     if (ots->val_type != TEMP_VAL_REG) {
5503         TCGRegSet allocated_regs = s->reserved_regs;
5504         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5505         TCGReg oreg;
5506 
5507         /* Make sure to not spill the input registers. */
5508         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5509             tcg_regset_set_reg(allocated_regs, itsl->reg);
5510         }
5511         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5512             tcg_regset_set_reg(allocated_regs, itsh->reg);
5513         }
5514 
5515         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5516                              output_pref(op, 0), ots->indirect_base);
5517         set_temp_val_reg(s, ots, oreg);
5518     }
5519 
5520     /* Promote dup2 of immediates to dupi_vec. */
5521     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5522         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
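        /*
         * Illustrative: itsl->val = 0x89abcdef, itsh->val = 0x01234567
         * yields val = 0x0123456789abcdef; deposit64() inserts the
         * high word at bit 32.
         */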
5523         MemOp vece = MO_64;
5524 
5525         if (val == dup_const(MO_8, val)) {
5526             vece = MO_8;
5527         } else if (val == dup_const(MO_16, val)) {
5528             vece = MO_16;
5529         } else if (val == dup_const(MO_32, val)) {
5530             vece = MO_32;
5531         }
5532 
5533         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5534         goto done;
5535     }
5536 
5537     /* If the two inputs form one 64-bit value, try dupm_vec. */
5538     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5539         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5540         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5541         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5542 
5543         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5544         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5545 
5546         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5547                              its->mem_base->reg, its->mem_offset)) {
5548             goto done;
5549         }
5550     }
5551 
5552     /* Fall back to generic expansion. */
5553     return false;
5554 
5555  done:
5556     ots->mem_coherent = 0;
5557     if (IS_DEAD_ARG(1)) {
5558         temp_dead(s, itsl);
5559     }
5560     if (IS_DEAD_ARG(2)) {
5561         temp_dead(s, itsh);
5562     }
5563     if (NEED_SYNC_ARG(0)) {
5564         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5565     } else if (IS_DEAD_ARG(0)) {
5566         temp_dead(s, ots);
5567     }
5568     return true;
5569 }
5570 
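/* Move TS into the fixed argument register REG, evicting any current
   occupant of REG; falls back through memory if a direct move between
   register classes is not supported. */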
5571 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5572                          TCGRegSet allocated_regs)
5573 {
5574     if (ts->val_type == TEMP_VAL_REG) {
5575         if (ts->reg != reg) {
5576             tcg_reg_free(s, reg, allocated_regs);
5577             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5578                 /*
5579                  * Cross register class move not supported.  Sync the
5580                  * temp back to its slot and load from there.
5581                  */
5582                 temp_sync(s, ts, allocated_regs, 0, 0);
5583                 tcg_out_ld(s, ts->type, reg,
5584                            ts->mem_base->reg, ts->mem_offset);
5585             }
5586         }
5587     } else {
5588         TCGRegSet arg_set = 0;
5589 
5590         tcg_reg_free(s, reg, allocated_regs);
5591         tcg_regset_set_reg(arg_set, reg);
5592         temp_load(s, ts, arg_set, allocated_regs, 0);
5593     }
5594 }
5595 
5596 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5597                          TCGRegSet allocated_regs)
5598 {
5599     /*
5600      * When the destination is on the stack, load up the temp and store.
5601      * If there are many call-saved registers, the temp might live to
5602      * see another use; otherwise it'll be discarded.
5603      */
5604     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5605     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5606                arg_slot_stk_ofs(arg_slot));
5607 }
5608 
5609 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5610                             TCGTemp *ts, TCGRegSet *allocated_regs)
5611 {
5612     if (arg_slot_reg_p(l->arg_slot)) {
5613         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5614         load_arg_reg(s, reg, ts, *allocated_regs);
5615         tcg_regset_set_reg(*allocated_regs, reg);
5616     } else {
5617         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5618     }
5619 }
5620 
5621 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5622                          intptr_t ref_off, TCGRegSet *allocated_regs)
5623 {
5624     TCGReg reg;
5625 
5626     if (arg_slot_reg_p(arg_slot)) {
5627         reg = tcg_target_call_iarg_regs[arg_slot];
5628         tcg_reg_free(s, reg, *allocated_regs);
5629         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5630         tcg_regset_set_reg(*allocated_regs, reg);
5631     } else {
5632         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5633                             *allocated_regs, 0, false);
5634         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5635         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5636                    arg_slot_stk_ofs(arg_slot));
5637     }
5638 }
5639 
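/* Register allocation and code emission for a call op: marshal the
   input arguments, clobber the call registers, save or sync globals,
   emit the call, then place and flush the results. */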
5640 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5641 {
5642     const int nb_oargs = TCGOP_CALLO(op);
5643     const int nb_iargs = TCGOP_CALLI(op);
5644     const TCGLifeData arg_life = op->life;
5645     const TCGHelperInfo *info = tcg_call_info(op);
5646     TCGRegSet allocated_regs = s->reserved_regs;
5647     int i;
5648 
5649     /*
5650      * Move inputs into place in reverse order,
5651      * so that we place stacked arguments first.
5652      */
5653     for (i = nb_iargs - 1; i >= 0; --i) {
5654         const TCGCallArgumentLoc *loc = &info->in[i];
5655         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5656 
5657         switch (loc->kind) {
5658         case TCG_CALL_ARG_NORMAL:
5659         case TCG_CALL_ARG_EXTEND_U:
5660         case TCG_CALL_ARG_EXTEND_S:
5661             load_arg_normal(s, loc, ts, &allocated_regs);
5662             break;
5663         case TCG_CALL_ARG_BY_REF:
5664             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5665             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5666                          arg_slot_stk_ofs(loc->ref_slot),
5667                          &allocated_regs);
5668             break;
5669         case TCG_CALL_ARG_BY_REF_N:
5670             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5671             break;
5672         default:
5673             g_assert_not_reached();
5674         }
5675     }
5676 
5677     /* Mark dead temporaries and free the associated registers.  */
5678     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5679         if (IS_DEAD_ARG(i)) {
5680             temp_dead(s, arg_temp(op->args[i]));
5681         }
5682     }
5683 
5684     /* Clobber call registers.  */
5685     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5686         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5687             tcg_reg_free(s, i, allocated_regs);
5688         }
5689     }
5690 
5691     /*
5692      * Save globals if they might be written by the helper,
5693      * sync them if they might be read.
5694      */
5695     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5696         /* Nothing to do */
5697     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5698         sync_globals(s, allocated_regs);
5699     } else {
5700         save_globals(s, allocated_regs);
5701     }
5702 
5703     /*
5704      * If the ABI passes a pointer to the returned struct as the first
5705      * argument, load that now.  Pass a pointer to the output home slot.
5706      */
5707     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5708         TCGTemp *ts = arg_temp(op->args[0]);
5709 
5710         if (!ts->mem_allocated) {
5711             temp_allocate_frame(s, ts);
5712         }
5713         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5714     }
5715 
5716     tcg_out_call(s, tcg_call_func(op), info);
5717 
5718     /* Assign output registers and emit moves if needed.  */
5719     switch (info->out_kind) {
5720     case TCG_CALL_RET_NORMAL:
5721         for (i = 0; i < nb_oargs; i++) {
5722             TCGTemp *ts = arg_temp(op->args[i]);
5723             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5724 
5725             /* ENV should not be modified.  */
5726             tcg_debug_assert(!temp_readonly(ts));
5727 
5728             set_temp_val_reg(s, ts, reg);
5729             ts->mem_coherent = 0;
5730         }
5731         break;
5732 
5733     case TCG_CALL_RET_BY_VEC:
5734         {
5735             TCGTemp *ts = arg_temp(op->args[0]);
5736 
5737             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5738             tcg_debug_assert(ts->temp_subindex == 0);
5739             if (!ts->mem_allocated) {
5740                 temp_allocate_frame(s, ts);
5741             }
5742             tcg_out_st(s, TCG_TYPE_V128,
5743                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5744                        ts->mem_base->reg, ts->mem_offset);
5745         }
5746         /* fall through to mark all parts in memory */
5747 
5748     case TCG_CALL_RET_BY_REF:
5749         /* The callee has performed a write through the reference. */
5750         for (i = 0; i < nb_oargs; i++) {
5751             TCGTemp *ts = arg_temp(op->args[i]);
5752             ts->val_type = TEMP_VAL_MEM;
5753         }
5754         break;
5755 
5756     default:
5757         g_assert_not_reached();
5758     }
5759 
5760     /* Flush or discard output registers as needed. */
5761     for (i = 0; i < nb_oargs; i++) {
5762         TCGTemp *ts = arg_temp(op->args[i]);
5763         if (NEED_SYNC_ARG(i)) {
5764             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5765         } else if (IS_DEAD_ARG(i)) {
5766             temp_dead(s, ts);
5767         }
5768     }
5769 }
5770 
5771 /**
5772  * atom_and_align_for_opc:
5773  * @s: tcg context
5774  * @opc: memory operation code
5775  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5776  * @allow_two_ops: true if we are prepared to issue two operations
5777  *
5778  * Return the alignment and atomicity to use for the inline fast path
5779  * for the given memory operation.  The alignment may be larger than
5780  * that specified in @opc, and the correct alignment will be diagnosed
5781  * by the slow path helper.
5782  *
5783  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5784  * and issue two loads or stores for subalignment.
5785  */
5786 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5787                                            MemOp host_atom, bool allow_two_ops)
5788 {
5789     MemOp align = memop_alignment_bits(opc);
5790     MemOp size = opc & MO_SIZE;
5791     MemOp half = size ? size - 1 : 0;
5792     MemOp atom = opc & MO_ATOM_MASK;
5793     MemOp atmax;
5794 
5795     switch (atom) {
5796     case MO_ATOM_NONE:
5797         /* The operation requires no specific atomicity. */
5798         atmax = MO_8;
5799         break;
5800 
5801     case MO_ATOM_IFALIGN:
5802         atmax = size;
5803         break;
5804 
5805     case MO_ATOM_IFALIGN_PAIR:
5806         atmax = half;
5807         break;
5808 
5809     case MO_ATOM_WITHIN16:
5810         atmax = size;
5811         if (size == MO_128) {
5812             /* Misalignment implies !within16, and therefore no atomicity. */
5813         } else if (host_atom != MO_ATOM_WITHIN16) {
5814             /* The host does not implement within16, so require alignment. */
5815             align = MAX(align, size);
5816         }
5817         break;
5818 
5819     case MO_ATOM_WITHIN16_PAIR:
5820         atmax = size;
5821         /*
5822          * Misalignment implies !within16, and therefore half atomicity.
5823          * Any host prepared for two operations can implement this with
5824          * half alignment.
5825          */
5826         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5827             align = MAX(align, half);
5828         }
5829         break;
5830 
5831     case MO_ATOM_SUBALIGN:
5832         atmax = size;
5833         if (host_atom != MO_ATOM_SUBALIGN) {
5834             /* If unaligned but not odd, there are subobjects up to half. */
5835             if (allow_two_ops) {
5836                 align = MAX(align, half);
5837             } else {
5838                 align = MAX(align, size);
5839             }
5840         }
5841         break;
5842 
5843     default:
5844         g_assert_not_reached();
5845     }
5846 
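    /*
     * Illustrative: an MO_64 load tagged MO_ATOM_WITHIN16 on a host
     * that only guarantees MO_ATOM_IFALIGN raises align to MO_64, so
     * the inline fast path handles only naturally aligned accesses.
     */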
5847     return (TCGAtomAlign){ .atom = atmax, .align = align };
5848 }
5849 
5850 /*
5851  * Similarly for qemu_ld/st slow path helpers.
5852  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5853  * using only the provided backend tcg_out_* functions.
5854  */
5855 
5856 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5857 {
5858     int ofs = arg_slot_stk_ofs(slot);
5859 
5860     /*
5861      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5862      * require extension to uint64_t, adjust the address for uint32_t.
5863      */
5864     if (HOST_BIG_ENDIAN &&
5865         TCG_TARGET_REG_BITS == 64 &&
5866         type == TCG_TYPE_I32) {
5867         ofs += 4;
5868     }
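    /*
     * Illustrative: on a big-endian 64-bit host, a 32-bit argument
     * occupies the high-address half of its 8-byte stack slot, hence
     * the +4 above.
     */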
5869     return ofs;
5870 }
5871 
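/*
 * Move NMOV extended values into their argument slots (registers or
 * stack slots), using the scratch registers in PARM to resolve any
 * overlap between sources and destinations.
 */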
5872 static void tcg_out_helper_load_slots(TCGContext *s,
5873                                       unsigned nmov, TCGMovExtend *mov,
5874                                       const TCGLdstHelperParam *parm)
5875 {
5876     unsigned i;
5877     TCGReg dst3;
5878 
5879     /*
5880      * Start from the end, storing to the stack first.
5881      * This frees those registers, so we need not consider overlap.
5882      */
5883     for (i = nmov; i-- > 0; ) {
5884         unsigned slot = mov[i].dst;
5885 
5886         if (arg_slot_reg_p(slot)) {
5887             goto found_reg;
5888         }
5889 
5890         TCGReg src = mov[i].src;
5891         TCGType dst_type = mov[i].dst_type;
5892         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5893 
5894         /* The argument is going onto the stack; extend into scratch. */
5895         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5896             tcg_debug_assert(parm->ntmp != 0);
5897             mov[i].dst = src = parm->tmp[0];
5898             tcg_out_movext1(s, &mov[i]);
5899         }
5900 
5901         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5902                    tcg_out_helper_stk_ofs(dst_type, slot));
5903     }
5904     return;
5905 
5906  found_reg:
5907     /*
5908      * The remaining arguments are in registers.
5909      * Convert slot numbers to argument registers.
5910      */
5911     nmov = i + 1;
5912     for (i = 0; i < nmov; ++i) {
5913         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5914     }
5915 
5916     switch (nmov) {
5917     case 4:
5918         /* The backend must have provided enough temps for the worst case. */
5919         tcg_debug_assert(parm->ntmp >= 2);
5920 
5921         dst3 = mov[3].dst;
5922         for (unsigned j = 0; j < 3; ++j) {
5923             if (dst3 == mov[j].src) {
5924                 /*
5925                  * Conflict. Copy the source to a temporary, perform the
5926                  * remaining moves, then the extension from our scratch
5927                  * on the way out.
5928                  */
5929                 TCGReg scratch = parm->tmp[1];
5930 
5931                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5932                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5933                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5934                 return;
5935             }
5936         }
5937 
5938         /* No conflicts: perform this move and continue. */
5939         tcg_out_movext1(s, &mov[3]);
5940         /* fall through */
5941 
5942     case 3:
5943         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5944                         parm->ntmp ? parm->tmp[0] : -1);
5945         break;
5946     case 2:
5947         tcg_out_movext2(s, mov, mov + 1,
5948                         parm->ntmp ? parm->tmp[0] : -1);
5949         break;
5950     case 1:
5951         tcg_out_movext1(s, mov);
5952         break;
5953     default:
5954         g_assert_not_reached();
5955     }
5956 }
5957 
5958 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5959                                     TCGType type, tcg_target_long imm,
5960                                     const TCGLdstHelperParam *parm)
5961 {
5962     if (arg_slot_reg_p(slot)) {
5963         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5964     } else {
5965         int ofs = tcg_out_helper_stk_ofs(type, slot);
5966         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5967             tcg_debug_assert(parm->ntmp != 0);
5968             tcg_out_movi(s, type, parm->tmp[0], imm);
5969             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5970         }
5971     }
5972 }
5973 
5974 static void tcg_out_helper_load_common_args(TCGContext *s,
5975                                             const TCGLabelQemuLdst *ldst,
5976                                             const TCGLdstHelperParam *parm,
5977                                             const TCGHelperInfo *info,
5978                                             unsigned next_arg)
5979 {
5980     TCGMovExtend ptr_mov = {
5981         .dst_type = TCG_TYPE_PTR,
5982         .src_type = TCG_TYPE_PTR,
5983         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5984     };
5985     const TCGCallArgumentLoc *loc = &info->in[0];
5986     TCGType type;
5987     unsigned slot;
5988     tcg_target_ulong imm;
5989 
5990     /*
5991      * Handle env, which is always first.
5992      */
5993     ptr_mov.dst = loc->arg_slot;
5994     ptr_mov.src = TCG_AREG0;
5995     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5996 
5997     /*
5998      * Handle oi.
5999      */
6000     imm = ldst->oi;
6001     loc = &info->in[next_arg];
6002     type = TCG_TYPE_I32;
6003     switch (loc->kind) {
6004     case TCG_CALL_ARG_NORMAL:
6005         break;
6006     case TCG_CALL_ARG_EXTEND_U:
6007     case TCG_CALL_ARG_EXTEND_S:
6008         /* No extension required for MemOpIdx. */
6009         tcg_debug_assert(imm <= INT32_MAX);
6010         type = TCG_TYPE_REG;
6011         break;
6012     default:
6013         g_assert_not_reached();
6014     }
6015     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6016     next_arg++;
6017 
6018     /*
6019      * Handle ra.
6020      */
6021     loc = &info->in[next_arg];
6022     slot = loc->arg_slot;
6023     if (parm->ra_gen) {
6024         int arg_reg = -1;
6025         TCGReg ra_reg;
6026 
6027         if (arg_slot_reg_p(slot)) {
6028             arg_reg = tcg_target_call_iarg_regs[slot];
6029         }
6030         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6031 
6032         ptr_mov.dst = slot;
6033         ptr_mov.src = ra_reg;
6034         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6035     } else {
6036         imm = (uintptr_t)ldst->raddr;
6037         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6038     }
6039 }
6040 
6041 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6042                                        const TCGCallArgumentLoc *loc,
6043                                        TCGType dst_type, TCGType src_type,
6044                                        TCGReg lo, TCGReg hi)
6045 {
6046     MemOp reg_mo;
6047 
6048     if (dst_type <= TCG_TYPE_REG) {
6049         MemOp src_ext;
6050 
6051         switch (loc->kind) {
6052         case TCG_CALL_ARG_NORMAL:
6053             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6054             break;
6055         case TCG_CALL_ARG_EXTEND_U:
6056             dst_type = TCG_TYPE_REG;
6057             src_ext = MO_UL;
6058             break;
6059         case TCG_CALL_ARG_EXTEND_S:
6060             dst_type = TCG_TYPE_REG;
6061             src_ext = MO_SL;
6062             break;
6063         default:
6064             g_assert_not_reached();
6065         }
6066 
6067         mov[0].dst = loc->arg_slot;
6068         mov[0].dst_type = dst_type;
6069         mov[0].src = lo;
6070         mov[0].src_type = src_type;
6071         mov[0].src_ext = src_ext;
6072         return 1;
6073     }
6074 
6075     if (TCG_TARGET_REG_BITS == 32) {
6076         assert(dst_type == TCG_TYPE_I64);
6077         reg_mo = MO_32;
6078     } else {
6079         assert(dst_type == TCG_TYPE_I128);
6080         reg_mo = MO_64;
6081     }
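    /*
     * Illustrative: a TCG_TYPE_I64 value on a 32-bit host is passed in
     * two slots; lo fills loc[0] and hi loc[1] on little-endian hosts,
     * with the two slots swapped on big-endian hosts.
     */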
6082 
6083     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6084     mov[0].src = lo;
6085     mov[0].dst_type = TCG_TYPE_REG;
6086     mov[0].src_type = TCG_TYPE_REG;
6087     mov[0].src_ext = reg_mo;
6088 
6089     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6090     mov[1].src = hi;
6091     mov[1].dst_type = TCG_TYPE_REG;
6092     mov[1].src_type = TCG_TYPE_REG;
6093     mov[1].src_ext = reg_mo;
6094 
6095     return 2;
6096 }
6097 
6098 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6099                                    const TCGLdstHelperParam *parm)
6100 {
6101     const TCGHelperInfo *info;
6102     const TCGCallArgumentLoc *loc;
6103     TCGMovExtend mov[2];
6104     unsigned next_arg, nmov;
6105     MemOp mop = get_memop(ldst->oi);
6106 
6107     switch (mop & MO_SIZE) {
6108     case MO_8:
6109     case MO_16:
6110     case MO_32:
6111         info = &info_helper_ld32_mmu;
6112         break;
6113     case MO_64:
6114         info = &info_helper_ld64_mmu;
6115         break;
6116     case MO_128:
6117         info = &info_helper_ld128_mmu;
6118         break;
6119     default:
6120         g_assert_not_reached();
6121     }
6122 
6123     /* Defer env argument. */
6124     next_arg = 1;
6125 
6126     loc = &info->in[next_arg];
6127     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6128         /*
6129          * 32-bit host with 32-bit guest: zero-extend the guest address
6130          * to 64-bits for the helper by storing the low part, then
6131          * load a zero for the high part.
6132          */
6133         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6134                                TCG_TYPE_I32, TCG_TYPE_I32,
6135                                ldst->addr_reg, -1);
6136         tcg_out_helper_load_slots(s, 1, mov, parm);
6137 
6138         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6139                                 TCG_TYPE_I32, 0, parm);
6140         next_arg += 2;
6141     } else {
6142         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6143                                       ldst->addr_reg, -1);
6144         tcg_out_helper_load_slots(s, nmov, mov, parm);
6145         next_arg += nmov;
6146     }
6147 
6148     switch (info->out_kind) {
6149     case TCG_CALL_RET_NORMAL:
6150     case TCG_CALL_RET_BY_VEC:
6151         break;
6152     case TCG_CALL_RET_BY_REF:
6153         /*
6154          * The return reference is in the first argument slot.
6155          * We need memory in which to return: re-use the top of stack.
6156          */
6157         {
6158             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6159 
6160             if (arg_slot_reg_p(0)) {
6161                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6162                                  TCG_REG_CALL_STACK, ofs_slot0);
6163             } else {
6164                 tcg_debug_assert(parm->ntmp != 0);
6165                 tcg_out_addi_ptr(s, parm->tmp[0],
6166                                  TCG_REG_CALL_STACK, ofs_slot0);
6167                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6168                            TCG_REG_CALL_STACK, ofs_slot0);
6169             }
6170         }
6171         break;
6172     default:
6173         g_assert_not_reached();
6174     }
6175 
6176     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6177 }
6178 
6179 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6180                                   bool load_sign,
6181                                   const TCGLdstHelperParam *parm)
6182 {
6183     MemOp mop = get_memop(ldst->oi);
6184     TCGMovExtend mov[2];
6185     int ofs_slot0;
6186 
6187     switch (ldst->type) {
6188     case TCG_TYPE_I64:
6189         if (TCG_TARGET_REG_BITS == 32) {
6190             break;
6191         }
6192         /* fall through */
6193 
6194     case TCG_TYPE_I32:
6195         mov[0].dst = ldst->datalo_reg;
6196         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6197         mov[0].dst_type = ldst->type;
6198         mov[0].src_type = TCG_TYPE_REG;
6199 
6200         /*
6201          * If load_sign, then we allowed the helper to perform the
6202          * appropriate sign extension to tcg_target_ulong, and all
6203          * we need now is a plain move.
6204          *
6205          * If not, then we expect the relevant extension
6206          * instruction to be no more expensive than a move, and
6207          * we thus save icache space etc. by using only one of the
6208          * two helper functions.
6209          */
6210         if (load_sign || !(mop & MO_SIGN)) {
6211             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6212                 mov[0].src_ext = MO_32;
6213             } else {
6214                 mov[0].src_ext = MO_64;
6215             }
6216         } else {
6217             mov[0].src_ext = mop & MO_SSIZE;
6218         }
6219         tcg_out_movext1(s, mov);
6220         return;
6221 
6222     case TCG_TYPE_I128:
6223         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6224         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6225         switch (TCG_TARGET_CALL_RET_I128) {
6226         case TCG_CALL_RET_NORMAL:
6227             break;
6228         case TCG_CALL_RET_BY_VEC:
6229             tcg_out_st(s, TCG_TYPE_V128,
6230                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6231                        TCG_REG_CALL_STACK, ofs_slot0);
6232             /* fall through */
6233         case TCG_CALL_RET_BY_REF:
6234             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6235                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6236             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6237                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6238             return;
6239         default:
6240             g_assert_not_reached();
6241         }
6242         break;
6243 
6244     default:
6245         g_assert_not_reached();
6246     }
6247 
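         /*
          * The value arrives in two host registers: a 64-bit result on a
          * 32-bit host, or a 128-bit result returned in a register pair.
          * Move both halves, ordered by host endianness; tcg_out_movext2
          * uses the scratch register, if any, to break overlap between
          * the source and destination pairs.
          */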
6248     mov[0].dst = ldst->datalo_reg;
6249     mov[0].src =
6250         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6251     mov[0].dst_type = TCG_TYPE_REG;
6252     mov[0].src_type = TCG_TYPE_REG;
6253     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6254 
6255     mov[1].dst = ldst->datahi_reg;
6256     mov[1].src =
6257         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6258     mov[1].dst_type = TCG_TYPE_REG;
6259     mov[1].src_type = TCG_TYPE_REG;
6260     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6261 
6262     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6263 }
6264 
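     /*
      * Marshal the arguments for a qemu_st slow-path helper call: env
      * (deferred), the guest address, and the data value, which for a
      * 128-bit store may have to be spilled to stack slots and passed
      * by reference, depending on the ABI.
      */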
6265 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6266                                    const TCGLdstHelperParam *parm)
6267 {
6268     const TCGHelperInfo *info;
6269     const TCGCallArgumentLoc *loc;
6270     TCGMovExtend mov[4];
6271     TCGType data_type;
6272     unsigned next_arg, nmov, n;
6273     MemOp mop = get_memop(ldst->oi);
6274 
6275     switch (mop & MO_SIZE) {
6276     case MO_8:
6277     case MO_16:
6278     case MO_32:
6279         info = &info_helper_st32_mmu;
6280         data_type = TCG_TYPE_I32;
6281         break;
6282     case MO_64:
6283         info = &info_helper_st64_mmu;
6284         data_type = TCG_TYPE_I64;
6285         break;
6286     case MO_128:
6287         info = &info_helper_st128_mmu;
6288         data_type = TCG_TYPE_I128;
6289         break;
6290     default:
6291         g_assert_not_reached();
6292     }
6293 
6294     /* Defer env argument. */
6295     next_arg = 1;
6296     nmov = 0;
6297 
6298     /* Handle addr argument. */
6299     loc = &info->in[next_arg];
6300     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6301     if (TCG_TARGET_REG_BITS == 32) {
6302         /*
6303          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6304          * to 64 bits for the helper by storing the low part.  Later,
6305          * after we have processed the register inputs, we will load a
6306          * zero for the high part.
6307          */
6308         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6309                                TCG_TYPE_I32, TCG_TYPE_I32,
6310                                ldst->addr_reg, -1);
6311         next_arg += 2;
6312         nmov += 1;
6313     } else {
6314         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6315                                    ldst->addr_reg, -1);
6316         next_arg += n;
6317         nmov += n;
6318     }
6319 
6320     /* Handle data argument. */
6321     loc = &info->in[next_arg];
6322     switch (loc->kind) {
6323     case TCG_CALL_ARG_NORMAL:
6324     case TCG_CALL_ARG_EXTEND_U:
6325     case TCG_CALL_ARG_EXTEND_S:
6326         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6327                                    ldst->datalo_reg, ldst->datahi_reg);
6328         next_arg += n;
6329         nmov += n;
6330         tcg_out_helper_load_slots(s, nmov, mov, parm);
6331         break;
6332 
6333     case TCG_CALL_ARG_BY_REF:
6334         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6335         tcg_debug_assert(data_type == TCG_TYPE_I128);
6336         tcg_out_st(s, TCG_TYPE_I64,
6337                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6338                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6339         tcg_out_st(s, TCG_TYPE_I64,
6340                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6341                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6342 
6343         tcg_out_helper_load_slots(s, nmov, mov, parm);
6344 
6345         if (arg_slot_reg_p(loc->arg_slot)) {
6346             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6347                              TCG_REG_CALL_STACK,
6348                              arg_slot_stk_ofs(loc->ref_slot));
6349         } else {
6350             tcg_debug_assert(parm->ntmp != 0);
6351             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6352                              arg_slot_stk_ofs(loc->ref_slot));
6353             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6354                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6355         }
6356         next_arg += 2;
6357         break;
6358 
6359     default:
6360         g_assert_not_reached();
6361     }
6362 
6363     if (TCG_TARGET_REG_BITS == 32) {
6364         /* Zero extend the address by loading a zero for the high part. */
6365         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6366         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6367     }
6368 
6369     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6370 }
6371 
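     /*
      * Translate the TB's op list into host code.  Returns the size in
      * bytes of the generated code, or a negative value when generation
      * must be restarted: -1 when the code buffer high-water mark is
      * reached, -2 when the TB is too large to encode.
      */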
6372 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6373 {
6374     int i, start_words, num_insns;
6375     TCGOp *op;
6376 
6377     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6378                  && qemu_log_in_addr_range(pc_start))) {
6379         FILE *logfile = qemu_log_trylock();
6380         if (logfile) {
6381             fprintf(logfile, "OP:\n");
6382             tcg_dump_ops(s, logfile, false);
6383             fprintf(logfile, "\n");
6384             qemu_log_unlock(logfile);
6385         }
6386     }
6387 
6388 #ifdef CONFIG_DEBUG_TCG
6389     /* Ensure all labels referenced have been emitted.  */
6390     {
6391         TCGLabel *l;
6392         bool error = false;
6393 
6394         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6395             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6396                 qemu_log_mask(CPU_LOG_TB_OP,
6397                               "$L%d referenced but not present.\n", l->id);
6398                 error = true;
6399             }
6400         }
6401         assert(!error);
6402     }
6403 #endif
6404 
6405     /* Do not reuse any EBB temp that may be allocated within the TB. */
6406     tcg_temp_ebb_reset_freed(s);
6407 
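         /*
          * Optimization and analysis before register allocation: constant
          * folding and algebraic simplification, removal of code made
          * unreachable by unconditional exits, and liveness analysis to
          * feed the register allocator.
          */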
6408     tcg_optimize(s);
6409 
6410     reachable_code_pass(s);
6411     liveness_pass_0(s);
6412     liveness_pass_1(s);
6413 
6414     if (s->nb_indirects > 0) {
6415         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6416                      && qemu_log_in_addr_range(pc_start))) {
6417             FILE *logfile = qemu_log_trylock();
6418             if (logfile) {
6419                 fprintf(logfile, "OP before indirect lowering:\n");
6420                 tcg_dump_ops(s, logfile, false);
6421                 fprintf(logfile, "\n");
6422                 qemu_log_unlock(logfile);
6423             }
6424         }
6425 
6426         /* Replace indirect temps with direct temps.  */
6427         if (liveness_pass_2(s)) {
6428             /* If changes were made, re-run liveness.  */
6429             liveness_pass_1(s);
6430         }
6431     }
6432 
6433     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6434                  && qemu_log_in_addr_range(pc_start))) {
6435         FILE *logfile = qemu_log_trylock();
6436         if (logfile) {
6437             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6438             tcg_dump_ops(s, logfile, true);
6439             fprintf(logfile, "\n");
6440             qemu_log_unlock(logfile);
6441         }
6442     }
6443 
6444     /* Initialize goto_tb jump offsets. */
6445     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6446     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6447     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6448     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6449 
6450     tcg_reg_alloc_start(s);
6451 
6452     /*
6453      * Reset the buffer pointers when restarting after overflow.
6454      * TODO: Move this into translate-all.c with the rest of the
6455      * buffer management.  Having only this done here is confusing.
6456      */
6457     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6458     s->code_ptr = s->code_buf;
6459     s->data_gen_ptr = NULL;
6460 
6461     QSIMPLEQ_INIT(&s->ldst_labels);
6462     s->pool_labels = NULL;
6463 
6464     start_words = s->insn_start_words;
6465     s->gen_insn_data =
6466         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6467 
6468     tcg_out_tb_start(s);
6469 
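         /*
          * Main code generation loop: emit host code for each op in order,
          * recording per-guest-insn start data and end offsets for the
          * restore/unwind machinery.
          */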
6470     num_insns = -1;
6471     QTAILQ_FOREACH(op, &s->ops, link) {
6472         TCGOpcode opc = op->opc;
6473 
6474         switch (opc) {
6475         case INDEX_op_mov:
6476         case INDEX_op_mov_vec:
6477             tcg_reg_alloc_mov(s, op);
6478             break;
6479         case INDEX_op_dup_vec:
6480             tcg_reg_alloc_dup(s, op);
6481             break;
6482         case INDEX_op_insn_start:
6483             if (num_insns >= 0) {
6484                 size_t off = tcg_current_code_size(s);
6485                 s->gen_insn_end_off[num_insns] = off;
6486                 /* Assert that we do not overflow our stored offset.  */
6487                 assert(s->gen_insn_end_off[num_insns] == off);
6488             }
6489             num_insns++;
6490             for (i = 0; i < start_words; ++i) {
6491                 s->gen_insn_data[num_insns * start_words + i] =
6492                     tcg_get_insn_start_param(op, i);
6493             }
6494             break;
6495         case INDEX_op_discard:
6496             temp_dead(s, arg_temp(op->args[0]));
6497             break;
6498         case INDEX_op_set_label:
6499             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6500             tcg_out_label(s, arg_label(op->args[0]));
6501             break;
6502         case INDEX_op_call:
6503             tcg_reg_alloc_call(s, op);
6504             break;
6505         case INDEX_op_exit_tb:
6506             tcg_out_exit_tb(s, op->args[0]);
6507             break;
6508         case INDEX_op_goto_tb:
6509             tcg_out_goto_tb(s, op->args[0]);
6510             break;
6511         case INDEX_op_dup2_vec:
6512             if (tcg_reg_alloc_dup2(s, op)) {
6513                 break;
6514             }
6515             /* fall through */
6516         default:
6517             /* Sanity check that we've not introduced any unhandled opcodes. */
6518             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6519                                               TCGOP_FLAGS(op)));
6520             /* Note: it would be much faster to have specialized
6521                register allocator functions for some common argument
6522                patterns.  */
6523             tcg_reg_alloc_op(s, op);
6524             break;
6525         }
6526         /* Test for (pending) buffer overflow.  The assumption is that any
6527            one operation beginning below the high water mark cannot overrun
6528            the buffer completely.  Thus we can test for overflow after
6529            generating code without having to check during generation.  */
6530         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6531             return -1;
6532         }
6533         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6534         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6535             return -2;
6536         }
6537     }
6538     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6539     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6540 
6541     /* Generate TB finalization at the end of block */
6542     i = tcg_out_ldst_finalize(s);
6543     if (i < 0) {
6544         return i;
6545     }
6546     i = tcg_out_pool_finalize(s);
6547     if (i < 0) {
6548         return i;
6549     }
6550     if (!tcg_resolve_relocs(s)) {
6551         return -2;
6552     }
6553 
6554 #ifndef CONFIG_TCG_INTERPRETER
6555     /* flush instruction cache */
6556     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6557                         (uintptr_t)s->code_buf,
6558                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6559 #endif
6560 
6561     return tcg_current_code_size(s);
6562 }
6563 
6564 #ifdef ELF_HOST_MACHINE
6565 /* In order to use this feature, the backend needs to do three things:
6566 
6567    (1) Define ELF_HOST_MACHINE, both to supply the value to put
6568        into the ELF image and to indicate support for the feature.
6569 
6570    (2) Define tcg_register_jit.  This should create a buffer containing
6571        the contents of a .debug_frame section that describes the post-
6572        prologue unwind info for the tcg machine.
6573 
6574    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6575 */
6576 
6577 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6578 typedef enum {
6579     JIT_NOACTION = 0,
6580     JIT_REGISTER_FN,
6581     JIT_UNREGISTER_FN
6582 } jit_actions_t;
6583 
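     /*
      * Each jit_code_entry describes one in-memory symbol file; GDB walks
      * the doubly-linked list rooted in the descriptor below.
      */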
6584 struct jit_code_entry {
6585     struct jit_code_entry *next_entry;
6586     struct jit_code_entry *prev_entry;
6587     const void *symfile_addr;
6588     uint64_t symfile_size;
6589 };
6590 
6591 struct jit_descriptor {
6592     uint32_t version;
6593     uint32_t action_flag;
6594     struct jit_code_entry *relevant_entry;
6595     struct jit_code_entry *first_entry;
6596 };
6597 
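     /*
      * GDB plants a breakpoint in this function; calling it after updating
      * the descriptor notifies the debugger of a new or removed JIT symbol
      * file.  The noinline attribute and the empty asm keep the function
      * from being optimized away.
      */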
6598 void __jit_debug_register_code(void) __attribute__((noinline));
6599 void __jit_debug_register_code(void)
6600 {
6601     asm("");
6602 }
6603 
6604 /* Must statically initialize the version, because GDB may check
6605    the version before we can set it.  */
6606 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6607 
6608 /* End GDB interface.  */
6609 
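     /* Return the offset of STR within STRTAB.  STR must be present:
        there is no failure check, and lookup of a missing string would
        run off the end of the table.  */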
6610 static int find_string(const char *strtab, const char *str)
6611 {
6612     const char *p = strtab + 1;
6613 
6614     while (1) {
6615         if (strcmp(p, str) == 0) {
6616             return p - strtab;
6617         }
6618         p += strlen(p) + 1;
6619     }
6620 }
6621 
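     /*
      * Construct a minimal in-memory ELF image describing code_gen_buffer:
      * a NOBITS .text section spanning the buffer, a single-function
      * .debug_info/.debug_abbrev pair, and the backend-supplied
      * .debug_frame appended at the end; then hand it to GDB via the
      * JIT interface above.
      */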
6622 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6623                                  const void *debug_frame,
6624                                  size_t debug_frame_size)
6625 {
6626     struct __attribute__((packed)) DebugInfo {
6627         uint32_t  len;
6628         uint16_t  version;
6629         uint32_t  abbrev;
6630         uint8_t   ptr_size;
6631         uint8_t   cu_die;
6632         uint16_t  cu_lang;
6633         uintptr_t cu_low_pc;
6634         uintptr_t cu_high_pc;
6635         uint8_t   fn_die;
6636         char      fn_name[16];
6637         uintptr_t fn_low_pc;
6638         uintptr_t fn_high_pc;
6639         uint8_t   cu_eoc;
6640     };
6641 
6642     struct ElfImage {
6643         ElfW(Ehdr) ehdr;
6644         ElfW(Phdr) phdr;
6645         ElfW(Shdr) shdr[7];
6646         ElfW(Sym)  sym[2];
6647         struct DebugInfo di;
6648         uint8_t    da[24];
6649         char       str[80];
6650     };
6651 
6652     struct ElfImage *img;
6653 
6654     static const struct ElfImage img_template = {
6655         .ehdr = {
6656             .e_ident[EI_MAG0] = ELFMAG0,
6657             .e_ident[EI_MAG1] = ELFMAG1,
6658             .e_ident[EI_MAG2] = ELFMAG2,
6659             .e_ident[EI_MAG3] = ELFMAG3,
6660             .e_ident[EI_CLASS] = ELF_CLASS,
6661             .e_ident[EI_DATA] = ELF_DATA,
6662             .e_ident[EI_VERSION] = EV_CURRENT,
6663             .e_type = ET_EXEC,
6664             .e_machine = ELF_HOST_MACHINE,
6665             .e_version = EV_CURRENT,
6666             .e_phoff = offsetof(struct ElfImage, phdr),
6667             .e_shoff = offsetof(struct ElfImage, shdr),
6668             .e_ehsize = sizeof(ElfW(Ehdr)),
6669             .e_phentsize = sizeof(ElfW(Phdr)),
6670             .e_phnum = 1,
6671             .e_shentsize = sizeof(ElfW(Shdr)),
6672             .e_shnum = ARRAY_SIZE(img->shdr),
6673             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6674 #ifdef ELF_HOST_FLAGS
6675             .e_flags = ELF_HOST_FLAGS,
6676 #endif
6677 #ifdef ELF_OSABI
6678             .e_ident[EI_OSABI] = ELF_OSABI,
6679 #endif
6680         },
6681         .phdr = {
6682             .p_type = PT_LOAD,
6683             .p_flags = PF_X,
6684         },
6685         .shdr = {
6686             [0] = { .sh_type = SHT_NULL },
6687             /* Trick: The contents of code_gen_buffer are not present in
6688                this fake ELF file; that got allocated elsewhere.  Therefore
6689                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6690                will not look for contents.  We can record any address.  */
6691             [1] = { /* .text */
6692                 .sh_type = SHT_NOBITS,
6693                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6694             },
6695             [2] = { /* .debug_info */
6696                 .sh_type = SHT_PROGBITS,
6697                 .sh_offset = offsetof(struct ElfImage, di),
6698                 .sh_size = sizeof(struct DebugInfo),
6699             },
6700             [3] = { /* .debug_abbrev */
6701                 .sh_type = SHT_PROGBITS,
6702                 .sh_offset = offsetof(struct ElfImage, da),
6703                 .sh_size = sizeof(img->da),
6704             },
6705             [4] = { /* .debug_frame */
6706                 .sh_type = SHT_PROGBITS,
6707                 .sh_offset = sizeof(struct ElfImage),
6708             },
6709             [5] = { /* .symtab */
6710                 .sh_type = SHT_SYMTAB,
6711                 .sh_offset = offsetof(struct ElfImage, sym),
6712                 .sh_size = sizeof(img->sym),
6713                 .sh_info = 1,
6714                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6715                 .sh_entsize = sizeof(ElfW(Sym)),
6716             },
6717             [6] = { /* .strtab */
6718                 .sh_type = SHT_STRTAB,
6719                 .sh_offset = offsetof(struct ElfImage, str),
6720                 .sh_size = sizeof(img->str),
6721             }
6722         },
6723         .sym = {
6724             [1] = { /* code_gen_buffer */
6725                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6726                 .st_shndx = 1,
6727             }
6728         },
6729         .di = {
6730             .len = sizeof(struct DebugInfo) - 4,
6731             .version = 2,
6732             .ptr_size = sizeof(void *),
6733             .cu_die = 1,
6734             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6735             .fn_die = 2,
6736             .fn_name = "code_gen_buffer"
6737         },
6738         .da = {
6739             1,          /* abbrev number (the cu) */
6740             0x11, 1,    /* DW_TAG_compile_unit, has children */
6741             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6742             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6743             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6744             0, 0,       /* end of abbrev */
6745             2,          /* abbrev number (the fn) */
6746             0x2e, 0,    /* DW_TAG_subprogram, no children */
6747             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6748             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6749             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6750             0, 0,       /* end of abbrev */
6751             0           /* no more abbrev */
6752         },
6753         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6754                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6755     };
6756 
6757     /* We only need a single jit entry; statically allocate it.  */
6758     static struct jit_code_entry one_entry;
6759 
6760     uintptr_t buf = (uintptr_t)buf_ptr;
6761     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6762     DebugFrameHeader *dfh;
6763 
6764     img = g_malloc(img_size);
6765     *img = img_template;
6766 
6767     img->phdr.p_vaddr = buf;
6768     img->phdr.p_paddr = buf;
6769     img->phdr.p_memsz = buf_size;
6770 
6771     img->shdr[1].sh_name = find_string(img->str, ".text");
6772     img->shdr[1].sh_addr = buf;
6773     img->shdr[1].sh_size = buf_size;
6774 
6775     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6776     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6777 
6778     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6779     img->shdr[4].sh_size = debug_frame_size;
6780 
6781     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6782     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6783 
6784     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6785     img->sym[1].st_value = buf;
6786     img->sym[1].st_size = buf_size;
6787 
6788     img->di.cu_low_pc = buf;
6789     img->di.cu_high_pc = buf + buf_size;
6790     img->di.fn_low_pc = buf;
6791     img->di.fn_high_pc = buf + buf_size;
6792 
6793     dfh = (DebugFrameHeader *)(img + 1);
6794     memcpy(dfh, debug_frame, debug_frame_size);
6795     dfh->fde.func_start = buf;
6796     dfh->fde.func_len = buf_size;
6797 
6798 #ifdef DEBUG_JIT
6799     /* Enable this block to debug the creation of the ELF image file.
6800        The result can be inspected with readelf, objdump, or similar tools.  */
6801     {
6802         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6803         FILE *f = fopen(jit, "w+b");
6804         if (f) {
6805             if (fwrite(img, img_size, 1, f) != 1) {
6806                 /* fwrite returns items written; avoid the unused-result warning.  */
6807             }
6808             fclose(f);
6809         }
6810     }
6811 #endif
6812 
6813     one_entry.symfile_addr = img;
6814     one_entry.symfile_size = img_size;
6815 
6816     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6817     __jit_debug_descriptor.relevant_entry = &one_entry;
6818     __jit_debug_descriptor.first_entry = &one_entry;
6819     __jit_debug_register_code();
6820 }
6821 #else
6822 /* No support for the feature.  Provide the entry point expected by exec.c,
6823    and implement the internal function we declared earlier.  */
6824 
6825 static void tcg_register_jit_int(const void *buf, size_t size,
6826                                  const void *debug_frame,
6827                                  size_t debug_frame_size)
6828 {
6829 }
6830 
6831 void tcg_register_jit(const void *buf, size_t buf_size)
6832 {
6833 }
6834 #endif /* ELF_HOST_MACHINE */
6835 
6836 #if !TCG_TARGET_MAYBE_vec
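     /* Stub for hosts without vector support: with no vector opcodes
        generated, a request to expand one can never be reached.  */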
6837 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6838 {
6839     g_assert_not_reached();
6840 }
6841 #endif
6842