/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
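
/*
 * Worked example of the emitters above (illustrative, host-dependent):
 * on a host with TCG_TARGET_INSN_UNIT_SIZE == 1, such as x86, a call
 * like
 *
 *     tcg_out32(s, 0x12345678);
 *
 * takes the memcpy path and advances s->code_ptr by 4 / 1 = 4 one-byte
 * units.  On a backend with 4-byte insn units, the same call takes the
 * direct-store path and advances by a single unit.  The memcpy form
 * sidesteps alignment assumptions whenever the value is wider than one
 * insn unit.
 */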

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
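
/*
 * Summary of the two-phase scheme above: tcg_out_reloc() queues a fixup
 * against a label whose address is not yet known; tcg_out_label() later
 * binds the label to the current output position; tcg_resolve_relocs()
 * then walks every queued fixup and asks the backend's patch_reloc()
 * to rewrite the recorded instruction with the now-known value.  A
 * false return from patch_reloc() indicates an out-of-range relocation,
 * which the caller treats much like a buffer overflow and retries.
 */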

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
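
/*
 * Example of the overlap handling above (illustrative): with
 * i1 = { .dst = R0, .src = R1 } and i2 = { .dst = R1, .src = R0 },
 * neither move can go first without clobbering the other's source.
 * If the backend implements tcg_out_xchg(), a single swap puts both
 * values in place and only the extensions remain; otherwise i1's
 * source value is parked in @scratch, i2 is emitted as R1 <- ext(R0),
 * and i1 completes as R0 <- ext(scratch).
 */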

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg operations.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
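
/*
 * Note on the ordering above (illustrative): entries are kept sorted by
 * descending nlong, then by descending data.  Keeping equal-sized,
 * equal-valued entries adjacent lets tcg_out_pool_finalize() below emit
 * each distinct constant only once, since it need only compare each
 * entry against the one most recently emitted.  E.g. two
 * new_pool_label(s, 0x42, ...) calls from different insns end up
 * adjacent in the list and share a single pool slot.
 */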

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, on a 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of the block.
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
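
/*
 * The C_O*_I* macros, together with the redefinitions that follow,
 * implement an "X-macro" over tcg-target-con-set.h: the header is
 * included once here to build the enum and once more below to build
 * the matching constraint_sets[] array, so an enumerator and its data
 * stay in sync by construction.  For a hypothetical constraint-set
 * line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * the expansion here is the enumerator c_o1_i2_r_r_ri, and the
 * expansion below is the initializer { 1, 2, { "r", "r", "ri" } }
 * at the same index.
 */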

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
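
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expands to
 *
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 *
 * which compiles only if the backend defined outop_add with the exact
 * type TCGOutOpBinary; any other type fails the _Generic association
 * at compile time.
 */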

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is no longer used for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both
 * system/user modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
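
/*
 * Sketch of the typemask encoding used above and in init_call_layout()
 * below: each typecode occupies 3 bits, with bits [2:0] describing the
 * return value and bits [3n+2:3n] (n >= 1) describing argument n-1.
 * For info_helper_ld64_mmu that is
 *
 *     dh_typemask(i64, 0) | dh_typemask(env, 1) | dh_typemask(i64, 2)
 *         | dh_typemask(i32, 3) | dh_typemask(ptr, 4)
 *
 * i.e. four arguments after the return slot, which is exactly what the
 * "32 - clz32(typemask >> 3)" computation in init_ffi_layout() recovers.
 */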

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
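
/*
 * Illustration (register counts and TCG_TARGET_CALL_STACK_OFFSET are
 * host-dependent; assume 6 integer argument registers and an offset of
 * 0): arg_slot_reg_p(5) is true, while arg_slot 6 is the first stack
 * slot, with arg_slot_stk_ofs(6) == 0 and
 * arg_slot_stk_ofs(7) == sizeof(tcg_target_long).
 */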

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
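
/*
 * Example (assuming a 32-bit host whose ABI wants i64 in an aligned
 * register pair, i.e. TCG_CALL_ARG_EVEN): for a helper taking
 * (i32, i64), the i32 lands in slot 0 and leaves cum->arg_slot == 1;
 * layout_arg_even() then bumps it to 2, so the i64 occupies slots
 * 2 and 3 instead of straddling an odd/even boundary.
 */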
1320 
1321 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1322                          TCGCallArgumentKind kind)
1323 {
1324     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1325 
1326     *loc = (TCGCallArgumentLoc){
1327         .kind = kind,
1328         .arg_idx = cum->arg_idx,
1329         .arg_slot = cum->arg_slot,
1330     };
1331     cum->info_in_idx++;
1332     cum->arg_slot++;
1333 }
1334 
1335 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1336                                 TCGHelperInfo *info, int n)
1337 {
1338     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1339 
1340     for (int i = 0; i < n; ++i) {
1341         /* Layout all using the same arg_idx, adjusting the subindex. */
1342         loc[i] = (TCGCallArgumentLoc){
1343             .kind = TCG_CALL_ARG_NORMAL,
1344             .arg_idx = cum->arg_idx,
1345             .tmp_subindex = i,
1346             .arg_slot = cum->arg_slot + i,
1347         };
1348     }
1349     cum->info_in_idx += n;
1350     cum->arg_slot += n;
1351 }
1352 
1353 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1354 {
1355     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1356     int n = 128 / TCG_TARGET_REG_BITS;
1357 
1358     /* The first subindex carries the pointer. */
1359     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1360 
1361     /*
1362      * The callee is allowed to clobber memory associated with
1363      * structure pass by-reference.  Therefore we must make copies.
1364      * Allocate space from "ref_slot", which will be adjusted to
1365      * follow the parameters on the stack.
1366      */
1367     loc[0].ref_slot = cum->ref_slot;
1368 
1369     /*
1370      * Subsequent words also go into the reference slot, but
1371      * do not accumulate into the regular arguments.
1372      */
1373     for (int i = 1; i < n; ++i) {
1374         loc[i] = (TCGCallArgumentLoc){
1375             .kind = TCG_CALL_ARG_BY_REF_N,
1376             .arg_idx = cum->arg_idx,
1377             .tmp_subindex = i,
1378             .ref_slot = cum->ref_slot + i,
1379         };
1380     }
1381     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1382     cum->ref_slot += n;
1383 }
1384 
1385 static void init_call_layout(TCGHelperInfo *info)
1386 {
1387     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1388     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1389     unsigned typemask = info->typemask;
1390     unsigned typecode;
1391     TCGCumulativeArgs cum = { };
1392 
1393     /*
1394      * Parse and place any function return value.
1395      */
1396     typecode = typemask & 7;
1397     switch (typecode) {
1398     case dh_typecode_void:
1399         info->nr_out = 0;
1400         break;
1401     case dh_typecode_i32:
1402     case dh_typecode_s32:
1403     case dh_typecode_ptr:
1404         info->nr_out = 1;
1405         info->out_kind = TCG_CALL_RET_NORMAL;
1406         break;
1407     case dh_typecode_i64:
1408     case dh_typecode_s64:
1409         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1410         info->out_kind = TCG_CALL_RET_NORMAL;
1411         /* Query the last register now to trigger any assert early. */
1412         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1413         break;
1414     case dh_typecode_i128:
1415         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1416         info->out_kind = TCG_TARGET_CALL_RET_I128;
1417         switch (TCG_TARGET_CALL_RET_I128) {
1418         case TCG_CALL_RET_NORMAL:
1419             /* Query the last register now to trigger any assert early. */
1420             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1421             break;
1422         case TCG_CALL_RET_BY_VEC:
1423             /* Query the single register now to trigger any assert early. */
1424             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1425             break;
1426         case TCG_CALL_RET_BY_REF:
1427             /*
1428              * Allocate the first argument to the output.
1429              * We don't need to store this anywhere, just make it
1430              * unavailable for use in the input loop below.
1431              */
1432             cum.arg_slot = 1;
1433             break;
1434         default:
1435             qemu_build_not_reached();
1436         }
1437         break;
1438     default:
1439         g_assert_not_reached();
1440     }
1441 
1442     /*
1443      * Parse and place function arguments.
1444      */
1445     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1446         TCGCallArgumentKind kind;
1447         TCGType type;
1448 
1449         typecode = typemask & 7;
1450         switch (typecode) {
1451         case dh_typecode_i32:
1452         case dh_typecode_s32:
1453             type = TCG_TYPE_I32;
1454             break;
1455         case dh_typecode_i64:
1456         case dh_typecode_s64:
1457             type = TCG_TYPE_I64;
1458             break;
1459         case dh_typecode_ptr:
1460             type = TCG_TYPE_PTR;
1461             break;
1462         case dh_typecode_i128:
1463             type = TCG_TYPE_I128;
1464             break;
1465         default:
1466             g_assert_not_reached();
1467         }
1468 
1469         switch (type) {
1470         case TCG_TYPE_I32:
1471             switch (TCG_TARGET_CALL_ARG_I32) {
1472             case TCG_CALL_ARG_EVEN:
1473                 layout_arg_even(&cum);
1474                 /* fall through */
1475             case TCG_CALL_ARG_NORMAL:
1476                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1477                 break;
1478             case TCG_CALL_ARG_EXTEND:
1479                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1480                 layout_arg_1(&cum, info, kind);
1481                 break;
1482             default:
1483                 qemu_build_not_reached();
1484             }
1485             break;
1486 
1487         case TCG_TYPE_I64:
1488             switch (TCG_TARGET_CALL_ARG_I64) {
1489             case TCG_CALL_ARG_EVEN:
1490                 layout_arg_even(&cum);
1491                 /* fall through */
1492             case TCG_CALL_ARG_NORMAL:
1493                 if (TCG_TARGET_REG_BITS == 32) {
1494                     layout_arg_normal_n(&cum, info, 2);
1495                 } else {
1496                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1497                 }
1498                 break;
1499             default:
1500                 qemu_build_not_reached();
1501             }
1502             break;
1503 
1504         case TCG_TYPE_I128:
1505             switch (TCG_TARGET_CALL_ARG_I128) {
1506             case TCG_CALL_ARG_EVEN:
1507                 layout_arg_even(&cum);
1508                 /* fall through */
1509             case TCG_CALL_ARG_NORMAL:
1510                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1511                 break;
1512             case TCG_CALL_ARG_BY_REF:
1513                 layout_arg_by_ref(&cum, info);
1514                 break;
1515             default:
1516                 qemu_build_not_reached();
1517             }
1518             break;
1519 
1520         default:
1521             g_assert_not_reached();
1522         }
1523     }
1524     info->nr_in = cum.info_in_idx;
1525 
1526     /* Validate that we didn't overrun the input array. */
1527     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1528     /* Validate the backend has enough argument space. */
1529     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1530 
1531     /*
1532      * Relocate the "ref_slot" area to the end of the parameters.
1533      * Minimizing this stack offset helps code size for x86,
1534      * which has a signed 8-bit offset encoding.
1535      */
1536     if (cum.ref_slot != 0) {
1537         int ref_base = 0;
1538 
1539         if (cum.arg_slot > max_reg_slots) {
1540             int align = __alignof(Int128) / sizeof(tcg_target_long);
1541 
1542             ref_base = cum.arg_slot - max_reg_slots;
1543             if (align > 1) {
1544                 ref_base = ROUND_UP(ref_base, align);
1545             }
1546         }
1547         assert(ref_base + cum.ref_slot <= max_stk_slots);
1548         ref_base += max_reg_slots;
1549 
1550         if (ref_base != 0) {
1551             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1552                 TCGCallArgumentLoc *loc = &info->in[i];
1553                 switch (loc->kind) {
1554                 case TCG_CALL_ARG_BY_REF:
1555                 case TCG_CALL_ARG_BY_REF_N:
1556                     loc->ref_slot += ref_base;
1557                     break;
1558                 default:
1559                     break;
1560                 }
1561             }
1562         }
1563     }
1564 }
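
/*
 * Worked example of the relocation above, with hypothetical numbers:
 * take max_reg_slots = 8, max_stk_slots = 16, and a 64-bit host, so
 * align = __alignof(Int128) / sizeof(tcg_target_long) = 2.  If the
 * arguments consumed cum.arg_slot = 10 slots (two of them on the
 * stack), then ref_base = ROUND_UP(10 - 8, 2) + 8 = 10: the
 * by-reference storage begins just past the last argument slot,
 * aligned suitably for Int128.
 */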
1565 
1566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1567 static void process_constraint_sets(void);
1568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1569                                             TCGReg reg, const char *name);
1570 
1571 static void tcg_context_init(unsigned max_threads)
1572 {
1573     TCGContext *s = &tcg_init_ctx;
1574     int n, i;
1575     TCGTemp *ts;
1576 
1577     memset(s, 0, sizeof(*s));
1578     s->nb_globals = 0;
1579 
1580     init_call_layout(&info_helper_ld32_mmu);
1581     init_call_layout(&info_helper_ld64_mmu);
1582     init_call_layout(&info_helper_ld128_mmu);
1583     init_call_layout(&info_helper_st32_mmu);
1584     init_call_layout(&info_helper_st64_mmu);
1585     init_call_layout(&info_helper_st128_mmu);
1586 
1587     tcg_target_init(s);
1588     process_constraint_sets();
1589 
1590     /* Reverse the order of the saved registers, assuming they're all at
1591        the start of tcg_target_reg_alloc_order.  */
1592     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1593         int r = tcg_target_reg_alloc_order[n];
1594         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1595             break;
1596         }
1597     }
1598     for (i = 0; i < n; ++i) {
1599         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1600     }
1601     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1602         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1603     }
1604 
1605     tcg_ctx = s;
1606     /*
1607      * In user-mode we simply share the init context among threads, since we
1608      * use a single region. See the documentation of tcg_region_init() for the
1609      * reasoning behind this.
1610      * In system-mode we will have at most max_threads TCG threads.
1611      */
1612 #ifdef CONFIG_USER_ONLY
1613     tcg_ctxs = &tcg_ctx;
1614     tcg_cur_ctxs = 1;
1615     tcg_max_ctxs = 1;
1616 #else
1617     tcg_max_ctxs = max_threads;
1618     tcg_ctxs = g_new0(TCGContext *, max_threads);
1619 #endif
1620 
1621     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1622     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1623     tcg_env = temp_tcgv_ptr(ts);
1624 }
1625 
1626 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1627 {
1628     tcg_context_init(max_threads);
1629     tcg_region_init(tb_size, splitwx, max_threads);
1630 }
1631 
1632 /*
1633  * Allocate TBs right before their corresponding translated code, making
1634  * sure that TBs and code are on different cache lines.
1635  */
1636 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1637 {
1638     uintptr_t align = qemu_icache_linesize;
1639     TranslationBlock *tb;
1640     void *next;
1641 
1642  retry:
1643     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1644     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1645 
1646     if (unlikely(next > s->code_gen_highwater)) {
1647         if (tcg_region_alloc(s)) {
1648             return NULL;
1649         }
1650         goto retry;
1651     }
1652     qatomic_set(&s->code_gen_ptr, next);
1653     return tb;
1654 }
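
/*
 * Layout sketch, assuming a hypothetical 64-byte icache line: with
 * code_gen_ptr at offset 0x28 within its line, the TB struct is
 * placed at 0x40 and "next" (the future code_gen_ptr) at the first
 * 64-byte boundary past the struct, so TB metadata and its translated
 * code never share a cache line.
 */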
1655 
1656 void tcg_prologue_init(void)
1657 {
1658     TCGContext *s = tcg_ctx;
1659     size_t prologue_size;
1660 
1661     s->code_ptr = s->code_gen_ptr;
1662     s->code_buf = s->code_gen_ptr;
1663     s->data_gen_ptr = NULL;
1664 
1665 #ifndef CONFIG_TCG_INTERPRETER
1666     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1667 #endif
1668 
1669     s->pool_labels = NULL;
1670 
1671     qemu_thread_jit_write();
1672     /* Generate the prologue.  */
1673     tcg_target_qemu_prologue(s);
1674 
1675     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1676     {
1677         int result = tcg_out_pool_finalize(s);
1678         tcg_debug_assert(result == 0);
1679     }
1680 
1681     prologue_size = tcg_current_code_size(s);
1682     perf_report_prologue(s->code_gen_ptr, prologue_size);
1683 
1684 #ifndef CONFIG_TCG_INTERPRETER
1685     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1686                         (uintptr_t)s->code_buf, prologue_size);
1687 #endif
1688 
1689     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1690         FILE *logfile = qemu_log_trylock();
1691         if (logfile) {
1692             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1693             if (s->data_gen_ptr) {
1694                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1695                 size_t data_size = prologue_size - code_size;
1696                 size_t i;
1697 
1698                 disas(logfile, s->code_gen_ptr, code_size);
1699 
1700                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1701                     if (sizeof(tcg_target_ulong) == 8) {
1702                         fprintf(logfile,
1703                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1704                                 (uintptr_t)s->data_gen_ptr + i,
1705                                 *(uint64_t *)(s->data_gen_ptr + i));
1706                     } else {
1707                         fprintf(logfile,
1708                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1709                                 (uintptr_t)s->data_gen_ptr + i,
1710                                 *(uint32_t *)(s->data_gen_ptr + i));
1711                     }
1712                 }
1713             } else {
1714                 disas(logfile, s->code_gen_ptr, prologue_size);
1715             }
1716             fprintf(logfile, "\n");
1717             qemu_log_unlock(logfile);
1718         }
1719     }
1720 
1721 #ifndef CONFIG_TCG_INTERPRETER
1722     /*
1723      * Assert that goto_ptr is implemented completely, i.e. an epilogue was set.
1724      * For tci, we use NULL as the signal to return from the interpreter,
1725      * so skip this check.
1726      */
1727     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1728 #endif
1729 
1730     tcg_region_prologue_set(s);
1731 }
1732 
1733 void tcg_func_start(TCGContext *s)
1734 {
1735     tcg_pool_reset(s);
1736     s->nb_temps = s->nb_globals;
1737 
1738     /* No temps have been previously allocated for size or locality.  */
1739     tcg_temp_ebb_reset_freed(s);
1740 
1741     /* No constant temps have been previously allocated. */
1742     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1743         if (s->const_table[i]) {
1744             g_hash_table_remove_all(s->const_table[i]);
1745         }
1746     }
1747 
1748     s->nb_ops = 0;
1749     s->nb_labels = 0;
1750     s->current_frame_offset = s->frame_start;
1751 
1752 #ifdef CONFIG_DEBUG_TCG
1753     s->goto_tb_issue_mask = 0;
1754 #endif
1755 
1756     QTAILQ_INIT(&s->ops);
1757     QTAILQ_INIT(&s->free_ops);
1758     s->emit_before_op = NULL;
1759     QSIMPLEQ_INIT(&s->labels);
1760 
1761     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1762     tcg_debug_assert(s->insn_start_words > 0);
1763 }
1764 
1765 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1766 {
1767     int n = s->nb_temps++;
1768 
1769     if (n >= TCG_MAX_TEMPS) {
1770         tcg_raise_tb_overflow(s);
1771     }
1772     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1773 }
1774 
1775 static TCGTemp *tcg_global_alloc(TCGContext *s)
1776 {
1777     TCGTemp *ts;
1778 
1779     tcg_debug_assert(s->nb_globals == s->nb_temps);
1780     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1781     s->nb_globals++;
1782     ts = tcg_temp_alloc(s);
1783     ts->kind = TEMP_GLOBAL;
1784 
1785     return ts;
1786 }
1787 
1788 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1789                                             TCGReg reg, const char *name)
1790 {
1791     TCGTemp *ts;
1792 
1793     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1794 
1795     ts = tcg_global_alloc(s);
1796     ts->base_type = type;
1797     ts->type = type;
1798     ts->kind = TEMP_FIXED;
1799     ts->reg = reg;
1800     ts->name = name;
1801     tcg_regset_set_reg(s->reserved_regs, reg);
1802 
1803     return ts;
1804 }
1805 
1806 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1807 {
1808     s->frame_start = start;
1809     s->frame_end = start + size;
1810     s->frame_temp
1811         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1812 }
1813 
1814 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1815                                             const char *name, TCGType type)
1816 {
1817     TCGContext *s = tcg_ctx;
1818     TCGTemp *base_ts = tcgv_ptr_temp(base);
1819     TCGTemp *ts = tcg_global_alloc(s);
1820     int indirect_reg = 0;
1821 
1822     switch (base_ts->kind) {
1823     case TEMP_FIXED:
1824         break;
1825     case TEMP_GLOBAL:
1826         /* We do not support double-indirect registers.  */
1827         tcg_debug_assert(!base_ts->indirect_reg);
1828         base_ts->indirect_base = 1;
1829         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1830                             ? 2 : 1);
1831         indirect_reg = 1;
1832         break;
1833     default:
1834         g_assert_not_reached();
1835     }
1836 
1837     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1838         TCGTemp *ts2 = tcg_global_alloc(s);
1839         char buf[64];
1840 
1841         ts->base_type = TCG_TYPE_I64;
1842         ts->type = TCG_TYPE_I32;
1843         ts->indirect_reg = indirect_reg;
1844         ts->mem_allocated = 1;
1845         ts->mem_base = base_ts;
1846         ts->mem_offset = offset;
1847         pstrcpy(buf, sizeof(buf), name);
1848         pstrcat(buf, sizeof(buf), "_0");
1849         ts->name = strdup(buf);
1850 
1851         tcg_debug_assert(ts2 == ts + 1);
1852         ts2->base_type = TCG_TYPE_I64;
1853         ts2->type = TCG_TYPE_I32;
1854         ts2->indirect_reg = indirect_reg;
1855         ts2->mem_allocated = 1;
1856         ts2->mem_base = base_ts;
1857         ts2->mem_offset = offset + 4;
1858         ts2->temp_subindex = 1;
1859         pstrcpy(buf, sizeof(buf), name);
1860         pstrcat(buf, sizeof(buf), "_1");
1861         ts2->name = strdup(buf);
1862     } else {
1863         ts->base_type = type;
1864         ts->type = type;
1865         ts->indirect_reg = indirect_reg;
1866         ts->mem_allocated = 1;
1867         ts->mem_base = base_ts;
1868         ts->mem_offset = offset;
1869         ts->name = name;
1870     }
1871     return ts;
1872 }
1873 
1874 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1875 {
1876     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1877     return temp_tcgv_i32(ts);
1878 }
1879 
1880 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1881 {
1882     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1883     return temp_tcgv_i64(ts);
1884 }
1885 
1886 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1887 {
1888     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1889     return temp_tcgv_ptr(ts);
1890 }
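
/*
 * Typical front-end usage, sketched with a hypothetical CPU state
 * struct: a guest register living in env becomes a named TCG global
 * that generated ops can reference directly.
 *
 *     TCGv_i32 cpu_reg0 = tcg_global_mem_new_i32(
 *         tcg_env, offsetof(CPUFooState, reg[0]), "reg0");
 *
 * On a 32-bit host, a 64-bit global is split by
 * tcg_global_mem_new_internal() above into "reg0_0"/"reg0_1" halves.
 */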
1891 
1892 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1893 {
1894     TCGContext *s = tcg_ctx;
1895     TCGTemp *ts;
1896     int n;
1897 
1898     if (kind == TEMP_EBB) {
1899         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1900 
1901         if (idx < TCG_MAX_TEMPS) {
1902             /* There is already an available temp with the right type.  */
1903             clear_bit(idx, s->free_temps[type].l);
1904 
1905             ts = &s->temps[idx];
1906             ts->temp_allocated = 1;
1907             tcg_debug_assert(ts->base_type == type);
1908             tcg_debug_assert(ts->kind == kind);
1909             return ts;
1910         }
1911     } else {
1912         tcg_debug_assert(kind == TEMP_TB);
1913     }
1914 
1915     switch (type) {
1916     case TCG_TYPE_I32:
1917     case TCG_TYPE_V64:
1918     case TCG_TYPE_V128:
1919     case TCG_TYPE_V256:
1920         n = 1;
1921         break;
1922     case TCG_TYPE_I64:
1923         n = 64 / TCG_TARGET_REG_BITS;
1924         break;
1925     case TCG_TYPE_I128:
1926         n = 128 / TCG_TARGET_REG_BITS;
1927         break;
1928     default:
1929         g_assert_not_reached();
1930     }
1931 
1932     ts = tcg_temp_alloc(s);
1933     ts->base_type = type;
1934     ts->temp_allocated = 1;
1935     ts->kind = kind;
1936 
1937     if (n == 1) {
1938         ts->type = type;
1939     } else {
1940         ts->type = TCG_TYPE_REG;
1941 
1942         for (int i = 1; i < n; ++i) {
1943             TCGTemp *ts2 = tcg_temp_alloc(s);
1944 
1945             tcg_debug_assert(ts2 == ts + i);
1946             ts2->base_type = type;
1947             ts2->type = TCG_TYPE_REG;
1948             ts2->temp_allocated = 1;
1949             ts2->temp_subindex = i;
1950             ts2->kind = kind;
1951         }
1952     }
1953     return ts;
1954 }
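
/*
 * Note the fan-out above: on a 32-bit host, TCG_TYPE_I64 allocates
 * n = 64/32 = 2 consecutive TCGTemps, and TCG_TYPE_I128 allocates
 * 128 / TCG_TARGET_REG_BITS pieces on any host.  Each piece is typed
 * TCG_TYPE_REG and distinguished only by temp_subindex.
 */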
1955 
1956 TCGv_i32 tcg_temp_new_i32(void)
1957 {
1958     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1959 }
1960 
1961 TCGv_i32 tcg_temp_ebb_new_i32(void)
1962 {
1963     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1964 }
1965 
1966 TCGv_i64 tcg_temp_new_i64(void)
1967 {
1968     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1969 }
1970 
1971 TCGv_i64 tcg_temp_ebb_new_i64(void)
1972 {
1973     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1974 }
1975 
1976 TCGv_ptr tcg_temp_new_ptr(void)
1977 {
1978     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1979 }
1980 
1981 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1982 {
1983     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1984 }
1985 
1986 TCGv_i128 tcg_temp_new_i128(void)
1987 {
1988     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1989 }
1990 
1991 TCGv_i128 tcg_temp_ebb_new_i128(void)
1992 {
1993     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1994 }
1995 
1996 TCGv_vec tcg_temp_new_vec(TCGType type)
1997 {
1998     TCGTemp *t;
1999 
2000 #ifdef CONFIG_DEBUG_TCG
2001     switch (type) {
2002     case TCG_TYPE_V64:
2003         assert(TCG_TARGET_HAS_v64);
2004         break;
2005     case TCG_TYPE_V128:
2006         assert(TCG_TARGET_HAS_v128);
2007         break;
2008     case TCG_TYPE_V256:
2009         assert(TCG_TARGET_HAS_v256);
2010         break;
2011     default:
2012         g_assert_not_reached();
2013     }
2014 #endif
2015 
2016     t = tcg_temp_new_internal(type, TEMP_EBB);
2017     return temp_tcgv_vec(t);
2018 }
2019 
2020 /* Create a new temp of the same type as an existing temp.  */
2021 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2022 {
2023     TCGTemp *t = tcgv_vec_temp(match);
2024 
2025     tcg_debug_assert(t->temp_allocated != 0);
2026 
2027     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2028     return temp_tcgv_vec(t);
2029 }
2030 
2031 void tcg_temp_free_internal(TCGTemp *ts)
2032 {
2033     TCGContext *s = tcg_ctx;
2034 
2035     switch (ts->kind) {
2036     case TEMP_CONST:
2037     case TEMP_TB:
2038         /* Silently ignore free. */
2039         break;
2040     case TEMP_EBB:
2041         tcg_debug_assert(ts->temp_allocated != 0);
2042         ts->temp_allocated = 0;
2043         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2044         break;
2045     default:
2046         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2047         g_assert_not_reached();
2048     }
2049 }
2050 
2051 void tcg_temp_free_i32(TCGv_i32 arg)
2052 {
2053     tcg_temp_free_internal(tcgv_i32_temp(arg));
2054 }
2055 
2056 void tcg_temp_free_i64(TCGv_i64 arg)
2057 {
2058     tcg_temp_free_internal(tcgv_i64_temp(arg));
2059 }
2060 
2061 void tcg_temp_free_i128(TCGv_i128 arg)
2062 {
2063     tcg_temp_free_internal(tcgv_i128_temp(arg));
2064 }
2065 
2066 void tcg_temp_free_ptr(TCGv_ptr arg)
2067 {
2068     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2069 }
2070 
2071 void tcg_temp_free_vec(TCGv_vec arg)
2072 {
2073     tcg_temp_free_internal(tcgv_vec_temp(arg));
2074 }
2075 
2076 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2077 {
2078     TCGContext *s = tcg_ctx;
2079     GHashTable *h = s->const_table[type];
2080     TCGTemp *ts;
2081 
2082     if (h == NULL) {
2083         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2084         s->const_table[type] = h;
2085     }
2086 
2087     ts = g_hash_table_lookup(h, &val);
2088     if (ts == NULL) {
2089         int64_t *val_ptr;
2090 
2091         ts = tcg_temp_alloc(s);
2092 
2093         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2094             TCGTemp *ts2 = tcg_temp_alloc(s);
2095 
2096             tcg_debug_assert(ts2 == ts + 1);
2097 
2098             ts->base_type = TCG_TYPE_I64;
2099             ts->type = TCG_TYPE_I32;
2100             ts->kind = TEMP_CONST;
2101             ts->temp_allocated = 1;
2102 
2103             ts2->base_type = TCG_TYPE_I64;
2104             ts2->type = TCG_TYPE_I32;
2105             ts2->kind = TEMP_CONST;
2106             ts2->temp_allocated = 1;
2107             ts2->temp_subindex = 1;
2108 
2109             /*
2110              * Retain the full value of the 64-bit constant in the low
2111              * part, so that the hash table works.  Actual uses will
2112              * truncate the value to the low part.
2113              */
2114             ts[HOST_BIG_ENDIAN].val = val;
2115             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2116             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2117         } else {
2118             ts->base_type = type;
2119             ts->type = type;
2120             ts->kind = TEMP_CONST;
2121             ts->temp_allocated = 1;
2122             ts->val = val;
2123             val_ptr = &ts->val;
2124         }
2125         g_hash_table_insert(h, val_ptr, ts);
2126     }
2127 
2128     return ts;
2129 }
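
/*
 * Constants are interned per (type, value), so repeated requests
 * yield the same temp:
 *
 *     TCGv_i32 a = tcg_constant_i32(0x10);
 *     TCGv_i32 b = tcg_constant_i32(0x10);    // a == b
 *
 * TEMP_CONST temps are never freed individually (see
 * tcg_temp_free_internal); the tables are emptied for the next
 * translation by tcg_func_start().
 */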
2130 
2131 TCGv_i32 tcg_constant_i32(int32_t val)
2132 {
2133     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2134 }
2135 
2136 TCGv_i64 tcg_constant_i64(int64_t val)
2137 {
2138     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2139 }
2140 
2141 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2142 {
2143     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2144 }
2145 
2146 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2147 {
2148     val = dup_const(vece, val);
2149     return temp_tcgv_vec(tcg_constant_internal(type, val));
2150 }
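
/*
 * dup_const() replicates the low 1 << vece bytes across 64 bits,
 * e.g. tcg_constant_vec(TCG_TYPE_V128, MO_8, 0x5a) interns the value
 * 0x5a5a5a5a5a5a5a5a, which the backend then broadcasts per element.
 */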
2151 
2152 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2153 {
2154     TCGTemp *t = tcgv_vec_temp(match);
2155 
2156     tcg_debug_assert(t->temp_allocated != 0);
2157     return tcg_constant_vec(t->base_type, vece, val);
2158 }
2159 
2160 #ifdef CONFIG_DEBUG_TCG
2161 size_t temp_idx(TCGTemp *ts)
2162 {
2163     ptrdiff_t n = ts - tcg_ctx->temps;
2164     assert(n >= 0 && n < tcg_ctx->nb_temps);
2165     return n;
2166 }
2167 
2168 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2169 {
2170     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2171 
2172     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2173     assert(o % sizeof(TCGTemp) == 0);
2174 
2175     return (void *)tcg_ctx + (uintptr_t)v;
2176 }
2177 #endif /* CONFIG_DEBUG_TCG */
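
/*
 * The checks above rely on the TCGv encoding: a TCGv_i32 is not a
 * real pointer but the byte offset of its TCGTemp from the start of
 * TCGContext, so temp <-> TCGv conversion is plain offset arithmetic
 * against tcg_ctx.
 */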
2178 
2179 /*
2180  * Return true if OP may appear in the opcode stream with TYPE.
2181  * Test the runtime variable that controls each opcode.
2182  */
2183 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2184 {
2185     bool has_type;
2186 
2187     switch (type) {
2188     case TCG_TYPE_I32:
2189         has_type = true;
2190         break;
2191     case TCG_TYPE_I64:
2192         has_type = TCG_TARGET_REG_BITS == 64;
2193         break;
2194     case TCG_TYPE_V64:
2195         has_type = TCG_TARGET_HAS_v64;
2196         break;
2197     case TCG_TYPE_V128:
2198         has_type = TCG_TARGET_HAS_v128;
2199         break;
2200     case TCG_TYPE_V256:
2201         has_type = TCG_TARGET_HAS_v256;
2202         break;
2203     default:
2204         has_type = false;
2205         break;
2206     }
2207 
2208     switch (op) {
2209     case INDEX_op_discard:
2210     case INDEX_op_set_label:
2211     case INDEX_op_call:
2212     case INDEX_op_br:
2213     case INDEX_op_mb:
2214     case INDEX_op_insn_start:
2215     case INDEX_op_exit_tb:
2216     case INDEX_op_goto_tb:
2217     case INDEX_op_goto_ptr:
2218     case INDEX_op_qemu_ld_i32:
2219     case INDEX_op_qemu_st_i32:
2220     case INDEX_op_qemu_ld_i64:
2221     case INDEX_op_qemu_st_i64:
2222         return true;
2223 
2224     case INDEX_op_qemu_st8_i32:
2225         return TCG_TARGET_HAS_qemu_st8_i32;
2226 
2227     case INDEX_op_qemu_ld_i128:
2228     case INDEX_op_qemu_st_i128:
2229         return TCG_TARGET_HAS_qemu_ldst_i128;
2230 
2231     case INDEX_op_add:
2232     case INDEX_op_and:
2233     case INDEX_op_mov:
2234     case INDEX_op_or:
2235     case INDEX_op_xor:
2236         return has_type;
2237 
2238     case INDEX_op_setcond_i32:
2239     case INDEX_op_brcond_i32:
2240     case INDEX_op_movcond_i32:
2241     case INDEX_op_ld8u_i32:
2242     case INDEX_op_ld8s_i32:
2243     case INDEX_op_ld16u_i32:
2244     case INDEX_op_ld16s_i32:
2245     case INDEX_op_ld_i32:
2246     case INDEX_op_st8_i32:
2247     case INDEX_op_st16_i32:
2248     case INDEX_op_st_i32:
2249     case INDEX_op_mul_i32:
2250     case INDEX_op_shl_i32:
2251     case INDEX_op_shr_i32:
2252     case INDEX_op_sar_i32:
2253     case INDEX_op_extract_i32:
2254     case INDEX_op_sextract_i32:
2255     case INDEX_op_deposit_i32:
2256         return true;
2257 
2258     case INDEX_op_negsetcond_i32:
2259         return TCG_TARGET_HAS_negsetcond_i32;
2260     case INDEX_op_div_i32:
2261     case INDEX_op_divu_i32:
2262         return TCG_TARGET_HAS_div_i32;
2263     case INDEX_op_rem_i32:
2264     case INDEX_op_remu_i32:
2265         return TCG_TARGET_HAS_rem_i32;
2266     case INDEX_op_div2_i32:
2267     case INDEX_op_divu2_i32:
2268         return TCG_TARGET_HAS_div2_i32;
2269     case INDEX_op_rotl_i32:
2270     case INDEX_op_rotr_i32:
2271         return TCG_TARGET_HAS_rot_i32;
2272     case INDEX_op_extract2_i32:
2273         return TCG_TARGET_HAS_extract2_i32;
2274     case INDEX_op_add2_i32:
2275         return TCG_TARGET_HAS_add2_i32;
2276     case INDEX_op_sub2_i32:
2277         return TCG_TARGET_HAS_sub2_i32;
2278     case INDEX_op_mulu2_i32:
2279         return TCG_TARGET_HAS_mulu2_i32;
2280     case INDEX_op_muls2_i32:
2281         return TCG_TARGET_HAS_muls2_i32;
2282     case INDEX_op_muluh_i32:
2283         return TCG_TARGET_HAS_muluh_i32;
2284     case INDEX_op_mulsh_i32:
2285         return TCG_TARGET_HAS_mulsh_i32;
2286     case INDEX_op_bswap16_i32:
2287         return TCG_TARGET_HAS_bswap16_i32;
2288     case INDEX_op_bswap32_i32:
2289         return TCG_TARGET_HAS_bswap32_i32;
2290     case INDEX_op_not_i32:
2291         return TCG_TARGET_HAS_not_i32;
2292     case INDEX_op_clz_i32:
2293         return TCG_TARGET_HAS_clz_i32;
2294     case INDEX_op_ctz_i32:
2295         return TCG_TARGET_HAS_ctz_i32;
2296     case INDEX_op_ctpop_i32:
2297         return TCG_TARGET_HAS_ctpop_i32;
2298 
2299     case INDEX_op_brcond2_i32:
2300     case INDEX_op_setcond2_i32:
2301         return TCG_TARGET_REG_BITS == 32;
2302 
2303     case INDEX_op_setcond_i64:
2304     case INDEX_op_brcond_i64:
2305     case INDEX_op_movcond_i64:
2306     case INDEX_op_ld8u_i64:
2307     case INDEX_op_ld8s_i64:
2308     case INDEX_op_ld16u_i64:
2309     case INDEX_op_ld16s_i64:
2310     case INDEX_op_ld32u_i64:
2311     case INDEX_op_ld32s_i64:
2312     case INDEX_op_ld_i64:
2313     case INDEX_op_st8_i64:
2314     case INDEX_op_st16_i64:
2315     case INDEX_op_st32_i64:
2316     case INDEX_op_st_i64:
2317     case INDEX_op_mul_i64:
2318     case INDEX_op_shl_i64:
2319     case INDEX_op_shr_i64:
2320     case INDEX_op_sar_i64:
2321     case INDEX_op_ext_i32_i64:
2322     case INDEX_op_extu_i32_i64:
2323     case INDEX_op_extract_i64:
2324     case INDEX_op_sextract_i64:
2325     case INDEX_op_deposit_i64:
2326         return TCG_TARGET_REG_BITS == 64;
2327 
2328     case INDEX_op_negsetcond_i64:
2329         return TCG_TARGET_HAS_negsetcond_i64;
2330     case INDEX_op_div_i64:
2331     case INDEX_op_divu_i64:
2332         return TCG_TARGET_HAS_div_i64;
2333     case INDEX_op_rem_i64:
2334     case INDEX_op_remu_i64:
2335         return TCG_TARGET_HAS_rem_i64;
2336     case INDEX_op_div2_i64:
2337     case INDEX_op_divu2_i64:
2338         return TCG_TARGET_HAS_div2_i64;
2339     case INDEX_op_rotl_i64:
2340     case INDEX_op_rotr_i64:
2341         return TCG_TARGET_HAS_rot_i64;
2342     case INDEX_op_extract2_i64:
2343         return TCG_TARGET_HAS_extract2_i64;
2344     case INDEX_op_extrl_i64_i32:
2345     case INDEX_op_extrh_i64_i32:
2346         return TCG_TARGET_HAS_extr_i64_i32;
2347     case INDEX_op_bswap16_i64:
2348         return TCG_TARGET_HAS_bswap16_i64;
2349     case INDEX_op_bswap32_i64:
2350         return TCG_TARGET_HAS_bswap32_i64;
2351     case INDEX_op_bswap64_i64:
2352         return TCG_TARGET_HAS_bswap64_i64;
2353     case INDEX_op_not_i64:
2354         return TCG_TARGET_HAS_not_i64;
2355     case INDEX_op_clz_i64:
2356         return TCG_TARGET_HAS_clz_i64;
2357     case INDEX_op_ctz_i64:
2358         return TCG_TARGET_HAS_ctz_i64;
2359     case INDEX_op_ctpop_i64:
2360         return TCG_TARGET_HAS_ctpop_i64;
2361     case INDEX_op_add2_i64:
2362         return TCG_TARGET_HAS_add2_i64;
2363     case INDEX_op_sub2_i64:
2364         return TCG_TARGET_HAS_sub2_i64;
2365     case INDEX_op_mulu2_i64:
2366         return TCG_TARGET_HAS_mulu2_i64;
2367     case INDEX_op_muls2_i64:
2368         return TCG_TARGET_HAS_muls2_i64;
2369     case INDEX_op_muluh_i64:
2370         return TCG_TARGET_HAS_muluh_i64;
2371     case INDEX_op_mulsh_i64:
2372         return TCG_TARGET_HAS_mulsh_i64;
2373 
2374     case INDEX_op_mov_vec:
2375     case INDEX_op_dup_vec:
2376     case INDEX_op_dupm_vec:
2377     case INDEX_op_ld_vec:
2378     case INDEX_op_st_vec:
2379     case INDEX_op_add_vec:
2380     case INDEX_op_sub_vec:
2381     case INDEX_op_and_vec:
2382     case INDEX_op_or_vec:
2383     case INDEX_op_xor_vec:
2384     case INDEX_op_cmp_vec:
2385         return has_type;
2386     case INDEX_op_dup2_vec:
2387         return has_type && TCG_TARGET_REG_BITS == 32;
2388     case INDEX_op_not_vec:
2389         return has_type && TCG_TARGET_HAS_not_vec;
2390     case INDEX_op_neg_vec:
2391         return has_type && TCG_TARGET_HAS_neg_vec;
2392     case INDEX_op_abs_vec:
2393         return has_type && TCG_TARGET_HAS_abs_vec;
2394     case INDEX_op_andc_vec:
2395         return has_type && TCG_TARGET_HAS_andc_vec;
2396     case INDEX_op_orc_vec:
2397         return has_type && TCG_TARGET_HAS_orc_vec;
2398     case INDEX_op_nand_vec:
2399         return has_type && TCG_TARGET_HAS_nand_vec;
2400     case INDEX_op_nor_vec:
2401         return has_type && TCG_TARGET_HAS_nor_vec;
2402     case INDEX_op_eqv_vec:
2403         return has_type && TCG_TARGET_HAS_eqv_vec;
2404     case INDEX_op_mul_vec:
2405         return has_type && TCG_TARGET_HAS_mul_vec;
2406     case INDEX_op_shli_vec:
2407     case INDEX_op_shri_vec:
2408     case INDEX_op_sari_vec:
2409         return has_type && TCG_TARGET_HAS_shi_vec;
2410     case INDEX_op_shls_vec:
2411     case INDEX_op_shrs_vec:
2412     case INDEX_op_sars_vec:
2413         return has_type && TCG_TARGET_HAS_shs_vec;
2414     case INDEX_op_shlv_vec:
2415     case INDEX_op_shrv_vec:
2416     case INDEX_op_sarv_vec:
2417         return has_type && TCG_TARGET_HAS_shv_vec;
2418     case INDEX_op_rotli_vec:
2419         return has_type && TCG_TARGET_HAS_roti_vec;
2420     case INDEX_op_rotls_vec:
2421         return has_type && TCG_TARGET_HAS_rots_vec;
2422     case INDEX_op_rotlv_vec:
2423     case INDEX_op_rotrv_vec:
2424         return has_type && TCG_TARGET_HAS_rotv_vec;
2425     case INDEX_op_ssadd_vec:
2426     case INDEX_op_usadd_vec:
2427     case INDEX_op_sssub_vec:
2428     case INDEX_op_ussub_vec:
2429         return has_type && TCG_TARGET_HAS_sat_vec;
2430     case INDEX_op_smin_vec:
2431     case INDEX_op_umin_vec:
2432     case INDEX_op_smax_vec:
2433     case INDEX_op_umax_vec:
2434         return has_type && TCG_TARGET_HAS_minmax_vec;
2435     case INDEX_op_bitsel_vec:
2436         return has_type && TCG_TARGET_HAS_bitsel_vec;
2437     case INDEX_op_cmpsel_vec:
2438         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2439 
2440     default:
2441         if (op < INDEX_op_last_generic) {
2442             const TCGOutOp *outop;
2443             TCGConstraintSetIndex con_set;
2444 
2445             if (!has_type) {
2446                 return false;
2447             }
2448 
2449             outop = all_outop[op];
2450             tcg_debug_assert(outop != NULL);
2451 
2452             con_set = outop->static_constraint;
2453             if (con_set == C_Dynamic) {
2454                 con_set = outop->dynamic_constraint(type, flags);
2455             }
2456             if (con_set >= 0) {
2457                 return true;
2458             }
2459             tcg_debug_assert(con_set == C_NotImplemented);
2460             return false;
2461         }
2462         tcg_debug_assert(op < NB_OPS);
2463         return true;
2464 
2465     case INDEX_op_last_generic:
2466         g_assert_not_reached();
2467     }
2468 }
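
/*
 * A sketch of how a front end might consult this predicate before
 * choosing between a native opcode and a generic expansion:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         // emit ctpop directly
 *     } else {
 *         // fall back to an expanded bit-counting sequence
 *     }
 */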
2469 
2470 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2471 {
2472     unsigned width;
2473 
2474     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2475     width = (type == TCG_TYPE_I32 ? 32 : 64);
2476 
2477     tcg_debug_assert(ofs < width);
2478     tcg_debug_assert(len > 0);
2479     tcg_debug_assert(len <= width - ofs);
2480 
2481     return TCG_TARGET_deposit_valid(type, ofs, len);
2482 }
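
/*
 * For example, a deposit with ofs = 16, len = 8 into an i32 replaces
 * bits [16,23] of the destination.  Whether the backend handles that
 * natively is up to TCG_TARGET_deposit_valid(); otherwise the generic
 * expander falls back to an equivalent shift-and-mask sequence.
 */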
2483 
2484 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2485 
2486 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2487                           TCGTemp *ret, TCGTemp **args)
2488 {
2489     TCGv_i64 extend_free[MAX_CALL_IARGS];
2490     int n_extend = 0;
2491     TCGOp *op;
2492     int i, n, pi = 0, total_args;
2493 
2494     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2495         init_call_layout(info);
2496         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2497     }
2498 
2499     total_args = info->nr_out + info->nr_in + 2;
2500     op = tcg_op_alloc(INDEX_op_call, total_args);
2501 
2502 #ifdef CONFIG_PLUGIN
2503     /* Flag helpers that may affect guest state */
2504     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2505         tcg_ctx->plugin_insn->calls_helpers = true;
2506     }
2507 #endif
2508 
2509     TCGOP_CALLO(op) = n = info->nr_out;
2510     switch (n) {
2511     case 0:
2512         tcg_debug_assert(ret == NULL);
2513         break;
2514     case 1:
2515         tcg_debug_assert(ret != NULL);
2516         op->args[pi++] = temp_arg(ret);
2517         break;
2518     case 2:
2519     case 4:
2520         tcg_debug_assert(ret != NULL);
2521         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2522         tcg_debug_assert(ret->temp_subindex == 0);
2523         for (i = 0; i < n; ++i) {
2524             op->args[pi++] = temp_arg(ret + i);
2525         }
2526         break;
2527     default:
2528         g_assert_not_reached();
2529     }
2530 
2531     TCGOP_CALLI(op) = n = info->nr_in;
2532     for (i = 0; i < n; i++) {
2533         const TCGCallArgumentLoc *loc = &info->in[i];
2534         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2535 
2536         switch (loc->kind) {
2537         case TCG_CALL_ARG_NORMAL:
2538         case TCG_CALL_ARG_BY_REF:
2539         case TCG_CALL_ARG_BY_REF_N:
2540             op->args[pi++] = temp_arg(ts);
2541             break;
2542 
2543         case TCG_CALL_ARG_EXTEND_U:
2544         case TCG_CALL_ARG_EXTEND_S:
2545             {
2546                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2547                 TCGv_i32 orig = temp_tcgv_i32(ts);
2548 
2549                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2550                     tcg_gen_ext_i32_i64(temp, orig);
2551                 } else {
2552                     tcg_gen_extu_i32_i64(temp, orig);
2553                 }
2554                 op->args[pi++] = tcgv_i64_arg(temp);
2555                 extend_free[n_extend++] = temp;
2556             }
2557             break;
2558 
2559         default:
2560             g_assert_not_reached();
2561         }
2562     }
2563     op->args[pi++] = (uintptr_t)func;
2564     op->args[pi++] = (uintptr_t)info;
2565     tcg_debug_assert(pi == total_args);
2566 
2567     if (tcg_ctx->emit_before_op) {
2568         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2569     } else {
2570         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2571     }
2572 
2573     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2574     for (i = 0; i < n_extend; ++i) {
2575         tcg_temp_free_i64(extend_free[i]);
2576     }
2577 }
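
/*
 * Example of the TCG_CALL_ARG_EXTEND path above: on a host ABI that
 * wants 32-bit arguments widened to 64 bits, each i32 input is copied
 * into a fresh EBB i64 temp (sign- or zero-extended according to the
 * helper's typecode), passed in its place, and freed again once the
 * call op has been emitted.
 */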
2578 
2579 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2580 {
2581     tcg_gen_callN(func, info, ret, NULL);
2582 }
2583 
2584 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2585 {
2586     tcg_gen_callN(func, info, ret, &t1);
2587 }
2588 
2589 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2590                    TCGTemp *t1, TCGTemp *t2)
2591 {
2592     TCGTemp *args[2] = { t1, t2 };
2593     tcg_gen_callN(func, info, ret, args);
2594 }
2595 
2596 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2597                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2598 {
2599     TCGTemp *args[3] = { t1, t2, t3 };
2600     tcg_gen_callN(func, info, ret, args);
2601 }
2602 
2603 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2604                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2605 {
2606     TCGTemp *args[4] = { t1, t2, t3, t4 };
2607     tcg_gen_callN(func, info, ret, args);
2608 }
2609 
2610 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2611                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2612 {
2613     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2614     tcg_gen_callN(func, info, ret, args);
2615 }
2616 
2617 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2618                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2619                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2620 {
2621     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2622     tcg_gen_callN(func, info, ret, args);
2623 }
2624 
2625 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2626                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2627                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2628 {
2629     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2630     tcg_gen_callN(func, info, ret, args);
2631 }
2632 
2633 static void tcg_reg_alloc_start(TCGContext *s)
2634 {
2635     int i, n;
2636 
2637     for (i = 0, n = s->nb_temps; i < n; i++) {
2638         TCGTemp *ts = &s->temps[i];
2639         TCGTempVal val = TEMP_VAL_MEM;
2640 
2641         switch (ts->kind) {
2642         case TEMP_CONST:
2643             val = TEMP_VAL_CONST;
2644             break;
2645         case TEMP_FIXED:
2646             val = TEMP_VAL_REG;
2647             break;
2648         case TEMP_GLOBAL:
2649             break;
2650         case TEMP_EBB:
2651             val = TEMP_VAL_DEAD;
2652             /* fall through */
2653         case TEMP_TB:
2654             ts->mem_allocated = 0;
2655             break;
2656         default:
2657             g_assert_not_reached();
2658         }
2659         ts->val_type = val;
2660     }
2661 
2662     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2663 }
2664 
2665 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2666                                  TCGTemp *ts)
2667 {
2668     int idx = temp_idx(ts);
2669 
2670     switch (ts->kind) {
2671     case TEMP_FIXED:
2672     case TEMP_GLOBAL:
2673         pstrcpy(buf, buf_size, ts->name);
2674         break;
2675     case TEMP_TB:
2676         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2677         break;
2678     case TEMP_EBB:
2679         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2680         break;
2681     case TEMP_CONST:
2682         switch (ts->type) {
2683         case TCG_TYPE_I32:
2684             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2685             break;
2686 #if TCG_TARGET_REG_BITS > 32
2687         case TCG_TYPE_I64:
2688             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2689             break;
2690 #endif
2691         case TCG_TYPE_V64:
2692         case TCG_TYPE_V128:
2693         case TCG_TYPE_V256:
2694             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2695                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2696             break;
2697         default:
2698             g_assert_not_reached();
2699         }
2700         break;
2701     }
2702     return buf;
2703 }
2704 
2705 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2706                              int buf_size, TCGArg arg)
2707 {
2708     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2709 }
2710 
2711 static const char * const cond_name[] =
2712 {
2713     [TCG_COND_NEVER] = "never",
2714     [TCG_COND_ALWAYS] = "always",
2715     [TCG_COND_EQ] = "eq",
2716     [TCG_COND_NE] = "ne",
2717     [TCG_COND_LT] = "lt",
2718     [TCG_COND_GE] = "ge",
2719     [TCG_COND_LE] = "le",
2720     [TCG_COND_GT] = "gt",
2721     [TCG_COND_LTU] = "ltu",
2722     [TCG_COND_GEU] = "geu",
2723     [TCG_COND_LEU] = "leu",
2724     [TCG_COND_GTU] = "gtu",
2725     [TCG_COND_TSTEQ] = "tsteq",
2726     [TCG_COND_TSTNE] = "tstne",
2727 };
2728 
2729 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2730 {
2731     [MO_UB]   = "ub",
2732     [MO_SB]   = "sb",
2733     [MO_LEUW] = "leuw",
2734     [MO_LESW] = "lesw",
2735     [MO_LEUL] = "leul",
2736     [MO_LESL] = "lesl",
2737     [MO_LEUQ] = "leq",
2738     [MO_BEUW] = "beuw",
2739     [MO_BESW] = "besw",
2740     [MO_BEUL] = "beul",
2741     [MO_BESL] = "besl",
2742     [MO_BEUQ] = "beq",
2743     [MO_128 + MO_BE] = "beo",
2744     [MO_128 + MO_LE] = "leo",
2745 };
2746 
2747 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2748     [MO_UNALN >> MO_ASHIFT]    = "un+",
2749     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2750     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2751     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2752     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2753     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2754     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2755     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2756 };
2757 
2758 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2759     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2760     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2761     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2762     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2763     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2764     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2765 };
2766 
2767 static const char bswap_flag_name[][6] = {
2768     [TCG_BSWAP_IZ] = "iz",
2769     [TCG_BSWAP_OZ] = "oz",
2770     [TCG_BSWAP_OS] = "os",
2771     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2772     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2773 };
2774 
2775 #ifdef CONFIG_PLUGIN
2776 static const char * const plugin_from_name[] = {
2777     "from-tb",
2778     "from-insn",
2779     "after-insn",
2780     "after-tb",
2781 };
2782 #endif
2783 
2784 static inline bool tcg_regset_single(TCGRegSet d)
2785 {
2786     return (d & (d - 1)) == 0;
2787 }
2788 
2789 static inline TCGReg tcg_regset_first(TCGRegSet d)
2790 {
2791     if (TCG_TARGET_NB_REGS <= 32) {
2792         return ctz32(d);
2793     } else {
2794         return ctz64(d);
2795     }
2796 }
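
/*
 * Note that tcg_regset_single() also accepts the empty set, since
 * 0 & (0 - 1) == 0; callers are expected to pass a non-empty set,
 * for which tcg_regset_first() then returns the lowest register.
 */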
2797 
2798 /* Return only the number of characters output -- no error return. */
2799 #define ne_fprintf(...) \
2800     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2801 
2802 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2803 {
2804     char buf[128];
2805     TCGOp *op;
2806 
2807     QTAILQ_FOREACH(op, &s->ops, link) {
2808         int i, k, nb_oargs, nb_iargs, nb_cargs;
2809         const TCGOpDef *def;
2810         TCGOpcode c;
2811         int col = 0;
2812 
2813         c = op->opc;
2814         def = &tcg_op_defs[c];
2815 
2816         if (c == INDEX_op_insn_start) {
2817             nb_oargs = 0;
2818             col += ne_fprintf(f, "\n ----");
2819 
2820             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2821                 col += ne_fprintf(f, " %016" PRIx64,
2822                                   tcg_get_insn_start_param(op, i));
2823             }
2824         } else if (c == INDEX_op_call) {
2825             const TCGHelperInfo *info = tcg_call_info(op);
2826             void *func = tcg_call_func(op);
2827 
2828             /* variable number of arguments */
2829             nb_oargs = TCGOP_CALLO(op);
2830             nb_iargs = TCGOP_CALLI(op);
2831             nb_cargs = def->nb_cargs;
2832 
2833             col += ne_fprintf(f, " %s ", def->name);
2834 
2835             /*
2836              * Print the function name from TCGHelperInfo, if available.
2837              * Note that plugins have a template function for the info,
2838              * but the actual function pointer comes from the plugin.
2839              */
2840             if (func == info->func) {
2841                 col += ne_fprintf(f, "%s", info->name);
2842             } else {
2843                 col += ne_fprintf(f, "plugin(%p)", func);
2844             }
2845 
2846             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2847             for (i = 0; i < nb_oargs; i++) {
2848                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2849                                                             op->args[i]));
2850             }
2851             for (i = 0; i < nb_iargs; i++) {
2852                 TCGArg arg = op->args[nb_oargs + i];
2853                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2854                 col += ne_fprintf(f, ",%s", t);
2855             }
2856         } else {
2857             if (def->flags & TCG_OPF_INT) {
2858                 col += ne_fprintf(f, " %s_i%d ",
2859                                   def->name,
2860                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2861             } else if (def->flags & TCG_OPF_VECTOR) {
2862                 col += ne_fprintf(f, "%s v%d,e%d,",
2863                                   def->name,
2864                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2865                                   8 << TCGOP_VECE(op));
2866             } else {
2867                 col += ne_fprintf(f, " %s ", def->name);
2868             }
2869 
2870             nb_oargs = def->nb_oargs;
2871             nb_iargs = def->nb_iargs;
2872             nb_cargs = def->nb_cargs;
2873 
2874             k = 0;
2875             for (i = 0; i < nb_oargs; i++) {
2876                 const char *sep = k ? "," : "";
2877                 col += ne_fprintf(f, "%s%s", sep,
2878                                   tcg_get_arg_str(s, buf, sizeof(buf),
2879                                                   op->args[k++]));
2880             }
2881             for (i = 0; i < nb_iargs; i++) {
2882                 const char *sep = k ? "," : "";
2883                 col += ne_fprintf(f, "%s%s", sep,
2884                                   tcg_get_arg_str(s, buf, sizeof(buf),
2885                                                   op->args[k++]));
2886             }
2887             switch (c) {
2888             case INDEX_op_brcond_i32:
2889             case INDEX_op_setcond_i32:
2890             case INDEX_op_negsetcond_i32:
2891             case INDEX_op_movcond_i32:
2892             case INDEX_op_brcond2_i32:
2893             case INDEX_op_setcond2_i32:
2894             case INDEX_op_brcond_i64:
2895             case INDEX_op_setcond_i64:
2896             case INDEX_op_negsetcond_i64:
2897             case INDEX_op_movcond_i64:
2898             case INDEX_op_cmp_vec:
2899             case INDEX_op_cmpsel_vec:
2900                 if (op->args[k] < ARRAY_SIZE(cond_name)
2901                     && cond_name[op->args[k]]) {
2902                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2903                 } else {
2904                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2905                 }
2906                 i = 1;
2907                 break;
2908             case INDEX_op_qemu_ld_i32:
2909             case INDEX_op_qemu_st_i32:
2910             case INDEX_op_qemu_st8_i32:
2911             case INDEX_op_qemu_ld_i64:
2912             case INDEX_op_qemu_st_i64:
2913             case INDEX_op_qemu_ld_i128:
2914             case INDEX_op_qemu_st_i128:
2915                 {
2916                     const char *s_al, *s_op, *s_at;
2917                     MemOpIdx oi = op->args[k++];
2918                     MemOp mop = get_memop(oi);
2919                     unsigned ix = get_mmuidx(oi);
2920 
2921                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2922                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2923                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2924                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2925 
2926                     /* If all fields are accounted for, print symbolically. */
2927                     if (!mop && s_al && s_op && s_at) {
2928                         col += ne_fprintf(f, ",%s%s%s,%u",
2929                                           s_at, s_al, s_op, ix);
2930                     } else {
2931                         mop = get_memop(oi);
2932                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2933                     }
2934                     i = 1;
2935                 }
2936                 break;
2937             case INDEX_op_bswap16_i32:
2938             case INDEX_op_bswap16_i64:
2939             case INDEX_op_bswap32_i32:
2940             case INDEX_op_bswap32_i64:
2941             case INDEX_op_bswap64_i64:
2942                 {
2943                     TCGArg flags = op->args[k];
2944                     const char *name = NULL;
2945 
2946                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2947                         name = bswap_flag_name[flags];
2948                     }
2949                     if (name) {
2950                         col += ne_fprintf(f, ",%s", name);
2951                     } else {
2952                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2953                     }
2954                     i = k = 1;
2955                 }
2956                 break;
2957 #ifdef CONFIG_PLUGIN
2958             case INDEX_op_plugin_cb:
2959                 {
2960                     TCGArg from = op->args[k++];
2961                     const char *name = NULL;
2962 
2963                     if (from < ARRAY_SIZE(plugin_from_name)) {
2964                         name = plugin_from_name[from];
2965                     }
2966                     if (name) {
2967                         col += ne_fprintf(f, "%s", name);
2968                     } else {
2969                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2970                     }
2971                     i = 1;
2972                 }
2973                 break;
2974 #endif
2975             default:
2976                 i = 0;
2977                 break;
2978             }
2979             switch (c) {
2980             case INDEX_op_set_label:
2981             case INDEX_op_br:
2982             case INDEX_op_brcond_i32:
2983             case INDEX_op_brcond_i64:
2984             case INDEX_op_brcond2_i32:
2985                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2986                                   arg_label(op->args[k])->id);
2987                 i++, k++;
2988                 break;
2989             case INDEX_op_mb:
2990                 {
2991                     TCGBar membar = op->args[k];
2992                     const char *b_op, *m_op;
2993 
2994                     switch (membar & TCG_BAR_SC) {
2995                     case 0:
2996                         b_op = "none";
2997                         break;
2998                     case TCG_BAR_LDAQ:
2999                         b_op = "acq";
3000                         break;
3001                     case TCG_BAR_STRL:
3002                         b_op = "rel";
3003                         break;
3004                     case TCG_BAR_SC:
3005                         b_op = "seq";
3006                         break;
3007                     default:
3008                         g_assert_not_reached();
3009                     }
3010 
3011                     switch (membar & TCG_MO_ALL) {
3012                     case 0:
3013                         m_op = "none";
3014                         break;
3015                     case TCG_MO_LD_LD:
3016                         m_op = "rr";
3017                         break;
3018                     case TCG_MO_LD_ST:
3019                         m_op = "rw";
3020                         break;
3021                     case TCG_MO_ST_LD:
3022                         m_op = "wr";
3023                         break;
3024                     case TCG_MO_ST_ST:
3025                         m_op = "ww";
3026                         break;
3027                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3028                         m_op = "rr+rw";
3029                         break;
3030                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3031                         m_op = "rr+wr";
3032                         break;
3033                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3034                         m_op = "rr+ww";
3035                         break;
3036                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3037                         m_op = "rw+wr";
3038                         break;
3039                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3040                         m_op = "rw+ww";
3041                         break;
3042                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3043                         m_op = "wr+ww";
3044                         break;
3045                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3046                         m_op = "rr+rw+wr";
3047                         break;
3048                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3049                         m_op = "rr+rw+ww";
3050                         break;
3051                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3052                         m_op = "rr+wr+ww";
3053                         break;
3054                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3055                         m_op = "rw+wr+ww";
3056                         break;
3057                     case TCG_MO_ALL:
3058                         m_op = "all";
3059                         break;
3060                     default:
3061                         g_assert_not_reached();
3062                     }
3063 
3064                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3065                     i++, k++;
3066                 }
3067                 break;
3068             default:
3069                 break;
3070             }
3071             for (; i < nb_cargs; i++, k++) {
3072                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3073                                   op->args[k]);
3074             }
3075         }
3076 
3077         if (have_prefs || op->life) {
3078             for (; col < 40; ++col) {
3079                 putc(' ', f);
3080             }
3081         }
3082 
3083         if (op->life) {
3084             unsigned life = op->life;
3085 
3086             if (life & (SYNC_ARG * 3)) {
3087                 ne_fprintf(f, "  sync:");
3088                 for (i = 0; i < 2; ++i) {
3089                     if (life & (SYNC_ARG << i)) {
3090                         ne_fprintf(f, " %d", i);
3091                     }
3092                 }
3093             }
3094             life /= DEAD_ARG;
3095             if (life) {
3096                 ne_fprintf(f, "  dead:");
3097                 for (i = 0; life; ++i, life >>= 1) {
3098                     if (life & 1) {
3099                         ne_fprintf(f, " %d", i);
3100                     }
3101                 }
3102             }
3103         }
3104 
3105         if (have_prefs) {
3106             for (i = 0; i < nb_oargs; ++i) {
3107                 TCGRegSet set = output_pref(op, i);
3108 
3109                 if (i == 0) {
3110                     ne_fprintf(f, "  pref=");
3111                 } else {
3112                     ne_fprintf(f, ",");
3113                 }
3114                 if (set == 0) {
3115                     ne_fprintf(f, "none");
3116                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3117                     ne_fprintf(f, "all");
3118 #ifdef CONFIG_DEBUG_TCG
3119                 } else if (tcg_regset_single(set)) {
3120                     TCGReg reg = tcg_regset_first(set);
3121                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3122 #endif
3123                 } else if (TCG_TARGET_NB_REGS <= 32) {
3124                     ne_fprintf(f, "0x%x", (uint32_t)set);
3125                 } else {
3126                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3127                 }
3128             }
3129         }
3130 
3131         putc('\n', f);
3132     }
3133 }
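
/*
 * Illustrative output (format per the code above, values invented):
 *
 *      mov_i32 tmp3,loc1                   dead: 1
 *      add_i32 tmp3,tmp3,$0x10             pref=0xffff
 *
 * Dollar-prefixed values are constants; the "sync"/"dead" and "pref"
 * columns appear only when liveness or preference data is present.
 */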
3134 
3135 /* we give more priority to constraints with fewer registers */
3136 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3137 {
3138     int n;
3139 
3140     arg_ct += k;
3141     n = ctpop64(arg_ct->regs);
3142 
3143     /*
3144      * Sort constraints of a single register first, which includes output
3145      * aliases (which must exactly match the input already allocated).
3146      */
3147     if (n == 1 || arg_ct->oalias) {
3148         return INT_MAX;
3149     }
3150 
3151     /*
3152      * Sort register pairs next, first then second immediately after.
3153      * Arbitrarily sort multiple pairs by the index of the first reg;
3154      * there shouldn't be many pairs.
3155      */
3156     switch (arg_ct->pair) {
3157     case 1:
3158     case 3:
3159         return (k + 1) * 2;
3160     case 2:
3161         return (arg_ct->pair_index + 1) * 2 - 1;
3162     }
3163 
3164     /* Finally, sort by register count: fewer allowed registers first. */
3165     assert(n > 1);
3166     return -n;
3167 }
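
/*
 * The resulting order, highest priority first: single-register and
 * output-alias constraints (INT_MAX), then register pairs with each
 * second member keyed just below its first, then plain sets with
 * fewer registers first, e.g. a 4-register set (-4) ahead of a
 * 16-register set (-16).
 */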
3168 
3169 /* sort from highest priority to lowest */
3170 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3171 {
3172     int i, j;
3173 
3174     for (i = 0; i < n; i++) {
3175         a[start + i].sort_index = start + i;
3176     }
3177     if (n <= 1) {
3178         return;
3179     }
3180     for (i = 0; i < n - 1; i++) {
3181         for (j = i + 1; j < n; j++) {
3182             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3183             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3184             if (p1 < p2) {
3185                 int tmp = a[start + i].sort_index;
3186                 a[start + i].sort_index = a[start + j].sort_index;
3187                 a[start + j].sort_index = tmp;
3188             }
3189         }
3190     }
3191 }
3192 
3193 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3194 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3195 
3196 static void process_constraint_sets(void)
3197 {
3198     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3199         const TCGConstraintSet *tdefs = &constraint_sets[c];
3200         TCGArgConstraint *args_ct = all_cts[c];
3201         int nb_oargs = tdefs->nb_oargs;
3202         int nb_iargs = tdefs->nb_iargs;
3203         int nb_args = nb_oargs + nb_iargs;
3204         bool saw_alias_pair = false;
3205 
3206         for (int i = 0; i < nb_args; i++) {
3207             const char *ct_str = tdefs->args_ct_str[i];
3208             bool input_p = i >= nb_oargs;
3209             int o;
3210 
3211             switch (*ct_str) {
3212             case '0' ... '9':
3213                 o = *ct_str - '0';
3214                 tcg_debug_assert(input_p);
3215                 tcg_debug_assert(o < nb_oargs);
3216                 tcg_debug_assert(args_ct[o].regs != 0);
3217                 tcg_debug_assert(!args_ct[o].oalias);
3218                 args_ct[i] = args_ct[o];
3219                 /* The output sets oalias.  */
3220                 args_ct[o].oalias = 1;
3221                 args_ct[o].alias_index = i;
3222                 /* The input sets ialias. */
3223                 args_ct[i].ialias = 1;
3224                 args_ct[i].alias_index = o;
3225                 if (args_ct[i].pair) {
3226                     saw_alias_pair = true;
3227                 }
3228                 tcg_debug_assert(ct_str[1] == '\0');
3229                 continue;
3230 
3231             case '&':
3232                 tcg_debug_assert(!input_p);
3233                 args_ct[i].newreg = true;
3234                 ct_str++;
3235                 break;
3236 
3237             case 'p': /* plus */
3238                 /* Allocate to the register after the previous. */
3239                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3240                 o = i - 1;
3241                 tcg_debug_assert(!args_ct[o].pair);
3242                 tcg_debug_assert(!args_ct[o].ct);
3243                 args_ct[i] = (TCGArgConstraint){
3244                     .pair = 2,
3245                     .pair_index = o,
3246                     .regs = args_ct[o].regs << 1,
3247                     .newreg = args_ct[o].newreg,
3248                 };
3249                 args_ct[o].pair = 1;
3250                 args_ct[o].pair_index = i;
3251                 tcg_debug_assert(ct_str[1] == '\0');
3252                 continue;
3253 
3254             case 'm': /* minus */
3255                 /* Allocate to the register before the previous. */
3256                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3257                 o = i - 1;
3258                 tcg_debug_assert(!args_ct[o].pair);
3259                 tcg_debug_assert(!args_ct[o].ct);
3260                 args_ct[i] = (TCGArgConstraint){
3261                     .pair = 1,
3262                     .pair_index = o,
3263                     .regs = args_ct[o].regs >> 1,
3264                     .newreg = args_ct[o].newreg,
3265                 };
3266                 args_ct[o].pair = 2;
3267                 args_ct[o].pair_index = i;
3268                 tcg_debug_assert(ct_str[1] == '\0');
3269                 continue;
3270             }
3271 
3272             do {
3273                 switch (*ct_str) {
3274                 case 'i':
3275                     args_ct[i].ct |= TCG_CT_CONST;
3276                     break;
3277 #ifdef TCG_REG_ZERO
3278                 case 'z':
3279                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3280                     break;
3281 #endif
3282 
3283                 /* Include all of the target-specific constraints. */
3284 
3285 #undef CONST
3286 #define CONST(CASE, MASK) \
3287     case CASE: args_ct[i].ct |= MASK; break;
3288 #define REGS(CASE, MASK) \
3289     case CASE: args_ct[i].regs |= MASK; break;
3290 
3291 #include "tcg-target-con-str.h"
3292 
3293 #undef REGS
3294 #undef CONST
3295                 default:
3296                 case '0' ... '9':
3297                 case '&':
3298                 case 'p':
3299                 case 'm':
3300                     /* Typo in TCGConstraintSet constraint. */
3301                     g_assert_not_reached();
3302                 }
3303             } while (*++ct_str != '\0');
3304         }
3305 
3306         /*
3307          * Fix up output pairs that are aliased with inputs.
3308          * When we created the alias, we copied pair from the output.
3309          * There are three cases:
3310          *    (1a) Pairs of inputs alias pairs of outputs.
3311          *    (1b) One input aliases the first of a pair of outputs.
3312          *    (2)  One input aliases the second of a pair of outputs.
3313          *
3314          * Case 1a is handled by making sure that the pair_index'es are
3315          * properly updated so that they appear the same as a pair of inputs.
3316          *
3317          * Case 1b is handled by setting the pair_index of the input to
3318          * itself, simply so it doesn't point to an unrelated argument.
3319          * Since we don't encounter the "second" during the input allocation
3320          * phase, nothing happens with the second half of the input pair.
3321          *
3322          * Case 2 is handled by setting the second input to pair=3, the
3323          * first output to pair=3, and the pair_index'es to match.
3324          */
3325         if (saw_alias_pair) {
3326             for (int i = nb_oargs; i < nb_args; i++) {
3327                 int o, o2, i2;
3328 
3329                 /*
3330                  * Since [0-9pm] must be alone in the constraint string,
3331                  * the only way they can both be set is if the pair comes
3332                  * from the output alias.
3333                  */
3334                 if (!args_ct[i].ialias) {
3335                     continue;
3336                 }
3337                 switch (args_ct[i].pair) {
3338                 case 0:
3339                     break;
3340                 case 1:
3341                     o = args_ct[i].alias_index;
3342                     o2 = args_ct[o].pair_index;
3343                     tcg_debug_assert(args_ct[o].pair == 1);
3344                     tcg_debug_assert(args_ct[o2].pair == 2);
3345                     if (args_ct[o2].oalias) {
3346                         /* Case 1a */
3347                         i2 = args_ct[o2].alias_index;
3348                         tcg_debug_assert(args_ct[i2].pair == 2);
3349                         args_ct[i2].pair_index = i;
3350                         args_ct[i].pair_index = i2;
3351                     } else {
3352                         /* Case 1b */
3353                         args_ct[i].pair_index = i;
3354                     }
3355                     break;
3356                 case 2:
3357                     o = args_ct[i].alias_index;
3358                     o2 = args_ct[o].pair_index;
3359                     tcg_debug_assert(args_ct[o].pair == 2);
3360                     tcg_debug_assert(args_ct[o2].pair == 1);
3361                     if (args_ct[o2].oalias) {
3362                         /* Case 1a */
3363                         i2 = args_ct[o2].alias_index;
3364                         tcg_debug_assert(args_ct[i2].pair == 1);
3365                         args_ct[i2].pair_index = i;
3366                         args_ct[i].pair_index = i2;
3367                     } else {
3368                         /* Case 2 */
3369                         args_ct[i].pair = 3;
3370                         args_ct[o2].pair = 3;
3371                         args_ct[i].pair_index = o2;
3372                         args_ct[o2].pair_index = i;
3373                     }
3374                     break;
3375                 default:
3376                     g_assert_not_reached();
3377                 }
3378             }
3379         }
3380 
3381         /* sort the constraints (XXX: this is just a heuristic) */
3382         sort_constraints(args_ct, 0, nb_oargs);
3383         sort_constraints(args_ct, nb_oargs, nb_iargs);
3384     }
3385 }
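
/*
 * Illustrative walk-through of the alias-pair fixup, using a
 * hypothetical constraint set { "r", "p", "1", "r" }: the two outputs
 * form a register pair and input 2 aliases the *second* output, so
 * parsing copies pair == 2 onto the input.  The fixup loop then
 * applies "case 2": the input and the *first* output are both marked
 * pair == 3 with pair_index pointing at each other, which lets the
 * allocator keep the aliased halves of the pair consistent.
 */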
3386 
3387 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3388 {
3389     TCGOpcode opc = op->opc;
3390     TCGType type = TCGOP_TYPE(op);
3391     unsigned flags = TCGOP_FLAGS(op);
3392     const TCGOpDef *def = &tcg_op_defs[opc];
3393     const TCGOutOp *outop = all_outop[opc];
3394     TCGConstraintSetIndex con_set;
3395 
3396     if (def->flags & TCG_OPF_NOT_PRESENT) {
3397         return empty_cts;
3398     }
3399 
3400     if (outop) {
3401         con_set = outop->static_constraint;
3402         if (con_set == C_Dynamic) {
3403             con_set = outop->dynamic_constraint(type, flags);
3404         }
3405     } else {
3406         con_set = tcg_target_op_def(opc, type, flags);
3407     }
3408     tcg_debug_assert(con_set >= 0);
3409     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3410 
3411     /* The constraint arguments must match TCGOpcode arguments. */
3412     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3413     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3414 
3415     return all_cts[con_set];
3416 }
3417 
3418 static void remove_label_use(TCGOp *op, int idx)
3419 {
3420     TCGLabel *label = arg_label(op->args[idx]);
3421     TCGLabelUse *use;
3422 
3423     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3424         if (use->op == op) {
3425             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3426             return;
3427         }
3428     }
3429     g_assert_not_reached();
3430 }
3431 
3432 void tcg_op_remove(TCGContext *s, TCGOp *op)
3433 {
3434     switch (op->opc) {
3435     case INDEX_op_br:
3436         remove_label_use(op, 0);
3437         break;
3438     case INDEX_op_brcond_i32:
3439     case INDEX_op_brcond_i64:
3440         remove_label_use(op, 3);
3441         break;
3442     case INDEX_op_brcond2_i32:
3443         remove_label_use(op, 5);
3444         break;
3445     default:
3446         break;
3447     }
3448 
3449     QTAILQ_REMOVE(&s->ops, op, link);
3450     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3451     s->nb_ops--;
3452 }
3453 
3454 void tcg_remove_ops_after(TCGOp *op)
3455 {
3456     TCGContext *s = tcg_ctx;
3457 
3458     while (true) {
3459         TCGOp *last = tcg_last_op();
3460         if (last == op) {
3461             return;
3462         }
3463         tcg_op_remove(s, last);
3464     }
3465 }
3466 
3467 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3468 {
3469     TCGContext *s = tcg_ctx;
3470     TCGOp *op = NULL;
3471 
3472     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3473         QTAILQ_FOREACH(op, &s->free_ops, link) {
3474             if (nargs <= op->nargs) {
3475                 QTAILQ_REMOVE(&s->free_ops, op, link);
3476                 nargs = op->nargs;
3477                 goto found;
3478             }
3479         }
3480     }
3481 
3482     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3483     nargs = MAX(4, nargs);
3484     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3485 
3486  found:
3487     memset(op, 0, offsetof(TCGOp, link));
3488     op->opc = opc;
3489     op->nargs = nargs;
3490 
3491     /* Check for bitfield overflow. */
3492     tcg_debug_assert(op->nargs == nargs);
3493 
3494     s->nb_ops++;
3495     return op;
3496 }
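
/*
 * A usage sketch (illustrative): ops removed by optimization are
 * pushed onto s->free_ops by tcg_op_remove().  A later request for,
 * say, 3 arguments can be satisfied by a recycled op allocated with
 * 4 -- nargs is widened back to op->nargs above so the trailing
 * storage is not lost.  Only when no recycled op fits do we fall
 * back to tcg_malloc(), rounding nargs up to at least 4.
 */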
3497 
3498 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3499 {
3500     TCGOp *op = tcg_op_alloc(opc, nargs);
3501 
3502     if (tcg_ctx->emit_before_op) {
3503         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3504     } else {
3505         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3506     }
3507     return op;
3508 }
3509 
3510 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3511                             TCGOpcode opc, TCGType type, unsigned nargs)
3512 {
3513     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3514 
3515     TCGOP_TYPE(new_op) = type;
3516     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3517     return new_op;
3518 }
3519 
3520 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3521                            TCGOpcode opc, TCGType type, unsigned nargs)
3522 {
3523     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3524 
3525     TCGOP_TYPE(new_op) = type;
3526     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3527     return new_op;
3528 }
3529 
3530 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3531 {
3532     TCGLabelUse *u;
3533 
3534     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3535         TCGOp *op = u->op;
3536         switch (op->opc) {
3537         case INDEX_op_br:
3538             op->args[0] = label_arg(to);
3539             break;
3540         case INDEX_op_brcond_i32:
3541         case INDEX_op_brcond_i64:
3542             op->args[3] = label_arg(to);
3543             break;
3544         case INDEX_op_brcond2_i32:
3545             op->args[5] = label_arg(to);
3546             break;
3547         default:
3548             g_assert_not_reached();
3549         }
3550     }
3551 
3552     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3553 }
3554 
3555 /* Reachability analysis: remove unreachable code.  */
3556 static void __attribute__((noinline))
3557 reachable_code_pass(TCGContext *s)
3558 {
3559     TCGOp *op, *op_next, *op_prev;
3560     bool dead = false;
3561 
3562     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3563         bool remove = dead;
3564         TCGLabel *label;
3565 
3566         switch (op->opc) {
3567         case INDEX_op_set_label:
3568             label = arg_label(op->args[0]);
3569 
3570             /*
3571              * Note that the first op in the TB is always a load,
3572              * so there is always something before a label.
3573              */
3574             op_prev = QTAILQ_PREV(op, link);
3575 
3576             /*
3577              * If we find two sequential labels, move all branches to
3578              * reference the second label and remove the first label.
3579              * Do this before branch to next optimization, so that the
3580              * middle label is out of the way.
3581              */
3582             if (op_prev->opc == INDEX_op_set_label) {
3583                 move_label_uses(label, arg_label(op_prev->args[0]));
3584                 tcg_op_remove(s, op_prev);
3585                 op_prev = QTAILQ_PREV(op, link);
3586             }
3587 
3588             /*
3589              * Optimization can fold conditional branches to unconditional.
3590              * If we find a label which is preceded by an unconditional
3591              * branch to next, remove the branch.  We couldn't do this when
3592              * processing the branch because any dead code between the branch
3593              * and label had not yet been removed.
3594              */
3595             if (op_prev->opc == INDEX_op_br &&
3596                 label == arg_label(op_prev->args[0])) {
3597                 tcg_op_remove(s, op_prev);
3598                 /* Fall through means insns become live again.  */
3599                 dead = false;
3600             }
3601 
3602             if (QSIMPLEQ_EMPTY(&label->branches)) {
3603                 /*
3604                  * While there is an occasional backward branch, virtually
3605                  * all branches generated by the translators are forward.
3606                  * Which means that generally we will have already removed
3607                  * Which means that by the time we reach a label, we will
3608                  * generally have already removed all references to it, and there is
3609                  */
3610                 remove = true;
3611             } else {
3612                 /* Once we see a label, insns become live again.  */
3613                 dead = false;
3614                 remove = false;
3615             }
3616             break;
3617 
3618         case INDEX_op_br:
3619         case INDEX_op_exit_tb:
3620         case INDEX_op_goto_ptr:
3621             /* Unconditional branches; everything following is dead.  */
3622             dead = true;
3623             break;
3624 
3625         case INDEX_op_call:
3626             /* Notice noreturn helper calls, raising exceptions.  */
3627             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3628                 dead = true;
3629             }
3630             break;
3631 
3632         case INDEX_op_insn_start:
3633             /* Never remove -- we need to keep these for unwind.  */
3634             remove = false;
3635             break;
3636 
3637         default:
3638             break;
3639         }
3640 
3641         if (remove) {
3642             tcg_op_remove(s, op);
3643         }
3644     }
3645 }
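
/*
 * Illustrative before/after (hypothetical labels):
 *
 *     brcond_i32 ..., $L1      ->  brcond_i32 ..., $L2
 *     br $L2                   ->  (removed: branch to next)
 *     set_label $L1            ->  (removed: uses moved to $L2)
 *     set_label $L2            ->  set_label $L2
 *
 * Merging the adjacent labels first is what exposes the unconditional
 * branch-to-next for removal; any ops between an unconditional branch
 * and the next label have already been dropped via the 'dead' flag.
 */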
3646 
3647 #define TS_DEAD  1
3648 #define TS_MEM   2
3649 
3650 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3651 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
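
/*
 * An illustrative reading of the encoding above: ts->state is a mask,
 * so TS_DEAD | TS_MEM means "dead, and its canonical memory slot is
 * up to date".  Per-op results are packed into op->life, where bit
 * DEAD_ARG << n records that argument n dies at this op and bit
 * SYNC_ARG << n that output n must be written back to memory here;
 * IS_DEAD_ARG() and NEED_SYNC_ARG() test exactly those bits.
 */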
3652 
3653 /* For liveness_pass_1, the register preferences for a given temp.  */
3654 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3655 {
3656     return ts->state_ptr;
3657 }
3658 
3659 /* For liveness_pass_1, reset the preferences for a given temp to the
3660  * maximal regset for its type.
3661  */
3662 static inline void la_reset_pref(TCGTemp *ts)
3663 {
3664     *la_temp_pref(ts)
3665         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3666 }
3667 
3668 /* liveness analysis: end of function: all temps are dead, and globals
3669    should be in memory. */
3670 static void la_func_end(TCGContext *s, int ng, int nt)
3671 {
3672     int i;
3673 
3674     for (i = 0; i < ng; ++i) {
3675         s->temps[i].state = TS_DEAD | TS_MEM;
3676         la_reset_pref(&s->temps[i]);
3677     }
3678     for (i = ng; i < nt; ++i) {
3679         s->temps[i].state = TS_DEAD;
3680         la_reset_pref(&s->temps[i]);
3681     }
3682 }
3683 
3684 /* liveness analysis: end of basic block: all temps are dead, globals
3685    and local temps should be in memory. */
3686 static void la_bb_end(TCGContext *s, int ng, int nt)
3687 {
3688     int i;
3689 
3690     for (i = 0; i < nt; ++i) {
3691         TCGTemp *ts = &s->temps[i];
3692         int state;
3693 
3694         switch (ts->kind) {
3695         case TEMP_FIXED:
3696         case TEMP_GLOBAL:
3697         case TEMP_TB:
3698             state = TS_DEAD | TS_MEM;
3699             break;
3700         case TEMP_EBB:
3701         case TEMP_CONST:
3702             state = TS_DEAD;
3703             break;
3704         default:
3705             g_assert_not_reached();
3706         }
3707         ts->state = state;
3708         la_reset_pref(ts);
3709     }
3710 }
3711 
3712 /* liveness analysis: sync globals back to memory.  */
3713 static void la_global_sync(TCGContext *s, int ng)
3714 {
3715     int i;
3716 
3717     for (i = 0; i < ng; ++i) {
3718         int state = s->temps[i].state;
3719         s->temps[i].state = state | TS_MEM;
3720         if (state == TS_DEAD) {
3721             /* If the global was previously dead, reset prefs.  */
3722             la_reset_pref(&s->temps[i]);
3723         }
3724     }
3725 }
3726 
3727 /*
3728  * liveness analysis: conditional branch: all temps are dead unless
3729  * explicitly live-across-conditional-branch, globals and local temps
3730  * should be synced.
3731  */
3732 static void la_bb_sync(TCGContext *s, int ng, int nt)
3733 {
3734     la_global_sync(s, ng);
3735 
3736     for (int i = ng; i < nt; ++i) {
3737         TCGTemp *ts = &s->temps[i];
3738         int state;
3739 
3740         switch (ts->kind) {
3741         case TEMP_TB:
3742             state = ts->state;
3743             ts->state = state | TS_MEM;
3744             if (state != TS_DEAD) {
3745                 continue;
3746             }
3747             break;
3748         case TEMP_EBB:
3749         case TEMP_CONST:
3750             continue;
3751         default:
3752             g_assert_not_reached();
3753         }
3754         la_reset_pref(&s->temps[i]);
3755     }
3756 }
3757 
3758 /* liveness analysis: sync globals back to memory and kill.  */
3759 static void la_global_kill(TCGContext *s, int ng)
3760 {
3761     int i;
3762 
3763     for (i = 0; i < ng; i++) {
3764         s->temps[i].state = TS_DEAD | TS_MEM;
3765         la_reset_pref(&s->temps[i]);
3766     }
3767 }
3768 
3769 /* liveness analysis: note live globals crossing calls.  */
3770 static void la_cross_call(TCGContext *s, int nt)
3771 {
3772     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3773     int i;
3774 
3775     for (i = 0; i < nt; i++) {
3776         TCGTemp *ts = &s->temps[i];
3777         if (!(ts->state & TS_DEAD)) {
3778             TCGRegSet *pset = la_temp_pref(ts);
3779             TCGRegSet set = *pset;
3780 
3781             set &= mask;
3782             /* If the combination is not possible, restart.  */
3783             if (set == 0) {
3784                 set = tcg_target_available_regs[ts->type] & mask;
3785             }
3786             *pset = set;
3787         }
3788     }
3789 }
3790 
3791 /*
3792  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3793  * to TEMP_EBB, if possible.
3794  */
3795 static void __attribute__((noinline))
3796 liveness_pass_0(TCGContext *s)
3797 {
3798     void * const multiple_ebb = (void *)(uintptr_t)-1;
3799     int nb_temps = s->nb_temps;
3800     TCGOp *op, *ebb;
3801 
3802     for (int i = s->nb_globals; i < nb_temps; ++i) {
3803         s->temps[i].state_ptr = NULL;
3804     }
3805 
3806     /*
3807      * Represent each EBB by the op at which it begins.  In the case of
3808      * the first EBB, this is the first op, otherwise it is a label.
3809      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3810      * within a single EBB, else MULTIPLE_EBB.
3811      */
3812     ebb = QTAILQ_FIRST(&s->ops);
3813     QTAILQ_FOREACH(op, &s->ops, link) {
3814         const TCGOpDef *def;
3815         int nb_oargs, nb_iargs;
3816 
3817         switch (op->opc) {
3818         case INDEX_op_set_label:
3819             ebb = op;
3820             continue;
3821         case INDEX_op_discard:
3822             continue;
3823         case INDEX_op_call:
3824             nb_oargs = TCGOP_CALLO(op);
3825             nb_iargs = TCGOP_CALLI(op);
3826             break;
3827         default:
3828             def = &tcg_op_defs[op->opc];
3829             nb_oargs = def->nb_oargs;
3830             nb_iargs = def->nb_iargs;
3831             break;
3832         }
3833 
3834         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3835             TCGTemp *ts = arg_temp(op->args[i]);
3836 
3837             if (ts->kind != TEMP_TB) {
3838                 continue;
3839             }
3840             if (ts->state_ptr == NULL) {
3841                 ts->state_ptr = ebb;
3842             } else if (ts->state_ptr != ebb) {
3843                 ts->state_ptr = multiple_ebb;
3844             }
3845         }
3846     }
3847 
3848     /*
3849      * For TEMP_TB that turned out not to be used beyond one EBB,
3850      * reduce the liveness to TEMP_EBB.
3851      */
3852     for (int i = s->nb_globals; i < nb_temps; ++i) {
3853         TCGTemp *ts = &s->temps[i];
3854         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3855             ts->kind = TEMP_EBB;
3856         }
3857     }
3858 }
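
/*
 * Worked sketch (hypothetical temps): if t1 is only used between the
 * start of the TB and the first set_label, its state_ptr ends up
 * pointing at that single EBB's first op and t1 is demoted to
 * TEMP_EBB; if t2 is also used after a label, its state_ptr becomes
 * MULTIPLE_EBB and it stays TEMP_TB.  The demotion matters because
 * TEMP_EBB temps are not forced out to memory at basic block ends
 * (see la_bb_end above).
 */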
3859 
3860 /* Liveness analysis: update the opc_arg_life array to tell if a
3861    given input argument is dead. Instructions updating dead
3862    temporaries are removed. */
3863 static void __attribute__((noinline))
3864 liveness_pass_1(TCGContext *s)
3865 {
3866     int nb_globals = s->nb_globals;
3867     int nb_temps = s->nb_temps;
3868     TCGOp *op, *op_prev;
3869     TCGRegSet *prefs;
3870     int i;
3871 
3872     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3873     for (i = 0; i < nb_temps; ++i) {
3874         s->temps[i].state_ptr = prefs + i;
3875     }
3876 
3877     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3878     la_func_end(s, nb_globals, nb_temps);
3879 
3880     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3881         int nb_iargs, nb_oargs;
3882         TCGOpcode opc_new, opc_new2;
3883         bool have_opc_new2;
3884         TCGLifeData arg_life = 0;
3885         TCGTemp *ts;
3886         TCGOpcode opc = op->opc;
3887         const TCGOpDef *def = &tcg_op_defs[opc];
3888         const TCGArgConstraint *args_ct;
3889 
3890         switch (opc) {
3891         case INDEX_op_call:
3892             {
3893                 const TCGHelperInfo *info = tcg_call_info(op);
3894                 int call_flags = tcg_call_flags(op);
3895 
3896                 nb_oargs = TCGOP_CALLO(op);
3897                 nb_iargs = TCGOP_CALLI(op);
3898 
3899                 /* pure functions can be removed if their result is unused */
3900                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3901                     for (i = 0; i < nb_oargs; i++) {
3902                         ts = arg_temp(op->args[i]);
3903                         if (ts->state != TS_DEAD) {
3904                             goto do_not_remove_call;
3905                         }
3906                     }
3907                     goto do_remove;
3908                 }
3909             do_not_remove_call:
3910 
3911                 /* Output args are dead.  */
3912                 for (i = 0; i < nb_oargs; i++) {
3913                     ts = arg_temp(op->args[i]);
3914                     if (ts->state & TS_DEAD) {
3915                         arg_life |= DEAD_ARG << i;
3916                     }
3917                     if (ts->state & TS_MEM) {
3918                         arg_life |= SYNC_ARG << i;
3919                     }
3920                     ts->state = TS_DEAD;
3921                     la_reset_pref(ts);
3922                 }
3923 
3924                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3925                 memset(op->output_pref, 0, sizeof(op->output_pref));
3926 
3927                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3928                                     TCG_CALL_NO_READ_GLOBALS))) {
3929                     la_global_kill(s, nb_globals);
3930                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3931                     la_global_sync(s, nb_globals);
3932                 }
3933 
3934                 /* Record arguments that die in this helper.  */
3935                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3936                     ts = arg_temp(op->args[i]);
3937                     if (ts->state & TS_DEAD) {
3938                         arg_life |= DEAD_ARG << i;
3939                     }
3940                 }
3941 
3942                 /* For all live registers, remove call-clobbered prefs.  */
3943                 la_cross_call(s, nb_temps);
3944 
3945                 /*
3946                  * Input arguments are live for preceding opcodes.
3947                  *
3948                  * For those arguments that die, and will be allocated in
3949                  * registers, clear the register set for that arg, to be
3950                  * filled in below.  For args that will be on the stack,
3951                  * reset to any available reg.  Process arguments in reverse
3952                  * order so that if a temp is used more than once, the stack
3953                  * reset to max happens before the register reset to 0.
3954                  */
3955                 for (i = nb_iargs - 1; i >= 0; i--) {
3956                     const TCGCallArgumentLoc *loc = &info->in[i];
3957                     ts = arg_temp(op->args[nb_oargs + i]);
3958 
3959                     if (ts->state & TS_DEAD) {
3960                         switch (loc->kind) {
3961                         case TCG_CALL_ARG_NORMAL:
3962                         case TCG_CALL_ARG_EXTEND_U:
3963                         case TCG_CALL_ARG_EXTEND_S:
3964                             if (arg_slot_reg_p(loc->arg_slot)) {
3965                                 *la_temp_pref(ts) = 0;
3966                                 break;
3967                             }
3968                             /* fall through */
3969                         default:
3970                             *la_temp_pref(ts) =
3971                                 tcg_target_available_regs[ts->type];
3972                             break;
3973                         }
3974                         ts->state &= ~TS_DEAD;
3975                     }
3976                 }
3977 
3978                 /*
3979                  * For each input argument, add its input register to prefs.
3980                  * If a temp is used once, this produces a single set bit;
3981                  * if a temp is used multiple times, this produces a set.
3982                  */
3983                 for (i = 0; i < nb_iargs; i++) {
3984                     const TCGCallArgumentLoc *loc = &info->in[i];
3985                     ts = arg_temp(op->args[nb_oargs + i]);
3986 
3987                     switch (loc->kind) {
3988                     case TCG_CALL_ARG_NORMAL:
3989                     case TCG_CALL_ARG_EXTEND_U:
3990                     case TCG_CALL_ARG_EXTEND_S:
3991                         if (arg_slot_reg_p(loc->arg_slot)) {
3992                             tcg_regset_set_reg(*la_temp_pref(ts),
3993                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3994                         }
3995                         break;
3996                     default:
3997                         break;
3998                     }
3999                 }
4000             }
4001             break;
4002         case INDEX_op_insn_start:
4003             break;
4004         case INDEX_op_discard:
4005             /* mark the temporary as dead */
4006             ts = arg_temp(op->args[0]);
4007             ts->state = TS_DEAD;
4008             la_reset_pref(ts);
4009             break;
4010 
4011         case INDEX_op_add2_i32:
4012         case INDEX_op_add2_i64:
4013             opc_new = INDEX_op_add;
4014             goto do_addsub2;
4015         case INDEX_op_sub2_i32:
4016         case INDEX_op_sub2_i64:
4017             opc_new = INDEX_op_sub;
4018         do_addsub2:
4019             nb_iargs = 4;
4020             nb_oargs = 2;
4021             /* Test if the high part of the operation is dead, but not
4022                the low part.  The result can be optimized to a simple
4023                add or sub.  This happens often for an x86_64 guest when the
4024                cpu mode is set to 32 bit.  */
4025             if (arg_temp(op->args[1])->state == TS_DEAD) {
4026                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4027                     goto do_remove;
4028                 }
4029                 /* Replace the opcode and adjust the args in place,
4030                    leaving 3 unused args at the end.  */
4031                 op->opc = opc = opc_new;
4032                 op->args[1] = op->args[2];
4033                 op->args[2] = op->args[4];
4034                 /* Fall through and mark the single-word operation live.  */
4035                 nb_iargs = 2;
4036                 nb_oargs = 1;
4037             }
4038             goto do_not_remove;
4039 
4040         case INDEX_op_mulu2_i32:
4041             opc_new = INDEX_op_mul_i32;
4042             opc_new2 = INDEX_op_muluh_i32;
4043             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4044             goto do_mul2;
4045         case INDEX_op_muls2_i32:
4046             opc_new = INDEX_op_mul_i32;
4047             opc_new2 = INDEX_op_mulsh_i32;
4048             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4049             goto do_mul2;
4050         case INDEX_op_mulu2_i64:
4051             opc_new = INDEX_op_mul_i64;
4052             opc_new2 = INDEX_op_muluh_i64;
4053             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4054             goto do_mul2;
4055         case INDEX_op_muls2_i64:
4056             opc_new = INDEX_op_mul_i64;
4057             opc_new2 = INDEX_op_mulsh_i64;
4058             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4059             goto do_mul2;
4060         do_mul2:
4061             nb_iargs = 2;
4062             nb_oargs = 2;
4063             if (arg_temp(op->args[1])->state == TS_DEAD) {
4064                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4065                     /* Both parts of the operation are dead.  */
4066                     goto do_remove;
4067                 }
4068                 /* The high part of the operation is dead; generate the low. */
4069                 op->opc = opc = opc_new;
4070                 op->args[1] = op->args[2];
4071                 op->args[2] = op->args[3];
4072             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4073                 /* The low part of the operation is dead; generate the high. */
4074                 op->opc = opc = opc_new2;
4075                 op->args[0] = op->args[1];
4076                 op->args[1] = op->args[2];
4077                 op->args[2] = op->args[3];
4078             } else {
4079                 goto do_not_remove;
4080             }
4081             /* Mark the single-word operation live.  */
4082             nb_oargs = 1;
4083             goto do_not_remove;
4084 
4085         default:
4086             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4087             nb_iargs = def->nb_iargs;
4088             nb_oargs = def->nb_oargs;
4089 
4090             /* Test if the operation can be removed because all
4091                its outputs are dead. We assume that nb_oargs == 0
4092                implies side effects */
4093             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4094                 for (i = 0; i < nb_oargs; i++) {
4095                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4096                         goto do_not_remove;
4097                     }
4098                 }
4099                 goto do_remove;
4100             }
4101             goto do_not_remove;
4102 
4103         do_remove:
4104             tcg_op_remove(s, op);
4105             break;
4106 
4107         do_not_remove:
4108             for (i = 0; i < nb_oargs; i++) {
4109                 ts = arg_temp(op->args[i]);
4110 
4111                 /* Remember the preference of the uses that followed.  */
4112                 if (i < ARRAY_SIZE(op->output_pref)) {
4113                     op->output_pref[i] = *la_temp_pref(ts);
4114                 }
4115 
4116                 /* Output args are dead.  */
4117                 if (ts->state & TS_DEAD) {
4118                     arg_life |= DEAD_ARG << i;
4119                 }
4120                 if (ts->state & TS_MEM) {
4121                     arg_life |= SYNC_ARG << i;
4122                 }
4123                 ts->state = TS_DEAD;
4124                 la_reset_pref(ts);
4125             }
4126 
4127             /* If end of basic block, update.  */
4128             if (def->flags & TCG_OPF_BB_EXIT) {
4129                 la_func_end(s, nb_globals, nb_temps);
4130             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4131                 la_bb_sync(s, nb_globals, nb_temps);
4132             } else if (def->flags & TCG_OPF_BB_END) {
4133                 la_bb_end(s, nb_globals, nb_temps);
4134             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4135                 la_global_sync(s, nb_globals);
4136                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4137                     la_cross_call(s, nb_temps);
4138                 }
4139             }
4140 
4141             /* Record arguments that die in this opcode.  */
4142             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4143                 ts = arg_temp(op->args[i]);
4144                 if (ts->state & TS_DEAD) {
4145                     arg_life |= DEAD_ARG << i;
4146                 }
4147             }
4148 
4149             /* Input arguments are live for preceding opcodes.  */
4150             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4151                 ts = arg_temp(op->args[i]);
4152                 if (ts->state & TS_DEAD) {
4153                     /* For operands that were dead, initially allow
4154                        all regs for the type.  */
4155                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4156                     ts->state &= ~TS_DEAD;
4157                 }
4158             }
4159 
4160             /* Incorporate constraints for this operand.  */
4161             switch (opc) {
4162             case INDEX_op_mov:
4163                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4164                    have proper constraints.  That said, special case
4165                    moves to propagate preferences backward.  */
4166                 if (IS_DEAD_ARG(1)) {
4167                     *la_temp_pref(arg_temp(op->args[0]))
4168                         = *la_temp_pref(arg_temp(op->args[1]));
4169                 }
4170                 break;
4171 
4172             default:
4173                 args_ct = opcode_args_ct(op);
4174                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4175                     const TCGArgConstraint *ct = &args_ct[i];
4176                     TCGRegSet set, *pset;
4177 
4178                     ts = arg_temp(op->args[i]);
4179                     pset = la_temp_pref(ts);
4180                     set = *pset;
4181 
4182                     set &= ct->regs;
4183                     if (ct->ialias) {
4184                         set &= output_pref(op, ct->alias_index);
4185                     }
4186                     /* If the combination is not possible, restart.  */
4187                     if (set == 0) {
4188                         set = ct->regs;
4189                     }
4190                     *pset = set;
4191                 }
4192                 break;
4193             }
4194             break;
4195         }
4196         op->life = arg_life;
4197     }
4198 }
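
/*
 * Illustrative example of the do_addsub2 folding above (hypothetical
 * temps): for
 *
 *     add2_i32  lo, hi, al, ah, bl, bh
 *
 * with 'hi' dead but 'lo' live, the op is rewritten in place as
 *
 *     add       lo, al, bl
 *
 * by shifting args[2] and args[4] down, and liveness continues as for
 * a single-word add.  As the comment there notes, this fires often
 * for an x86_64 guest running in 32-bit mode.
 */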
4199 
4200 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4201 static bool __attribute__((noinline))
4202 liveness_pass_2(TCGContext *s)
4203 {
4204     int nb_globals = s->nb_globals;
4205     int nb_temps, i;
4206     bool changes = false;
4207     TCGOp *op, *op_next;
4208 
4209     /* Create a temporary for each indirect global.  */
4210     for (i = 0; i < nb_globals; ++i) {
4211         TCGTemp *its = &s->temps[i];
4212         if (its->indirect_reg) {
4213             TCGTemp *dts = tcg_temp_alloc(s);
4214             dts->type = its->type;
4215             dts->base_type = its->base_type;
4216             dts->temp_subindex = its->temp_subindex;
4217             dts->kind = TEMP_EBB;
4218             its->state_ptr = dts;
4219         } else {
4220             its->state_ptr = NULL;
4221         }
4222         /* All globals begin dead.  */
4223         its->state = TS_DEAD;
4224     }
4225     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4226         TCGTemp *its = &s->temps[i];
4227         its->state_ptr = NULL;
4228         its->state = TS_DEAD;
4229     }
4230 
4231     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4232         TCGOpcode opc = op->opc;
4233         const TCGOpDef *def = &tcg_op_defs[opc];
4234         TCGLifeData arg_life = op->life;
4235         int nb_iargs, nb_oargs, call_flags;
4236         TCGTemp *arg_ts, *dir_ts;
4237 
4238         if (opc == INDEX_op_call) {
4239             nb_oargs = TCGOP_CALLO(op);
4240             nb_iargs = TCGOP_CALLI(op);
4241             call_flags = tcg_call_flags(op);
4242         } else {
4243             nb_iargs = def->nb_iargs;
4244             nb_oargs = def->nb_oargs;
4245 
4246             /* Set flags similar to how calls require.  */
4247             if (def->flags & TCG_OPF_COND_BRANCH) {
4248                 /* Like reading globals: sync_globals */
4249                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4250             } else if (def->flags & TCG_OPF_BB_END) {
4251                 /* Like writing globals: save_globals */
4252                 call_flags = 0;
4253             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4254                 /* Like reading globals: sync_globals */
4255                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4256             } else {
4257                 /* No effect on globals.  */
4258                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4259                               TCG_CALL_NO_WRITE_GLOBALS);
4260             }
4261         }
4262 
4263         /* Make sure that input arguments are available.  */
4264         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4265             arg_ts = arg_temp(op->args[i]);
4266             dir_ts = arg_ts->state_ptr;
4267             if (dir_ts && arg_ts->state == TS_DEAD) {
4268                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4269                                   ? INDEX_op_ld_i32
4270                                   : INDEX_op_ld_i64);
4271                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4272                                                   arg_ts->type, 3);
4273 
4274                 lop->args[0] = temp_arg(dir_ts);
4275                 lop->args[1] = temp_arg(arg_ts->mem_base);
4276                 lop->args[2] = arg_ts->mem_offset;
4277 
4278                 /* Loaded, but synced with memory.  */
4279                 arg_ts->state = TS_MEM;
4280             }
4281         }
4282 
4283         /* Perform input replacement, and mark inputs that became dead.
4284            No action is required except keeping temp_state up to date
4285            so that we reload when needed.  */
4286         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4287             arg_ts = arg_temp(op->args[i]);
4288             dir_ts = arg_ts->state_ptr;
4289             if (dir_ts) {
4290                 op->args[i] = temp_arg(dir_ts);
4291                 changes = true;
4292                 if (IS_DEAD_ARG(i)) {
4293                     arg_ts->state = TS_DEAD;
4294                 }
4295             }
4296         }
4297 
4298         /* Liveness analysis should ensure that the following are
4299            all correct, for call sites and basic block end points.  */
4300         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4301             /* Nothing to do */
4302         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4303             for (i = 0; i < nb_globals; ++i) {
4304                 /* Liveness should see that globals are synced back,
4305                    that is, either TS_DEAD or TS_MEM.  */
4306                 arg_ts = &s->temps[i];
4307                 tcg_debug_assert(arg_ts->state_ptr == 0
4308                                  || arg_ts->state != 0);
4309             }
4310         } else {
4311             for (i = 0; i < nb_globals; ++i) {
4312                 /* Liveness should see that globals are saved back,
4313                    that is, TS_DEAD, waiting to be reloaded.  */
4314                 arg_ts = &s->temps[i];
4315                 tcg_debug_assert(arg_ts->state_ptr == 0
4316                                  || arg_ts->state == TS_DEAD);
4317             }
4318         }
4319 
4320         /* Outputs become available.  */
4321         if (opc == INDEX_op_mov) {
4322             arg_ts = arg_temp(op->args[0]);
4323             dir_ts = arg_ts->state_ptr;
4324             if (dir_ts) {
4325                 op->args[0] = temp_arg(dir_ts);
4326                 changes = true;
4327 
4328                 /* The output is now live and modified.  */
4329                 arg_ts->state = 0;
4330 
4331                 if (NEED_SYNC_ARG(0)) {
4332                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4333                                       ? INDEX_op_st_i32
4334                                       : INDEX_op_st_i64);
4335                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4336                                                      arg_ts->type, 3);
4337                     TCGTemp *out_ts = dir_ts;
4338 
4339                     if (IS_DEAD_ARG(0)) {
4340                         out_ts = arg_temp(op->args[1]);
4341                         arg_ts->state = TS_DEAD;
4342                         tcg_op_remove(s, op);
4343                     } else {
4344                         arg_ts->state = TS_MEM;
4345                     }
4346 
4347                     sop->args[0] = temp_arg(out_ts);
4348                     sop->args[1] = temp_arg(arg_ts->mem_base);
4349                     sop->args[2] = arg_ts->mem_offset;
4350                 } else {
4351                     tcg_debug_assert(!IS_DEAD_ARG(0));
4352                 }
4353             }
4354         } else {
4355             for (i = 0; i < nb_oargs; i++) {
4356                 arg_ts = arg_temp(op->args[i]);
4357                 dir_ts = arg_ts->state_ptr;
4358                 if (!dir_ts) {
4359                     continue;
4360                 }
4361                 op->args[i] = temp_arg(dir_ts);
4362                 changes = true;
4363 
4364                 /* The output is now live and modified.  */
4365                 arg_ts->state = 0;
4366 
4367                 /* Sync outputs upon their last write.  */
4368                 if (NEED_SYNC_ARG(i)) {
4369                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4370                                       ? INDEX_op_st_i32
4371                                       : INDEX_op_st_i64);
4372                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4373                                                      arg_ts->type, 3);
4374 
4375                     sop->args[0] = temp_arg(dir_ts);
4376                     sop->args[1] = temp_arg(arg_ts->mem_base);
4377                     sop->args[2] = arg_ts->mem_offset;
4378 
4379                     arg_ts->state = TS_MEM;
4380                 }
4381                 /* Drop outputs that are dead.  */
4382                 if (IS_DEAD_ARG(i)) {
4383                     arg_ts->state = TS_DEAD;
4384                 }
4385             }
4386         }
4387     }
4388 
4389     return changes;
4390 }
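
/*
 * Illustrative before/after for one indirect global (hypothetical
 * names): a read while the direct temp is TS_DEAD gains a preceding
 * load, and a synced write gains a trailing store:
 *
 *     ld_i32  tmp, env, $off    (inserted; tmp is the TEMP_EBB copy)
 *     add     tmp, tmp, t2      (args rewritten to use tmp)
 *     st_i32  tmp, env, $off    (inserted when NEED_SYNC_ARG)
 *
 * The 'changes' result lets the caller know the ops were rewritten,
 * so liveness can be recomputed over the new sequence.
 */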
4391 
4392 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4393 {
4394     intptr_t off;
4395     int size, align;
4396 
4397     /* When allocating an object, look at the full type. */
4398     size = tcg_type_size(ts->base_type);
4399     switch (ts->base_type) {
4400     case TCG_TYPE_I32:
4401         align = 4;
4402         break;
4403     case TCG_TYPE_I64:
4404     case TCG_TYPE_V64:
4405         align = 8;
4406         break;
4407     case TCG_TYPE_I128:
4408     case TCG_TYPE_V128:
4409     case TCG_TYPE_V256:
4410         /*
4411          * Note that we do not require aligned storage for V256,
4412          * and that we provide alignment for I128 to match V128,
4413          * even if that's above what the host ABI requires.
4414          */
4415         align = 16;
4416         break;
4417     default:
4418         g_assert_not_reached();
4419     }
4420 
4421     /*
4422      * Assume the stack is sufficiently aligned.
4423      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4424      * and do not require 16 byte vector alignment.  This seems slightly
4425      * easier than fully parameterizing the above switch statement.
4426      */
4427     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4428     off = ROUND_UP(s->current_frame_offset, align);
4429 
4430     /* If we've exhausted the stack frame, restart with a smaller TB. */
4431     if (off + size > s->frame_end) {
4432         tcg_raise_tb_overflow(s);
4433     }
4434     s->current_frame_offset = off + size;
4435 #if defined(__sparc__)
4436     off += TCG_TARGET_STACK_BIAS;
4437 #endif
4438 
4439     /* If the object was subdivided, assign memory to all the parts. */
4440     if (ts->base_type != ts->type) {
4441         int part_size = tcg_type_size(ts->type);
4442         int part_count = size / part_size;
4443 
4444         /*
4445          * Each part is allocated sequentially in tcg_temp_new_internal.
4446          * Jump back to the first part by subtracting the current index.
4447          */
4448         ts -= ts->temp_subindex;
4449         for (int i = 0; i < part_count; ++i) {
4450             ts[i].mem_offset = off + i * part_size;
4451             ts[i].mem_base = s->frame_temp;
4452             ts[i].mem_allocated = 1;
4453         }
4454     } else {
4455         ts->mem_offset = off;
4456         ts->mem_base = s->frame_temp;
4457         ts->mem_allocated = 1;
4458     }
4459 }
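
/*
 * Worked arithmetic sketch (hypothetical frame state): with
 * current_frame_offset == 20 and an I64 temp, align == 8 gives
 * off = ROUND_UP(20, 8) == 24 and the next free offset becomes 32.
 * An I128 that was subdivided into two I64 parts instead walks back
 * to temp_subindex 0 and assigns off and off + 8 to the two halves.
 */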
4460 
4461 /* Assign @reg to @ts, and update reg_to_temp[]. */
4462 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4463 {
4464     if (ts->val_type == TEMP_VAL_REG) {
4465         TCGReg old = ts->reg;
4466         tcg_debug_assert(s->reg_to_temp[old] == ts);
4467         if (old == reg) {
4468             return;
4469         }
4470         s->reg_to_temp[old] = NULL;
4471     }
4472     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4473     s->reg_to_temp[reg] = ts;
4474     ts->val_type = TEMP_VAL_REG;
4475     ts->reg = reg;
4476 }
4477 
4478 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4479 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4480 {
4481     tcg_debug_assert(type != TEMP_VAL_REG);
4482     if (ts->val_type == TEMP_VAL_REG) {
4483         TCGReg reg = ts->reg;
4484         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4485         s->reg_to_temp[reg] = NULL;
4486     }
4487     ts->val_type = type;
4488 }
4489 
4490 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4491 
4492 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4493    mark it free; otherwise mark it dead.  */
4494 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4495 {
4496     TCGTempVal new_type;
4497 
4498     switch (ts->kind) {
4499     case TEMP_FIXED:
4500         return;
4501     case TEMP_GLOBAL:
4502     case TEMP_TB:
4503         new_type = TEMP_VAL_MEM;
4504         break;
4505     case TEMP_EBB:
4506         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4507         break;
4508     case TEMP_CONST:
4509         new_type = TEMP_VAL_CONST;
4510         break;
4511     default:
4512         g_assert_not_reached();
4513     }
4514     set_temp_val_nonreg(s, ts, new_type);
4515 }
4516 
4517 /* Mark a temporary as dead.  */
4518 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4519 {
4520     temp_free_or_dead(s, ts, 1);
4521 }
4522 
4523 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4524    register needs to be allocated to store a constant.  If 'free_or_dead'
4525    is non-zero, subsequently release the temporary; if it is positive, the
4526    temp is dead; if it is negative, the temp is free.  */
4527 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4528                       TCGRegSet preferred_regs, int free_or_dead)
4529 {
4530     if (!temp_readonly(ts) && !ts->mem_coherent) {
4531         if (!ts->mem_allocated) {
4532             temp_allocate_frame(s, ts);
4533         }
4534         switch (ts->val_type) {
4535         case TEMP_VAL_CONST:
4536             /* If we're going to free the temp immediately, then we won't
4537                require it later in a register, so attempt to store the
4538                constant to memory directly.  */
4539             if (free_or_dead
4540                 && tcg_out_sti(s, ts->type, ts->val,
4541                                ts->mem_base->reg, ts->mem_offset)) {
4542                 break;
4543             }
4544             temp_load(s, ts, tcg_target_available_regs[ts->type],
4545                       allocated_regs, preferred_regs);
4546             /* fallthrough */
4547 
4548         case TEMP_VAL_REG:
4549             tcg_out_st(s, ts->type, ts->reg,
4550                        ts->mem_base->reg, ts->mem_offset);
4551             break;
4552 
4553         case TEMP_VAL_MEM:
4554             break;
4555 
4556         case TEMP_VAL_DEAD:
4557         default:
4558             g_assert_not_reached();
4559         }
4560         ts->mem_coherent = 1;
4561     }
4562     if (free_or_dead) {
4563         temp_free_or_dead(s, ts, free_or_dead);
4564     }
4565 }
4566 
4567 /* free register 'reg' by spilling the corresponding temporary if necessary */
4568 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4569 {
4570     TCGTemp *ts = s->reg_to_temp[reg];
4571     if (ts != NULL) {
4572         temp_sync(s, ts, allocated_regs, 0, -1);
4573     }
4574 }
4575 
4576 /**
4577  * tcg_reg_alloc:
4578  * @required_regs: Set of registers in which we must allocate.
4579  * @allocated_regs: Set of registers which must be avoided.
4580  * @preferred_regs: Set of registers we should prefer.
4581  * @rev: True if we search the registers in "indirect" order.
4582  *
4583  * The allocated register must be in @required_regs & ~@allocated_regs,
4584  * but if we can put it in @preferred_regs we may save a move later.
4585  */
4586 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4587                             TCGRegSet allocated_regs,
4588                             TCGRegSet preferred_regs, bool rev)
4589 {
4590     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4591     TCGRegSet reg_ct[2];
4592     const int *order;
4593 
4594     reg_ct[1] = required_regs & ~allocated_regs;
4595     tcg_debug_assert(reg_ct[1] != 0);
4596     reg_ct[0] = reg_ct[1] & preferred_regs;
4597 
4598     /* Skip the preferred_regs option if it cannot be satisfied,
4599        or if the preference made no difference.  */
4600     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4601 
4602     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4603 
4604     /* Try free registers, preferences first.  */
4605     for (j = f; j < 2; j++) {
4606         TCGRegSet set = reg_ct[j];
4607 
4608         if (tcg_regset_single(set)) {
4609             /* One register in the set.  */
4610             TCGReg reg = tcg_regset_first(set);
4611             if (s->reg_to_temp[reg] == NULL) {
4612                 return reg;
4613             }
4614         } else {
4615             for (i = 0; i < n; i++) {
4616                 TCGReg reg = order[i];
4617                 if (s->reg_to_temp[reg] == NULL &&
4618                     tcg_regset_test_reg(set, reg)) {
4619                     return reg;
4620                 }
4621             }
4622         }
4623     }
4624 
4625     /* We must spill something.  */
4626     for (j = f; j < 2; j++) {
4627         TCGRegSet set = reg_ct[j];
4628 
4629         if (tcg_regset_single(set)) {
4630             /* One register in the set.  */
4631             TCGReg reg = tcg_regset_first(set);
4632             tcg_reg_free(s, reg, allocated_regs);
4633             return reg;
4634         } else {
4635             for (i = 0; i < n; i++) {
4636                 TCGReg reg = order[i];
4637                 if (tcg_regset_test_reg(set, reg)) {
4638                     tcg_reg_free(s, reg, allocated_regs);
4639                     return reg;
4640                 }
4641             }
4642         }
4643     }
4644 
4645     g_assert_not_reached();
4646 }
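
/*
 * Illustrative walk-through (hypothetical sets): with
 * required = {r0..r7}, allocated = {r0} and preferred = {r3},
 * reg_ct[0] == {r3} and reg_ct[1] == {r1..r7}.  The first loop
 * returns r3 if it is free, else the first free register of
 * reg_ct[1] in allocation order; only when every candidate is
 * occupied does the second loop spill one with tcg_reg_free().
 */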
4647 
4648 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4649                                  TCGRegSet allocated_regs,
4650                                  TCGRegSet preferred_regs, bool rev)
4651 {
4652     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4653     TCGRegSet reg_ct[2];
4654     const int *order;
4655 
4656     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4657     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4658     tcg_debug_assert(reg_ct[1] != 0);
4659     reg_ct[0] = reg_ct[1] & preferred_regs;
4660 
4661     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4662 
4663     /*
4664      * Skip the preferred_regs option if it cannot be satisfied,
4665      * or if the preference made no difference.
4666      */
4667     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4668 
4669     /*
4670      * Minimize the number of flushes by looking for 2 free registers first,
4671      * then a single flush, then two flushes.
4672      */
4673     for (fmin = 2; fmin >= 0; fmin--) {
4674         for (j = k; j < 2; j++) {
4675             TCGRegSet set = reg_ct[j];
4676 
4677             for (i = 0; i < n; i++) {
4678                 TCGReg reg = order[i];
4679 
4680                 if (tcg_regset_test_reg(set, reg)) {
4681                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4682                     if (f >= fmin) {
4683                         tcg_reg_free(s, reg, allocated_regs);
4684                         tcg_reg_free(s, reg + 1, allocated_regs);
4685                         return reg;
4686                     }
4687                 }
4688             }
4689         }
4690     }
4691     g_assert_not_reached();
4692 }
4693 
4694 /* Make sure the temporary is in a register.  If needed, allocate the register
4695    from DESIRED while avoiding ALLOCATED.  */
4696 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4697                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4698 {
4699     TCGReg reg;
4700 
4701     switch (ts->val_type) {
4702     case TEMP_VAL_REG:
4703         return;
4704     case TEMP_VAL_CONST:
4705         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4706                             preferred_regs, ts->indirect_base);
4707         if (ts->type <= TCG_TYPE_I64) {
4708             tcg_out_movi(s, ts->type, reg, ts->val);
4709         } else {
4710             uint64_t val = ts->val;
4711             MemOp vece = MO_64;
4712 
4713             /*
4714              * Find the minimal vector element that matches the constant.
4715              * The targets will, in general, have to do this search anyway,
4716              * so do it generically here.
4717              */
4718             if (val == dup_const(MO_8, val)) {
4719                 vece = MO_8;
4720             } else if (val == dup_const(MO_16, val)) {
4721                 vece = MO_16;
4722             } else if (val == dup_const(MO_32, val)) {
4723                 vece = MO_32;
4724             }
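            /*
             * Illustrative: val == 0x4242424242424242 matches
             * dup_const(MO_8, val), giving vece = MO_8, while
             * val == 0x0000000100000001 first matches at MO_32.
             */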
4725 
4726             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4727         }
4728         ts->mem_coherent = 0;
4729         break;
4730     case TEMP_VAL_MEM:
4731         if (!ts->mem_allocated) {
4732             temp_allocate_frame(s, ts);
4733         }
4734         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4735                             preferred_regs, ts->indirect_base);
4736         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4737         ts->mem_coherent = 1;
4738         break;
4739     case TEMP_VAL_DEAD:
4740     default:
4741         g_assert_not_reached();
4742     }
4743     set_temp_val_reg(s, ts, reg);
4744 }
4745 
4746 /* Save a temporary to memory. 'allocated_regs' is used in case a
4747    temporary register needs to be allocated to store a constant.  */
4748 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4749 {
4750     /* The liveness analysis already ensures that globals are back
4751        in memory. Keep an tcg_debug_assert for safety. */
4752     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4753 }
4754 
4755 /* save globals to their canonical location and assume they can be
4756    modified by the following code. 'allocated_regs' is used in case a
4757    temporary register needs to be allocated to store a constant. */
4758 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4759 {
4760     int i, n;
4761 
4762     for (i = 0, n = s->nb_globals; i < n; i++) {
4763         temp_save(s, &s->temps[i], allocated_regs);
4764     }
4765 }
4766 
4767 /* sync globals to their canonical location and assume they can be
4768    read by the following code. 'allocated_regs' is used in case a
4769    temporary register needs to be allocated to store a constant. */
4770 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4771 {
4772     int i, n;
4773 
4774     for (i = 0, n = s->nb_globals; i < n; i++) {
4775         TCGTemp *ts = &s->temps[i];
4776         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4777                          || ts->kind == TEMP_FIXED
4778                          || ts->mem_coherent);
4779     }
4780 }
4781 
4782 /* at the end of a basic block, we assume all temporaries are dead and
4783    all globals are stored at their canonical location. */
4784 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4785 {
4786     int i;
4787 
4788     for (i = s->nb_globals; i < s->nb_temps; i++) {
4789         TCGTemp *ts = &s->temps[i];
4790 
4791         switch (ts->kind) {
4792         case TEMP_TB:
4793             temp_save(s, ts, allocated_regs);
4794             break;
4795         case TEMP_EBB:
4796             /* The liveness analysis already ensures that temps are dead.
4797                Keep a tcg_debug_assert for safety. */
4798             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4799             break;
4800         case TEMP_CONST:
4801             /* Similarly, we should have freed any allocated register. */
4802             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4803             break;
4804         default:
4805             g_assert_not_reached();
4806         }
4807     }
4808 
4809     save_globals(s, allocated_regs);
4810 }
4811 
4812 /*
4813  * At a conditional branch, we assume all temporaries are dead unless
4814  * explicitly live-across-conditional-branch; all globals and local
4815  * temps are synced to their location.
4816  */
4817 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4818 {
4819     sync_globals(s, allocated_regs);
4820 
4821     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4822         TCGTemp *ts = &s->temps[i];
4823         /*
4824          * The liveness analysis already ensures that temps are dead.
4825          * Keep tcg_debug_asserts for safety.
4826          */
4827         switch (ts->kind) {
4828         case TEMP_TB:
4829             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4830             break;
4831         case TEMP_EBB:
4832         case TEMP_CONST:
4833             break;
4834         default:
4835             g_assert_not_reached();
4836         }
4837     }
4838 }
4839 
4840 /*
4841  * Specialized code generation for INDEX_op_mov_* with a constant.
4842  */
4843 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4844                                   tcg_target_ulong val, TCGLifeData arg_life,
4845                                   TCGRegSet preferred_regs)
4846 {
4847     /* ENV should not be modified.  */
4848     tcg_debug_assert(!temp_readonly(ots));
4849 
4850     /* The movi is not explicitly generated here.  */
4851     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4852     ots->val = val;
4853     ots->mem_coherent = 0;
4854     if (NEED_SYNC_ARG(0)) {
4855         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4856     } else if (IS_DEAD_ARG(0)) {
4857         temp_dead(s, ots);
4858     }
4859 }
4860 
4861 /*
4862  * Specialized code generation for INDEX_op_mov_*.
4863  */
4864 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4865 {
4866     const TCGLifeData arg_life = op->life;
4867     TCGRegSet allocated_regs, preferred_regs;
4868     TCGTemp *ts, *ots;
4869     TCGType otype, itype;
4870     TCGReg oreg, ireg;
4871 
4872     allocated_regs = s->reserved_regs;
4873     preferred_regs = output_pref(op, 0);
4874     ots = arg_temp(op->args[0]);
4875     ts = arg_temp(op->args[1]);
4876 
4877     /* ENV should not be modified.  */
4878     tcg_debug_assert(!temp_readonly(ots));
4879 
4880     /* Note that otype != itype for no-op truncation.  */
4881     otype = ots->type;
4882     itype = ts->type;
4883 
4884     if (ts->val_type == TEMP_VAL_CONST) {
4885         /* propagate constant or generate sti */
4886         tcg_target_ulong val = ts->val;
4887         if (IS_DEAD_ARG(1)) {
4888             temp_dead(s, ts);
4889         }
4890         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4891         return;
4892     }
4893 
4894     /* If the source value is in memory we're going to be forced
4895        to have it in a register in order to perform the copy.  Copy
4896        the SOURCE value into its own register first, so that we
4897        don't have to reload SOURCE the next time it is used. */
4898     if (ts->val_type == TEMP_VAL_MEM) {
4899         temp_load(s, ts, tcg_target_available_regs[itype],
4900                   allocated_regs, preferred_regs);
4901     }
4902     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4903     ireg = ts->reg;
4904 
4905     if (IS_DEAD_ARG(0)) {
4906         /* mov to a non-saved dead register makes no sense (even with
4907            liveness analysis disabled). */
4908         tcg_debug_assert(NEED_SYNC_ARG(0));
4909         if (!ots->mem_allocated) {
4910             temp_allocate_frame(s, ots);
4911         }
4912         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4913         if (IS_DEAD_ARG(1)) {
4914             temp_dead(s, ts);
4915         }
4916         temp_dead(s, ots);
4917         return;
4918     }
4919 
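    /*
     * Illustrative: for a mov whose input is a dying, non-fixed temp
     * already in a host register, no host instruction is emitted; the
     * output temp simply inherits the input's register below.
     */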
4920     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4921         /*
4922          * The mov can be suppressed.  Kill input first, so that it
4923          * is unlinked from reg_to_temp, then set the output to the
4924          * reg that we saved from the input.
4925          */
4926         temp_dead(s, ts);
4927         oreg = ireg;
4928     } else {
4929         if (ots->val_type == TEMP_VAL_REG) {
4930             oreg = ots->reg;
4931         } else {
4932             /* Make sure to not spill the input register during allocation. */
4933             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4934                                  allocated_regs | ((TCGRegSet)1 << ireg),
4935                                  preferred_regs, ots->indirect_base);
4936         }
4937         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4938             /*
4939              * Cross register class move not supported.
4940              * Store the source register into the destination slot
4941              * and leave the destination temp as TEMP_VAL_MEM.
4942              */
4943             assert(!temp_readonly(ots));
4944             if (!ts->mem_allocated) {
4945                 temp_allocate_frame(s, ots);
4946             }
4947             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4948             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4949             ots->mem_coherent = 1;
4950             return;
4951         }
4952     }
4953     set_temp_val_reg(s, ots, oreg);
4954     ots->mem_coherent = 0;
4955 
4956     if (NEED_SYNC_ARG(0)) {
4957         temp_sync(s, ots, allocated_regs, 0, 0);
4958     }
4959 }
4960 
4961 /*
4962  * Specialized code generation for INDEX_op_dup_vec.
4963  */
4964 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4965 {
4966     const TCGLifeData arg_life = op->life;
4967     TCGRegSet dup_out_regs, dup_in_regs;
4968     const TCGArgConstraint *dup_args_ct;
4969     TCGTemp *its, *ots;
4970     TCGType itype, vtype;
4971     unsigned vece;
4972     int lowpart_ofs;
4973     bool ok;
4974 
4975     ots = arg_temp(op->args[0]);
4976     its = arg_temp(op->args[1]);
4977 
4978     /* ENV should not be modified.  */
4979     tcg_debug_assert(!temp_readonly(ots));
4980 
4981     itype = its->type;
4982     vece = TCGOP_VECE(op);
4983     vtype = TCGOP_TYPE(op);
4984 
4985     if (its->val_type == TEMP_VAL_CONST) {
4986         /* Propagate constant via movi -> dupi.  */
4987         tcg_target_ulong val = its->val;
4988         if (IS_DEAD_ARG(1)) {
4989             temp_dead(s, its);
4990         }
4991         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4992         return;
4993     }
4994 
4995     dup_args_ct = opcode_args_ct(op);
4996     dup_out_regs = dup_args_ct[0].regs;
4997     dup_in_regs = dup_args_ct[1].regs;
4998 
4999     /* Allocate the output register now.  */
5000     if (ots->val_type != TEMP_VAL_REG) {
5001         TCGRegSet allocated_regs = s->reserved_regs;
5002         TCGReg oreg;
5003 
5004         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5005             /* Make sure to not spill the input register. */
5006             tcg_regset_set_reg(allocated_regs, its->reg);
5007         }
5008         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5009                              output_pref(op, 0), ots->indirect_base);
5010         set_temp_val_reg(s, ots, oreg);
5011     }
5012 
5013     switch (its->val_type) {
5014     case TEMP_VAL_REG:
5015         /*
5016          * The dup constraints must be broad, covering all possible VECE.
5017          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5018          * to fail, indicating that extra moves are required for that case.
5019          */
5020         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5021             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5022                 goto done;
5023             }
5024             /* Try again from memory or a vector input register.  */
5025         }
5026         if (!its->mem_coherent) {
5027             /*
5028              * The input register is not synced, and so an extra store
5029              * would be required to use memory.  Attempt an integer-vector
5030              * register move first.  We do not have a TCGRegSet for this.
5031              */
5032             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5033                 break;
5034             }
5035             /* Sync the temp back to its slot and load from there.  */
5036             temp_sync(s, its, s->reserved_regs, 0, 0);
5037         }
5038         /* fall through */
5039 
5040     case TEMP_VAL_MEM:
5041         lowpart_ofs = 0;
5042         if (HOST_BIG_ENDIAN) {
5043             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5044         }
5045         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5046                              its->mem_offset + lowpart_ofs)) {
5047             goto done;
5048         }
5049         /* Load the input into the destination vector register. */
5050         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5051         break;
5052 
5053     default:
5054         g_assert_not_reached();
5055     }
5056 
5057     /* We now have a vector input register, so dup must succeed. */
5058     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5059     tcg_debug_assert(ok);
5060 
5061  done:
5062     ots->mem_coherent = 0;
5063     if (IS_DEAD_ARG(1)) {
5064         temp_dead(s, its);
5065     }
5066     if (NEED_SYNC_ARG(0)) {
5067         temp_sync(s, ots, s->reserved_regs, 0, 0);
5068     }
5069     if (IS_DEAD_ARG(0)) {
5070         temp_dead(s, ots);
5071     }
5072 }
5073 
5074 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5075 {
5076     const TCGLifeData arg_life = op->life;
5077     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5078     TCGRegSet i_allocated_regs;
5079     TCGRegSet o_allocated_regs;
5080     int i, k, nb_iargs, nb_oargs;
5081     TCGReg reg;
5082     TCGArg arg;
5083     const TCGArgConstraint *args_ct;
5084     const TCGArgConstraint *arg_ct;
5085     TCGTemp *ts;
5086     TCGArg new_args[TCG_MAX_OP_ARGS];
5087     int const_args[TCG_MAX_OP_ARGS];
5088     TCGCond op_cond;
5089 
5090     nb_oargs = def->nb_oargs;
5091     nb_iargs = def->nb_iargs;
5092 
5093     /* copy constants */
5094     memcpy(new_args + nb_oargs + nb_iargs,
5095            op->args + nb_oargs + nb_iargs,
5096            sizeof(TCGArg) * def->nb_cargs);
5097 
5098     i_allocated_regs = s->reserved_regs;
5099     o_allocated_regs = s->reserved_regs;
5100 
5101     switch (op->opc) {
5102     case INDEX_op_brcond_i32:
5103     case INDEX_op_brcond_i64:
5104         op_cond = op->args[2];
5105         break;
5106     case INDEX_op_setcond_i32:
5107     case INDEX_op_setcond_i64:
5108     case INDEX_op_negsetcond_i32:
5109     case INDEX_op_negsetcond_i64:
5110     case INDEX_op_cmp_vec:
5111         op_cond = op->args[3];
5112         break;
5113     case INDEX_op_brcond2_i32:
5114         op_cond = op->args[4];
5115         break;
5116     case INDEX_op_movcond_i32:
5117     case INDEX_op_movcond_i64:
5118     case INDEX_op_setcond2_i32:
5119     case INDEX_op_cmpsel_vec:
5120         op_cond = op->args[5];
5121         break;
5122     default:
5123         /* No condition within opcode. */
5124         op_cond = TCG_COND_ALWAYS;
5125         break;
5126     }
5127 
5128     args_ct = opcode_args_ct(op);
5129 
5130     /* satisfy input constraints */
5131     for (k = 0; k < nb_iargs; k++) {
5132         TCGRegSet i_preferred_regs, i_required_regs;
5133         bool allocate_new_reg, copyto_new_reg;
5134         TCGTemp *ts2;
5135         int i1, i2;
5136 
5137         i = args_ct[nb_oargs + k].sort_index;
5138         arg = op->args[i];
5139         arg_ct = &args_ct[i];
5140         ts = arg_temp(arg);
5141 
5142         if (ts->val_type == TEMP_VAL_CONST) {
5143 #ifdef TCG_REG_ZERO
5144             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5145                 /* Hardware zero register: indicate register via non-const. */
5146                 const_args[i] = 0;
5147                 new_args[i] = TCG_REG_ZERO;
5148                 continue;
5149             }
5150 #endif
5151 
5152             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5153                                        op_cond, TCGOP_VECE(op))) {
5154                 /* constant is OK for instruction */
5155                 const_args[i] = 1;
5156                 new_args[i] = ts->val;
5157                 continue;
5158             }
5159         }
5160 
5161         reg = ts->reg;
5162         i_preferred_regs = 0;
5163         i_required_regs = arg_ct->regs;
5164         allocate_new_reg = false;
5165         copyto_new_reg = false;
5166 
5167         switch (arg_ct->pair) {
5168         case 0: /* not paired */
5169             if (arg_ct->ialias) {
5170                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5171 
5172                 /*
5173                  * If the input is readonly, then it cannot also be an
5174                  * output and aliased to itself.  If the input is not
5175                  * dead after the instruction, we must allocate a new
5176                  * register and move it.
5177                  */
5178                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5179                     || args_ct[arg_ct->alias_index].newreg) {
5180                     allocate_new_reg = true;
5181                 } else if (ts->val_type == TEMP_VAL_REG) {
5182                     /*
5183                      * Check if the current register has already been
5184                      * allocated for another input.
5185                      */
5186                     allocate_new_reg =
5187                         tcg_regset_test_reg(i_allocated_regs, reg);
5188                 }
5189             }
5190             if (!allocate_new_reg) {
5191                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5192                           i_preferred_regs);
5193                 reg = ts->reg;
5194                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5195             }
5196             if (allocate_new_reg) {
5197                 /*
5198                  * Allocate a new register matching the constraint
5199                  * and move the temporary register into it.
5200                  */
5201                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5202                           i_allocated_regs, 0);
5203                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5204                                     i_preferred_regs, ts->indirect_base);
5205                 copyto_new_reg = true;
5206             }
5207             break;
5208 
5209         case 1:
5210             /* First of an input pair; if i1 == i2, the second is an output. */
5211             i1 = i;
5212             i2 = arg_ct->pair_index;
5213             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5214 
5215             /*
5216              * It is easier to default to allocating a new pair
5217              * and to identify a few cases where it's not required.
5218              */
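            /*
             * Illustrative: if the constraint accepts (r2,r3), ts is
             * already in r2 with ts2 adjacent in r3, and both inputs die
             * here, the tests below break out and reuse the pair as-is.
             */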
5219             if (arg_ct->ialias) {
5220                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5221                 if (IS_DEAD_ARG(i1) &&
5222                     IS_DEAD_ARG(i2) &&
5223                     !temp_readonly(ts) &&
5224                     ts->val_type == TEMP_VAL_REG &&
5225                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5226                     tcg_regset_test_reg(i_required_regs, reg) &&
5227                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5228                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5229                     (ts2
5230                      ? ts2->val_type == TEMP_VAL_REG &&
5231                        ts2->reg == reg + 1 &&
5232                        !temp_readonly(ts2)
5233                      : s->reg_to_temp[reg + 1] == NULL)) {
5234                     break;
5235                 }
5236             } else {
5237                 /* Without aliasing, the pair must also be an input. */
5238                 tcg_debug_assert(ts2);
5239                 if (ts->val_type == TEMP_VAL_REG &&
5240                     ts2->val_type == TEMP_VAL_REG &&
5241                     ts2->reg == reg + 1 &&
5242                     tcg_regset_test_reg(i_required_regs, reg)) {
5243                     break;
5244                 }
5245             }
5246             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5247                                      0, ts->indirect_base);
5248             goto do_pair;
5249 
5250         case 2: /* pair second */
5251             reg = new_args[arg_ct->pair_index] + 1;
5252             goto do_pair;
5253 
5254         case 3: /* ialias with second output, no first input */
5255             tcg_debug_assert(arg_ct->ialias);
5256             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5257 
5258             if (IS_DEAD_ARG(i) &&
5259                 !temp_readonly(ts) &&
5260                 ts->val_type == TEMP_VAL_REG &&
5261                 reg > 0 &&
5262                 s->reg_to_temp[reg - 1] == NULL &&
5263                 tcg_regset_test_reg(i_required_regs, reg) &&
5264                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5265                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5266                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5267                 break;
5268             }
5269             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5270                                      i_allocated_regs, 0,
5271                                      ts->indirect_base);
5272             tcg_regset_set_reg(i_allocated_regs, reg);
5273             reg += 1;
5274             goto do_pair;
5275 
5276         do_pair:
5277             /*
5278              * If an aliased input is not dead after the instruction,
5279              * we must allocate a new register and move it.
5280              */
5281             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5282                 TCGRegSet t_allocated_regs = i_allocated_regs;
5283 
5284                 /*
5285                  * Because of the alias, and the continued life, make sure
5286                  * that the temp is somewhere *other* than the reg pair,
5287                  * and we get a copy in reg.
5288                  */
5289                 tcg_regset_set_reg(t_allocated_regs, reg);
5290                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5291                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5292                     /* If ts was already in reg, copy it somewhere else. */
5293                     TCGReg nr;
5294                     bool ok;
5295 
5296                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5297                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5298                                        t_allocated_regs, 0, ts->indirect_base);
5299                     ok = tcg_out_mov(s, ts->type, nr, reg);
5300                     tcg_debug_assert(ok);
5301 
5302                     set_temp_val_reg(s, ts, nr);
5303                 } else {
5304                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5305                               t_allocated_regs, 0);
5306                     copyto_new_reg = true;
5307                 }
5308             } else {
5309                 /* Preferably allocate to reg, otherwise copy. */
5310                 i_required_regs = (TCGRegSet)1 << reg;
5311                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5312                           i_preferred_regs);
5313                 copyto_new_reg = ts->reg != reg;
5314             }
5315             break;
5316 
5317         default:
5318             g_assert_not_reached();
5319         }
5320 
5321         if (copyto_new_reg) {
5322             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5323                 /*
5324                  * Cross register class move not supported.  Sync the
5325                  * temp back to its slot and load from there.
5326                  */
5327                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5328                 tcg_out_ld(s, ts->type, reg,
5329                            ts->mem_base->reg, ts->mem_offset);
5330             }
5331         }
5332         new_args[i] = reg;
5333         const_args[i] = 0;
5334         tcg_regset_set_reg(i_allocated_regs, reg);
5335     }
5336 
5337     /* mark dead temporaries and free the associated registers */
5338     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5339         if (IS_DEAD_ARG(i)) {
5340             temp_dead(s, arg_temp(op->args[i]));
5341         }
5342     }
5343 
5344     if (def->flags & TCG_OPF_COND_BRANCH) {
5345         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5346     } else if (def->flags & TCG_OPF_BB_END) {
5347         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5348     } else {
5349         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5350             /* XXX: permit generic clobber register list? */
5351             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5352                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5353                     tcg_reg_free(s, i, i_allocated_regs);
5354                 }
5355             }
5356         }
5357         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5358             /* sync globals if the op has side effects and might trigger
5359                an exception. */
5360             sync_globals(s, i_allocated_regs);
5361         }
5362 
5363         /* satisfy the output constraints */
5364         for (k = 0; k < nb_oargs; k++) {
5365             i = args_ct[k].sort_index;
5366             arg = op->args[i];
5367             arg_ct = &args_ct[i];
5368             ts = arg_temp(arg);
5369 
5370             /* ENV should not be modified.  */
5371             tcg_debug_assert(!temp_readonly(ts));
5372 
5373             switch (arg_ct->pair) {
5374             case 0: /* not paired */
5375                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5376                     reg = new_args[arg_ct->alias_index];
5377                 } else if (arg_ct->newreg) {
5378                     reg = tcg_reg_alloc(s, arg_ct->regs,
5379                                         i_allocated_regs | o_allocated_regs,
5380                                         output_pref(op, k), ts->indirect_base);
5381                 } else {
5382                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5383                                         output_pref(op, k), ts->indirect_base);
5384                 }
5385                 break;
5386 
5387             case 1: /* first of pair */
5388                 if (arg_ct->oalias) {
5389                     reg = new_args[arg_ct->alias_index];
5390                 } else if (arg_ct->newreg) {
5391                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5392                                              i_allocated_regs | o_allocated_regs,
5393                                              output_pref(op, k),
5394                                              ts->indirect_base);
5395                 } else {
5396                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5397                                              output_pref(op, k),
5398                                              ts->indirect_base);
5399                 }
5400                 break;
5401 
5402             case 2: /* second of pair */
5403                 if (arg_ct->oalias) {
5404                     reg = new_args[arg_ct->alias_index];
5405                 } else {
5406                     reg = new_args[arg_ct->pair_index] + 1;
5407                 }
5408                 break;
5409 
5410             case 3: /* first of pair, aliasing with a second input */
5411                 tcg_debug_assert(!arg_ct->newreg);
5412                 reg = new_args[arg_ct->pair_index] - 1;
5413                 break;
5414 
5415             default:
5416                 g_assert_not_reached();
5417             }
5418             tcg_regset_set_reg(o_allocated_regs, reg);
5419             set_temp_val_reg(s, ts, reg);
5420             ts->mem_coherent = 0;
5421             new_args[i] = reg;
5422         }
5423     }
5424 
5425     /* emit instruction */
5426     TCGType type = TCGOP_TYPE(op);
5427     switch (op->opc) {
5428     case INDEX_op_ext_i32_i64:
5429         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5430         break;
5431     case INDEX_op_extu_i32_i64:
5432         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5433         break;
5434     case INDEX_op_extrl_i64_i32:
5435         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5436         break;
5437 
5438     case INDEX_op_add:
5439     case INDEX_op_and:
5440     case INDEX_op_andc:
5441     case INDEX_op_eqv:
5442     case INDEX_op_nand:
5443     case INDEX_op_nor:
5444     case INDEX_op_or:
5445     case INDEX_op_orc:
5446     case INDEX_op_xor:
5447         {
5448             const TCGOutOpBinary *out =
5449                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5450 
5451             /* Constants should never appear in the first source operand. */
5452             tcg_debug_assert(!const_args[1]);
5453             if (const_args[2]) {
5454                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5455             } else {
5456                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5457             }
5458         }
5459         break;
5460 
5461     case INDEX_op_sub:
5462         {
5463             const TCGOutOpSubtract *out = &outop_sub;
5464 
5465             /*
5466              * Constants should never appear in the second source operand.
5467              * These are folded to add with negative constant.
5468              */
5469             tcg_debug_assert(!const_args[2]);
5470             if (const_args[1]) {
5471                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5472             } else {
5473                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5474             }
5475         }
5476         break;
5477 
5478     case INDEX_op_neg:
5479         {
5480             const TCGOutOpUnary *out =
5481                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5482 
5483             /* Constants should have been folded. */
5484             tcg_debug_assert(!const_args[1]);
5485             out->out_rr(s, type, new_args[0], new_args[1]);
5486         }
5487         break;
5488 
5489     default:
5490         if (def->flags & TCG_OPF_VECTOR) {
5491             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5492                            TCGOP_VECE(op), new_args, const_args);
5493         } else {
5494             tcg_out_op(s, op->opc, type, new_args, const_args);
5495         }
5496         break;
5497     }
5498 
5499     /* move the outputs in the correct register if needed */
5500     for (i = 0; i < nb_oargs; i++) {
5501         ts = arg_temp(op->args[i]);
5502 
5503         /* ENV should not be modified.  */
5504         tcg_debug_assert(!temp_readonly(ts));
5505 
5506         if (NEED_SYNC_ARG(i)) {
5507             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5508         } else if (IS_DEAD_ARG(i)) {
5509             temp_dead(s, ts);
5510         }
5511     }
5512 }
5513 
5514 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5515 {
5516     const TCGLifeData arg_life = op->life;
5517     TCGTemp *ots, *itsl, *itsh;
5518     TCGType vtype = TCGOP_TYPE(op);
5519 
5520     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5521     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5522     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5523 
5524     ots = arg_temp(op->args[0]);
5525     itsl = arg_temp(op->args[1]);
5526     itsh = arg_temp(op->args[2]);
5527 
5528     /* ENV should not be modified.  */
5529     tcg_debug_assert(!temp_readonly(ots));
5530 
5531     /* Allocate the output register now.  */
5532     if (ots->val_type != TEMP_VAL_REG) {
5533         TCGRegSet allocated_regs = s->reserved_regs;
5534         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5535         TCGReg oreg;
5536 
5537         /* Make sure to not spill the input registers. */
5538         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5539             tcg_regset_set_reg(allocated_regs, itsl->reg);
5540         }
5541         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5542             tcg_regset_set_reg(allocated_regs, itsh->reg);
5543         }
5544 
5545         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5546                              output_pref(op, 0), ots->indirect_base);
5547         set_temp_val_reg(s, ots, oreg);
5548     }
5549 
5550     /* Promote dup2 of immediates to dupi_vec. */
5551     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5552         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
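        /* Illustrative: itsl->val = 0x89abcdef and itsh->val = 0x01234567
           combine to val = 0x0123456789abcdef. */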
5553         MemOp vece = MO_64;
5554 
5555         if (val == dup_const(MO_8, val)) {
5556             vece = MO_8;
5557         } else if (val == dup_const(MO_16, val)) {
5558             vece = MO_16;
5559         } else if (val == dup_const(MO_32, val)) {
5560             vece = MO_32;
5561         }
5562 
5563         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5564         goto done;
5565     }
5566 
5567     /* If the two inputs form one 64-bit value, try dupm_vec. */
5568     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5569         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5570         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5571         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5572 
5573         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5574         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5575 
5576         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5577                              its->mem_base->reg, its->mem_offset)) {
5578             goto done;
5579         }
5580     }
5581 
5582     /* Fall back to generic expansion. */
5583     return false;
5584 
5585  done:
5586     ots->mem_coherent = 0;
5587     if (IS_DEAD_ARG(1)) {
5588         temp_dead(s, itsl);
5589     }
5590     if (IS_DEAD_ARG(2)) {
5591         temp_dead(s, itsh);
5592     }
5593     if (NEED_SYNC_ARG(0)) {
5594         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5595     } else if (IS_DEAD_ARG(0)) {
5596         temp_dead(s, ots);
5597     }
5598     return true;
5599 }
5600 
5601 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5602                          TCGRegSet allocated_regs)
5603 {
5604     if (ts->val_type == TEMP_VAL_REG) {
5605         if (ts->reg != reg) {
5606             tcg_reg_free(s, reg, allocated_regs);
5607             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5608                 /*
5609                  * Cross register class move not supported.  Sync the
5610                  * temp back to its slot and load from there.
5611                  */
5612                 temp_sync(s, ts, allocated_regs, 0, 0);
5613                 tcg_out_ld(s, ts->type, reg,
5614                            ts->mem_base->reg, ts->mem_offset);
5615             }
5616         }
5617     } else {
5618         TCGRegSet arg_set = 0;
5619 
5620         tcg_reg_free(s, reg, allocated_regs);
5621         tcg_regset_set_reg(arg_set, reg);
5622         temp_load(s, ts, arg_set, allocated_regs, 0);
5623     }
5624 }
5625 
5626 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5627                          TCGRegSet allocated_regs)
5628 {
5629     /*
5630      * When the destination is on the stack, load up the temp and store.
5631      * If there are many call-saved registers, the temp might live to
5632      * see another use; otherwise it'll be discarded.
5633      */
5634     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5635     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5636                arg_slot_stk_ofs(arg_slot));
5637 }
5638 
5639 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5640                             TCGTemp *ts, TCGRegSet *allocated_regs)
5641 {
5642     if (arg_slot_reg_p(l->arg_slot)) {
5643         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5644         load_arg_reg(s, reg, ts, *allocated_regs);
5645         tcg_regset_set_reg(*allocated_regs, reg);
5646     } else {
5647         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5648     }
5649 }
5650 
5651 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5652                          intptr_t ref_off, TCGRegSet *allocated_regs)
5653 {
5654     TCGReg reg;
5655 
5656     if (arg_slot_reg_p(arg_slot)) {
5657         reg = tcg_target_call_iarg_regs[arg_slot];
5658         tcg_reg_free(s, reg, *allocated_regs);
5659         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5660         tcg_regset_set_reg(*allocated_regs, reg);
5661     } else {
5662         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5663                             *allocated_regs, 0, false);
5664         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5665         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5666                    arg_slot_stk_ofs(arg_slot));
5667     }
5668 }
5669 
5670 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5671 {
5672     const int nb_oargs = TCGOP_CALLO(op);
5673     const int nb_iargs = TCGOP_CALLI(op);
5674     const TCGLifeData arg_life = op->life;
5675     const TCGHelperInfo *info = tcg_call_info(op);
5676     TCGRegSet allocated_regs = s->reserved_regs;
5677     int i;
5678 
5679     /*
5680      * Move inputs into place in reverse order,
5681      * so that we place stacked arguments first.
5682      */
5683     for (i = nb_iargs - 1; i >= 0; --i) {
5684         const TCGCallArgumentLoc *loc = &info->in[i];
5685         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5686 
5687         switch (loc->kind) {
5688         case TCG_CALL_ARG_NORMAL:
5689         case TCG_CALL_ARG_EXTEND_U:
5690         case TCG_CALL_ARG_EXTEND_S:
5691             load_arg_normal(s, loc, ts, &allocated_regs);
5692             break;
5693         case TCG_CALL_ARG_BY_REF:
5694             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5695             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5696                          arg_slot_stk_ofs(loc->ref_slot),
5697                          &allocated_regs);
5698             break;
5699         case TCG_CALL_ARG_BY_REF_N:
5700             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5701             break;
5702         default:
5703             g_assert_not_reached();
5704         }
5705     }
5706 
5707     /* Mark dead temporaries and free the associated registers.  */
5708     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5709         if (IS_DEAD_ARG(i)) {
5710             temp_dead(s, arg_temp(op->args[i]));
5711         }
5712     }
5713 
5714     /* Clobber call registers.  */
5715     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5716         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5717             tcg_reg_free(s, i, allocated_regs);
5718         }
5719     }
5720 
5721     /*
5722      * Save globals if they might be written by the helper,
5723      * sync them if they might be read.
5724      */
5725     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5726         /* Nothing to do */
5727     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5728         sync_globals(s, allocated_regs);
5729     } else {
5730         save_globals(s, allocated_regs);
5731     }
5732 
5733     /*
5734      * If the ABI passes a pointer to the returned struct as the first
5735      * argument, load that now.  Pass a pointer to the output home slot.
5736      */
5737     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5738         TCGTemp *ts = arg_temp(op->args[0]);
5739 
5740         if (!ts->mem_allocated) {
5741             temp_allocate_frame(s, ts);
5742         }
5743         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5744     }
5745 
5746     tcg_out_call(s, tcg_call_func(op), info);
5747 
5748     /* Assign output registers and emit moves if needed.  */
5749     switch (info->out_kind) {
5750     case TCG_CALL_RET_NORMAL:
5751         for (i = 0; i < nb_oargs; i++) {
5752             TCGTemp *ts = arg_temp(op->args[i]);
5753             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5754 
5755             /* ENV should not be modified.  */
5756             tcg_debug_assert(!temp_readonly(ts));
5757 
5758             set_temp_val_reg(s, ts, reg);
5759             ts->mem_coherent = 0;
5760         }
5761         break;
5762 
5763     case TCG_CALL_RET_BY_VEC:
5764         {
5765             TCGTemp *ts = arg_temp(op->args[0]);
5766 
5767             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5768             tcg_debug_assert(ts->temp_subindex == 0);
5769             if (!ts->mem_allocated) {
5770                 temp_allocate_frame(s, ts);
5771             }
5772             tcg_out_st(s, TCG_TYPE_V128,
5773                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5774                        ts->mem_base->reg, ts->mem_offset);
5775         }
5776         /* fall through to mark all parts in memory */
5777 
5778     case TCG_CALL_RET_BY_REF:
5779         /* The callee has performed a write through the reference. */
5780         for (i = 0; i < nb_oargs; i++) {
5781             TCGTemp *ts = arg_temp(op->args[i]);
5782             ts->val_type = TEMP_VAL_MEM;
5783         }
5784         break;
5785 
5786     default:
5787         g_assert_not_reached();
5788     }
5789 
5790     /* Flush or discard output registers as needed. */
5791     for (i = 0; i < nb_oargs; i++) {
5792         TCGTemp *ts = arg_temp(op->args[i]);
5793         if (NEED_SYNC_ARG(i)) {
5794             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5795         } else if (IS_DEAD_ARG(i)) {
5796             temp_dead(s, ts);
5797         }
5798     }
5799 }
5800 
5801 /**
5802  * atom_and_align_for_opc:
5803  * @s: tcg context
5804  * @opc: memory operation code
5805  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5806  * @allow_two_ops: true if we are prepared to issue two operations
5807  *
5808  * Return the alignment and atomicity to use for the inline fast path
5809  * for the given memory operation.  The alignment may be larger than
5810  * that specified in @opc, and the correct alignment will be diagnosed
5811  * by the slow path helper.
5812  *
5813  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5814  * and issue two loads or stores for subalignment.
5815  */
5816 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5817                                            MemOp host_atom, bool allow_two_ops)
5818 {
5819     MemOp align = memop_alignment_bits(opc);
5820     MemOp size = opc & MO_SIZE;
5821     MemOp half = size ? size - 1 : 0;
5822     MemOp atom = opc & MO_ATOM_MASK;
5823     MemOp atmax;
5824 
5825     switch (atom) {
5826     case MO_ATOM_NONE:
5827         /* The operation requires no specific atomicity. */
5828         atmax = MO_8;
5829         break;
5830 
5831     case MO_ATOM_IFALIGN:
5832         atmax = size;
5833         break;
5834 
5835     case MO_ATOM_IFALIGN_PAIR:
5836         atmax = half;
5837         break;
5838 
5839     case MO_ATOM_WITHIN16:
5840         atmax = size;
5841         if (size == MO_128) {
5842             /* Misalignment implies !within16, and therefore no atomicity. */
5843         } else if (host_atom != MO_ATOM_WITHIN16) {
5844             /* The host does not implement within16, so require alignment. */
5845             align = MAX(align, size);
5846         }
5847         break;
5848 
5849     case MO_ATOM_WITHIN16_PAIR:
5850         atmax = size;
5851         /*
5852          * Misalignment implies !within16, and therefore half atomicity.
5853          * Any host prepared for two operations can implement this with
5854          * half alignment.
5855          */
5856         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5857             align = MAX(align, half);
5858         }
5859         break;
5860 
5861     case MO_ATOM_SUBALIGN:
5862         atmax = size;
5863         if (host_atom != MO_ATOM_SUBALIGN) {
5864             /* If unaligned but not odd, there are subobjects up to half. */
5865             if (allow_two_ops) {
5866                 align = MAX(align, half);
5867             } else {
5868                 align = MAX(align, size);
5869             }
5870         }
5871         break;
5872 
5873     default:
5874         g_assert_not_reached();
5875     }
5876 
5877     return (TCGAtomAlign){ .atom = atmax, .align = align };
5878 }
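
/*
 * Illustrative example (editorial): an unaligned MO_64 load tagged
 * MO_ATOM_WITHIN16, on a host that only guarantees MO_ATOM_IFALIGN,
 * yields { .atom = MO_64, .align = MO_64 }: the fast path then insists
 * on natural alignment so the within-16-byte guarantee holds.
 */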
5879 
5880 /*
5881  * Similarly for qemu_ld/st slow path helpers.
5882  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5883  * using only the provided backend tcg_out_* functions.
5884  */
5885 
5886 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5887 {
5888     int ofs = arg_slot_stk_ofs(slot);
5889 
5890     /*
5891      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5892      * require extension to uint64_t, adjust the address for uint32_t.
5893      */
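    /*
     * Illustrative: a 64-bit big-endian host keeps an I32 argument in
     * the high-addressed half of its 8-byte slot, hence the +4 below.
     */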
5894     if (HOST_BIG_ENDIAN &&
5895         TCG_TARGET_REG_BITS == 64 &&
5896         type == TCG_TYPE_I32) {
5897         ofs += 4;
5898     }
5899     return ofs;
5900 }
5901 
5902 static void tcg_out_helper_load_slots(TCGContext *s,
5903                                       unsigned nmov, TCGMovExtend *mov,
5904                                       const TCGLdstHelperParam *parm)
5905 {
5906     unsigned i;
5907     TCGReg dst3;
5908 
5909     /*
5910      * Start from the end, storing to the stack first.
5911      * This frees those registers, so we need not consider overlap.
5912      */
5913     for (i = nmov; i-- > 0; ) {
5914         unsigned slot = mov[i].dst;
5915 
5916         if (arg_slot_reg_p(slot)) {
5917             goto found_reg;
5918         }
5919 
5920         TCGReg src = mov[i].src;
5921         TCGType dst_type = mov[i].dst_type;
5922         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5923 
5924         /* The argument is going onto the stack; extend into scratch. */
5925         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5926             tcg_debug_assert(parm->ntmp != 0);
5927             mov[i].dst = src = parm->tmp[0];
5928             tcg_out_movext1(s, &mov[i]);
5929         }
5930 
5931         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5932                    tcg_out_helper_stk_ofs(dst_type, slot));
5933     }
5934     return;
5935 
5936  found_reg:
5937     /*
5938      * The remaining arguments are in registers.
5939      * Convert slot numbers to argument registers.
5940      */
5941     nmov = i + 1;
5942     for (i = 0; i < nmov; ++i) {
5943         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5944     }
5945 
5946     switch (nmov) {
5947     case 4:
5948         /* The backend must have provided enough temps for the worst case. */
5949         tcg_debug_assert(parm->ntmp >= 2);
5950 
5951         dst3 = mov[3].dst;
5952         for (unsigned j = 0; j < 3; ++j) {
5953             if (dst3 == mov[j].src) {
5954                 /*
5955                  * Conflict. Copy the source to a temporary, perform the
5956                  * remaining moves, then the extension from our scratch
5957                  * on the way out.
5958                  */
5959                 TCGReg scratch = parm->tmp[1];
5960 
5961                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5962                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5963                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5964                 return;
5965             }
5966         }
5967 
5968         /* No conflicts: perform this move and continue. */
5969         tcg_out_movext1(s, &mov[3]);
5970         /* fall through */
5971 
5972     case 3:
5973         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5974                         parm->ntmp ? parm->tmp[0] : -1);
5975         break;
5976     case 2:
5977         tcg_out_movext2(s, mov, mov + 1,
5978                         parm->ntmp ? parm->tmp[0] : -1);
5979         break;
5980     case 1:
5981         tcg_out_movext1(s, mov);
5982         break;
5983     default:
5984         g_assert_not_reached();
5985     }
5986 }
5987 
5988 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5989                                     TCGType type, tcg_target_long imm,
5990                                     const TCGLdstHelperParam *parm)
5991 {
5992     if (arg_slot_reg_p(slot)) {
5993         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5994     } else {
5995         int ofs = tcg_out_helper_stk_ofs(type, slot);
5996         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5997             tcg_debug_assert(parm->ntmp != 0);
5998             tcg_out_movi(s, type, parm->tmp[0], imm);
5999             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6000         }
6001     }
6002 }
6003 
6004 static void tcg_out_helper_load_common_args(TCGContext *s,
6005                                             const TCGLabelQemuLdst *ldst,
6006                                             const TCGLdstHelperParam *parm,
6007                                             const TCGHelperInfo *info,
6008                                             unsigned next_arg)
6009 {
6010     TCGMovExtend ptr_mov = {
6011         .dst_type = TCG_TYPE_PTR,
6012         .src_type = TCG_TYPE_PTR,
6013         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6014     };
6015     const TCGCallArgumentLoc *loc = &info->in[0];
6016     TCGType type;
6017     unsigned slot;
6018     tcg_target_ulong imm;
6019 
6020     /*
6021      * Handle env, which is always first.
6022      */
6023     ptr_mov.dst = loc->arg_slot;
6024     ptr_mov.src = TCG_AREG0;
6025     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6026 
6027     /*
6028      * Handle oi.
6029      */
6030     imm = ldst->oi;
6031     loc = &info->in[next_arg];
6032     type = TCG_TYPE_I32;
6033     switch (loc->kind) {
6034     case TCG_CALL_ARG_NORMAL:
6035         break;
6036     case TCG_CALL_ARG_EXTEND_U:
6037     case TCG_CALL_ARG_EXTEND_S:
6038         /* No extension required for MemOpIdx. */
6039         tcg_debug_assert(imm <= INT32_MAX);
6040         type = TCG_TYPE_REG;
6041         break;
6042     default:
6043         g_assert_not_reached();
6044     }
6045     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6046     next_arg++;
6047 
6048     /*
6049      * Handle ra.
6050      */
6051     loc = &info->in[next_arg];
6052     slot = loc->arg_slot;
6053     if (parm->ra_gen) {
6054         int arg_reg = -1;
6055         TCGReg ra_reg;
6056 
6057         if (arg_slot_reg_p(slot)) {
6058             arg_reg = tcg_target_call_iarg_regs[slot];
6059         }
6060         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6061 
6062         ptr_mov.dst = slot;
6063         ptr_mov.src = ra_reg;
6064         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6065     } else {
6066         imm = (uintptr_t)ldst->raddr;
6067         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6068     }
6069 }
6070 
6071 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6072                                        const TCGCallArgumentLoc *loc,
6073                                        TCGType dst_type, TCGType src_type,
6074                                        TCGReg lo, TCGReg hi)
6075 {
6076     MemOp reg_mo;
6077 
6078     if (dst_type <= TCG_TYPE_REG) {
6079         MemOp src_ext;
6080 
6081         switch (loc->kind) {
6082         case TCG_CALL_ARG_NORMAL:
6083             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6084             break;
6085         case TCG_CALL_ARG_EXTEND_U:
6086             dst_type = TCG_TYPE_REG;
6087             src_ext = MO_UL;
6088             break;
6089         case TCG_CALL_ARG_EXTEND_S:
6090             dst_type = TCG_TYPE_REG;
6091             src_ext = MO_SL;
6092             break;
6093         default:
6094             g_assert_not_reached();
6095         }
6096 
6097         mov[0].dst = loc->arg_slot;
6098         mov[0].dst_type = dst_type;
6099         mov[0].src = lo;
6100         mov[0].src_type = src_type;
6101         mov[0].src_ext = src_ext;
6102         return 1;
6103     }
6104 
6105     if (TCG_TARGET_REG_BITS == 32) {
6106         assert(dst_type == TCG_TYPE_I64);
6107         reg_mo = MO_32;
6108     } else {
6109         assert(dst_type == TCG_TYPE_I128);
6110         reg_mo = MO_64;
6111     }
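    /*
     * Illustrative: a 32-bit little-endian host passing an I64 puts lo
     * in loc[0] and hi in loc[1]; a big-endian host swaps the two slot
     * assignments via the HOST_BIG_ENDIAN indices below.
     */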
6112 
6113     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6114     mov[0].src = lo;
6115     mov[0].dst_type = TCG_TYPE_REG;
6116     mov[0].src_type = TCG_TYPE_REG;
6117     mov[0].src_ext = reg_mo;
6118 
6119     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6120     mov[1].src = hi;
6121     mov[1].dst_type = TCG_TYPE_REG;
6122     mov[1].src_type = TCG_TYPE_REG;
6123     mov[1].src_ext = reg_mo;
6124 
6125     return 2;
6126 }
6127 
6128 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6129                                    const TCGLdstHelperParam *parm)
6130 {
6131     const TCGHelperInfo *info;
6132     const TCGCallArgumentLoc *loc;
6133     TCGMovExtend mov[2];
6134     unsigned next_arg, nmov;
6135     MemOp mop = get_memop(ldst->oi);
6136 
6137     switch (mop & MO_SIZE) {
6138     case MO_8:
6139     case MO_16:
6140     case MO_32:
6141         info = &info_helper_ld32_mmu;
6142         break;
6143     case MO_64:
6144         info = &info_helper_ld64_mmu;
6145         break;
6146     case MO_128:
6147         info = &info_helper_ld128_mmu;
6148         break;
6149     default:
6150         g_assert_not_reached();
6151     }
6152 
6153     /* Defer env argument. */
6154     next_arg = 1;
6155 
6156     loc = &info->in[next_arg];
6157     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6158         /*
6159          * 32-bit host with 32-bit guest: zero-extend the guest address
6160          * to 64 bits for the helper by storing the low part, then
6161          * loading a zero for the high part.
6162          */
6163         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6164                                TCG_TYPE_I32, TCG_TYPE_I32,
6165                                ldst->addr_reg, -1);
6166         tcg_out_helper_load_slots(s, 1, mov, parm);
6167 
6168         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6169                                 TCG_TYPE_I32, 0, parm);
6170         next_arg += 2;
6171     } else {
6172         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6173                                       ldst->addr_reg, -1);
6174         tcg_out_helper_load_slots(s, nmov, mov, parm);
6175         next_arg += nmov;
6176     }
6177 
6178     switch (info->out_kind) {
6179     case TCG_CALL_RET_NORMAL:
6180     case TCG_CALL_RET_BY_VEC:
6181         break;
6182     case TCG_CALL_RET_BY_REF:
6183         /*
6184          * The return reference is in the first argument slot.
6185          * We need memory in which to return: reuse the top of the stack.
6186          */
6187         {
6188             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6189 
6190             if (arg_slot_reg_p(0)) {
6191                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6192                                  TCG_REG_CALL_STACK, ofs_slot0);
6193             } else {
6194                 tcg_debug_assert(parm->ntmp != 0);
6195                 tcg_out_addi_ptr(s, parm->tmp[0],
6196                                  TCG_REG_CALL_STACK, ofs_slot0);
6197                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6198                            TCG_REG_CALL_STACK, ofs_slot0);
6199             }
6200         }
6201         break;
6202     default:
6203         g_assert_not_reached();
6204     }
6205 
6206     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6207 }
6208 
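/*
 * Move the return value of a load helper into datalo/datahi,
 * applying any extension required for a sub-word load.
 */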
6209 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6210                                   bool load_sign,
6211                                   const TCGLdstHelperParam *parm)
6212 {
6213     MemOp mop = get_memop(ldst->oi);
6214     TCGMovExtend mov[2];
6215     int ofs_slot0;
6216 
6217     switch (ldst->type) {
6218     case TCG_TYPE_I64:
6219         if (TCG_TARGET_REG_BITS == 32) {
6220             break;
6221         }
6222         /* fall through */
6223 
6224     case TCG_TYPE_I32:
6225         mov[0].dst = ldst->datalo_reg;
6226         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6227         mov[0].dst_type = ldst->type;
6228         mov[0].src_type = TCG_TYPE_REG;
6229 
6230         /*
6231          * If load_sign, then we allowed the helper to perform the
6232          * appropriate sign extension to tcg_target_ulong, and all
6233          * we need now is a plain move.
6234          *
6235          * If not, then we expect the relevant extension
6236          * instruction to be no more expensive than a move, and
6237          * we thus save icache space etc. by using only one of
6238          * the two helper functions.
6239          */
6240         if (load_sign || !(mop & MO_SIGN)) {
6241             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6242                 mov[0].src_ext = MO_32;
6243             } else {
6244                 mov[0].src_ext = MO_64;
6245             }
6246         } else {
6247             mov[0].src_ext = mop & MO_SSIZE;
6248         }
6249         tcg_out_movext1(s, mov);
6250         return;
6251 
6252     case TCG_TYPE_I128:
6253         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6254         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6255         switch (TCG_TARGET_CALL_RET_I128) {
6256         case TCG_CALL_RET_NORMAL:
6257             break;
6258         case TCG_CALL_RET_BY_VEC:
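            /*
             * Spill the vector return value to the stack, then fall
             * through to reload the two halves just as for the
             * by-reference convention, which uses the same slots.
             */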
6259             tcg_out_st(s, TCG_TYPE_V128,
6260                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6261                        TCG_REG_CALL_STACK, ofs_slot0);
6262             /* fall through */
6263         case TCG_CALL_RET_BY_REF:
6264             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6265                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6266             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6267                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6268             return;
6269         default:
6270             g_assert_not_reached();
6271         }
6272         break;
6273 
6274     default:
6275         g_assert_not_reached();
6276     }
6277 
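    /* Reassemble the value from the call's register pair, ordered by
       host endianness; tcg_out_movext2 resolves any overlap between
       the two moves, using a scratch register if one was provided. */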
6278     mov[0].dst = ldst->datalo_reg;
6279     mov[0].src =
6280         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6281     mov[0].dst_type = TCG_TYPE_REG;
6282     mov[0].src_type = TCG_TYPE_REG;
6283     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6284 
6285     mov[1].dst = ldst->datahi_reg;
6286     mov[1].src =
6287         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6288     mov[1].dst_type = TCG_TYPE_REG;
6289     mov[1].src_type = TCG_TYPE_REG;
6290     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6291 
6292     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6293 }
6294 
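/* Assemble and load the arguments for a call to a store helper. */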
6295 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6296                                    const TCGLdstHelperParam *parm)
6297 {
6298     const TCGHelperInfo *info;
6299     const TCGCallArgumentLoc *loc;
6300     TCGMovExtend mov[4];
6301     TCGType data_type;
6302     unsigned next_arg, nmov, n;
6303     MemOp mop = get_memop(ldst->oi);
6304 
6305     switch (mop & MO_SIZE) {
6306     case MO_8:
6307     case MO_16:
6308     case MO_32:
6309         info = &info_helper_st32_mmu;
6310         data_type = TCG_TYPE_I32;
6311         break;
6312     case MO_64:
6313         info = &info_helper_st64_mmu;
6314         data_type = TCG_TYPE_I64;
6315         break;
6316     case MO_128:
6317         info = &info_helper_st128_mmu;
6318         data_type = TCG_TYPE_I128;
6319         break;
6320     default:
6321         g_assert_not_reached();
6322     }
6323 
6324     /* Defer env argument. */
6325     next_arg = 1;
6326     nmov = 0;
6327 
6328     /* Handle addr argument. */
6329     loc = &info->in[next_arg];
6330     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6331     if (TCG_TARGET_REG_BITS == 32) {
6332         /*
6333          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6334          * to 64 bits for the helper by storing the low part.  Later,
6335          * after we have processed the register inputs, we will load a
6336          * zero for the high part.
6337          */
6338         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6339                                TCG_TYPE_I32, TCG_TYPE_I32,
6340                                ldst->addr_reg, -1);
6341         next_arg += 2;
6342         nmov += 1;
6343     } else {
6344         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6345                                    ldst->addr_reg, -1);
6346         next_arg += n;
6347         nmov += n;
6348     }
6349 
6350     /* Handle data argument. */
6351     loc = &info->in[next_arg];
6352     switch (loc->kind) {
6353     case TCG_CALL_ARG_NORMAL:
6354     case TCG_CALL_ARG_EXTEND_U:
6355     case TCG_CALL_ARG_EXTEND_S:
6356         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6357                                    ldst->datalo_reg, ldst->datahi_reg);
6358         next_arg += n;
6359         nmov += n;
6360         tcg_out_helper_load_slots(s, nmov, mov, parm);
6361         break;
6362 
6363     case TCG_CALL_ARG_BY_REF:
6364         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6365         tcg_debug_assert(data_type == TCG_TYPE_I128);
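        /*
         * Store both halves of the 128-bit value into the stack slots
         * reserved for the by-reference argument, then pass the address
         * of that memory as the argument itself.
         */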
6366         tcg_out_st(s, TCG_TYPE_I64,
6367                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6368                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6369         tcg_out_st(s, TCG_TYPE_I64,
6370                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6371                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6372 
6373         tcg_out_helper_load_slots(s, nmov, mov, parm);
6374 
6375         if (arg_slot_reg_p(loc->arg_slot)) {
6376             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6377                              TCG_REG_CALL_STACK,
6378                              arg_slot_stk_ofs(loc->ref_slot));
6379         } else {
6380             tcg_debug_assert(parm->ntmp != 0);
6381             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6382                              arg_slot_stk_ofs(loc->ref_slot));
6383             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6384                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6385         }
6386         next_arg += 2;
6387         break;
6388 
6389     default:
6390         g_assert_not_reached();
6391     }
6392 
6393     if (TCG_TARGET_REG_BITS == 32) {
6394         /* Zero extend the address by loading a zero for the high part. */
6395         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6396         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6397     }
6398 
6399     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6400 }
6401 
6402 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6403 {
6404     int i, start_words, num_insns;
6405     TCGOp *op;
6406 
6407     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6408                  && qemu_log_in_addr_range(pc_start))) {
6409         FILE *logfile = qemu_log_trylock();
6410         if (logfile) {
6411             fprintf(logfile, "OP:\n");
6412             tcg_dump_ops(s, logfile, false);
6413             fprintf(logfile, "\n");
6414             qemu_log_unlock(logfile);
6415         }
6416     }
6417 
6418 #ifdef CONFIG_DEBUG_TCG
6419     /* Ensure all labels referenced have been emitted.  */
6420     {
6421         TCGLabel *l;
6422         bool error = false;
6423 
6424         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6425             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6426                 qemu_log_mask(CPU_LOG_TB_OP,
6427                               "$L%d referenced but not present.\n", l->id);
6428                 error = true;
6429             }
6430         }
6431         assert(!error);
6432     }
6433 #endif
6434 
6435     /* Do not reuse any EBB that may be allocated within the TB. */
6436     tcg_temp_ebb_reset_freed(s);
6437 
6438     tcg_optimize(s);
6439 
6440     reachable_code_pass(s);
6441     liveness_pass_0(s);
6442     liveness_pass_1(s);
6443 
6444     if (s->nb_indirects > 0) {
6445         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6446                      && qemu_log_in_addr_range(pc_start))) {
6447             FILE *logfile = qemu_log_trylock();
6448             if (logfile) {
6449                 fprintf(logfile, "OP before indirect lowering:\n");
6450                 tcg_dump_ops(s, logfile, false);
6451                 fprintf(logfile, "\n");
6452                 qemu_log_unlock(logfile);
6453             }
6454         }
6455 
6456         /* Replace indirect temps with direct temps.  */
6457         if (liveness_pass_2(s)) {
6458             /* If changes were made, re-run liveness.  */
6459             liveness_pass_1(s);
6460         }
6461     }
6462 
6463     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6464                  && qemu_log_in_addr_range(pc_start))) {
6465         FILE *logfile = qemu_log_trylock();
6466         if (logfile) {
6467             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6468             tcg_dump_ops(s, logfile, true);
6469             fprintf(logfile, "\n");
6470             qemu_log_unlock(logfile);
6471         }
6472     }
6473 
6474     /* Initialize goto_tb jump offsets. */
6475     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6476     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6477     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6478     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6479 
6480     tcg_reg_alloc_start(s);
6481 
6482     /*
6483      * Reset the buffer pointers when restarting after overflow.
6484      * TODO: Move this into translate-all.c with the rest of the
6485      * buffer management.  Having only this done here is confusing.
6486      */
6487     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6488     s->code_ptr = s->code_buf;
6489     s->data_gen_ptr = NULL;
6490 
6491     QSIMPLEQ_INIT(&s->ldst_labels);
6492     s->pool_labels = NULL;
6493 
6494     start_words = s->insn_start_words;
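    /* One uint64_t per insn_start word per guest insn in the TB. */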
6495     s->gen_insn_data =
6496         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6497 
6498     tcg_out_tb_start(s);
6499 
6500     num_insns = -1;
6501     QTAILQ_FOREACH(op, &s->ops, link) {
6502         TCGOpcode opc = op->opc;
6503 
6504         switch (opc) {
6505         case INDEX_op_mov:
6506         case INDEX_op_mov_vec:
6507             tcg_reg_alloc_mov(s, op);
6508             break;
6509         case INDEX_op_dup_vec:
6510             tcg_reg_alloc_dup(s, op);
6511             break;
6512         case INDEX_op_insn_start:
6513             if (num_insns >= 0) {
6514                 size_t off = tcg_current_code_size(s);
6515                 s->gen_insn_end_off[num_insns] = off;
6516                 /* Assert that we do not overflow our stored offset.  */
6517                 assert(s->gen_insn_end_off[num_insns] == off);
6518             }
6519             num_insns++;
6520             for (i = 0; i < start_words; ++i) {
6521                 s->gen_insn_data[num_insns * start_words + i] =
6522                     tcg_get_insn_start_param(op, i);
6523             }
6524             break;
6525         case INDEX_op_discard:
6526             temp_dead(s, arg_temp(op->args[0]));
6527             break;
6528         case INDEX_op_set_label:
6529             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6530             tcg_out_label(s, arg_label(op->args[0]));
6531             break;
6532         case INDEX_op_call:
6533             tcg_reg_alloc_call(s, op);
6534             break;
6535         case INDEX_op_exit_tb:
6536             tcg_out_exit_tb(s, op->args[0]);
6537             break;
6538         case INDEX_op_goto_tb:
6539             tcg_out_goto_tb(s, op->args[0]);
6540             break;
6541         case INDEX_op_dup2_vec:
6542             if (tcg_reg_alloc_dup2(s, op)) {
6543                 break;
6544             }
6545             /* fall through */
6546         default:
6547             /* Sanity check that we've not introduced any unhandled opcodes. */
6548             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6549                                               TCGOP_FLAGS(op)));
6550             /* Note: it would be much faster to have specialized
6551                register allocator functions for some common
6552                argument patterns.  */
6553             tcg_reg_alloc_op(s, op);
6554             break;
6555         }
6556         /* Test for (pending) buffer overflow.  The assumption is that any
6557            one operation beginning below the high water mark cannot overrun
6558            the buffer completely.  Thus we can test for overflow after
6559            generating code without having to check during generation.  */
6560         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6561             return -1;
6562         }
6563         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6564         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6565             return -2;
6566         }
6567     }
6568     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6569     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6570 
6571     /* Generate TB finalization at the end of block */
6572     i = tcg_out_ldst_finalize(s);
6573     if (i < 0) {
6574         return i;
6575     }
6576     i = tcg_out_pool_finalize(s);
6577     if (i < 0) {
6578         return i;
6579     }
6580     if (!tcg_resolve_relocs(s)) {
6581         return -2;
6582     }
6583 
6584 #ifndef CONFIG_TCG_INTERPRETER
6585     /* flush instruction cache */
6586     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6587                         (uintptr_t)s->code_buf,
6588                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6589 #endif
6590 
6591     return tcg_current_code_size(s);
6592 }
6593 
6594 #ifdef ELF_HOST_MACHINE
6595 /* In order to use this feature, the backend needs to do three things:
6596 
6597    (1) Define ELF_HOST_MACHINE, both to supply the value placed into
6598        the ELF image and to signal support for the feature.
6599 
6600    (2) Define tcg_register_jit.  This should create a buffer containing
6601        the contents of a .debug_frame section that describes the post-
6602        prologue unwind info for the tcg machine.
6603 
6604    (3) Call tcg_register_jit_int, with the constructed .debug_frame;
6605        a sketch of (2) and (3) follows this comment.  */
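
/* A minimal sketch of steps (2) and (3), modelled on existing backends;
   "debug_frame" here stands for a hypothetical backend-defined object
   laid out around the DebugFrameHeader described earlier:

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }
 */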
6606 
6607 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6608 typedef enum {
6609     JIT_NOACTION = 0,
6610     JIT_REGISTER_FN,
6611     JIT_UNREGISTER_FN
6612 } jit_actions_t;
6613 
6614 struct jit_code_entry {
6615     struct jit_code_entry *next_entry;
6616     struct jit_code_entry *prev_entry;
6617     const void *symfile_addr;
6618     uint64_t symfile_size;
6619 };
6620 
6621 struct jit_descriptor {
6622     uint32_t version;
6623     uint32_t action_flag;
6624     struct jit_code_entry *relevant_entry;
6625     struct jit_code_entry *first_entry;
6626 };
6627 
6628 void __jit_debug_register_code(void) __attribute__((noinline));
6629 void __jit_debug_register_code(void)
6630 {
6631     asm("");
6632 }
6633 
6634 /* Must statically initialize the version, because GDB may check
6635    the version before we can set it.  */
6636 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
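
/* QEMU registers a single permanent entry and never unregisters it, but
   for reference the unregister action documented by GDB is the mirror
   image; with our one-entry list it would be:

       __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
       __jit_debug_descriptor.relevant_entry = &one_entry;
       __jit_debug_descriptor.first_entry = NULL;
       __jit_debug_register_code();
 */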
6637 
6638 /* End GDB interface.  */
6639 
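/* Return the offset of @str within @strtab.  @str must be present:
   there is no termination check, so a missing string would walk off
   the end of the table.  */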
6640 static int find_string(const char *strtab, const char *str)
6641 {
6642     const char *p = strtab + 1;
6643 
6644     while (1) {
6645         if (strcmp(p, str) == 0) {
6646             return p - strtab;
6647         }
6648         p += strlen(p) + 1;
6649     }
6650 }
6651 
6652 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6653                                  const void *debug_frame,
6654                                  size_t debug_frame_size)
6655 {
6656     struct __attribute__((packed)) DebugInfo {
6657         uint32_t  len;
6658         uint16_t  version;
6659         uint32_t  abbrev;
6660         uint8_t   ptr_size;
6661         uint8_t   cu_die;
6662         uint16_t  cu_lang;
6663         uintptr_t cu_low_pc;
6664         uintptr_t cu_high_pc;
6665         uint8_t   fn_die;
6666         char      fn_name[16];
6667         uintptr_t fn_low_pc;
6668         uintptr_t fn_high_pc;
6669         uint8_t   cu_eoc;
6670     };
6671 
6672     struct ElfImage {
6673         ElfW(Ehdr) ehdr;
6674         ElfW(Phdr) phdr;
6675         ElfW(Shdr) shdr[7];
6676         ElfW(Sym)  sym[2];
6677         struct DebugInfo di;
6678         uint8_t    da[24];
6679         char       str[80];
6680     };
6681 
6682     struct ElfImage *img;
6683 
6684     static const struct ElfImage img_template = {
6685         .ehdr = {
6686             .e_ident[EI_MAG0] = ELFMAG0,
6687             .e_ident[EI_MAG1] = ELFMAG1,
6688             .e_ident[EI_MAG2] = ELFMAG2,
6689             .e_ident[EI_MAG3] = ELFMAG3,
6690             .e_ident[EI_CLASS] = ELF_CLASS,
6691             .e_ident[EI_DATA] = ELF_DATA,
6692             .e_ident[EI_VERSION] = EV_CURRENT,
6693             .e_type = ET_EXEC,
6694             .e_machine = ELF_HOST_MACHINE,
6695             .e_version = EV_CURRENT,
6696             .e_phoff = offsetof(struct ElfImage, phdr),
6697             .e_shoff = offsetof(struct ElfImage, shdr),
6698             .e_ehsize = sizeof(ElfW(Ehdr)),
6699             .e_phentsize = sizeof(ElfW(Phdr)),
6700             .e_phnum = 1,
6701             .e_shentsize = sizeof(ElfW(Shdr)),
6702             .e_shnum = ARRAY_SIZE(img->shdr),
6703             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6704 #ifdef ELF_HOST_FLAGS
6705             .e_flags = ELF_HOST_FLAGS,
6706 #endif
6707 #ifdef ELF_OSABI
6708             .e_ident[EI_OSABI] = ELF_OSABI,
6709 #endif
6710         },
6711         .phdr = {
6712             .p_type = PT_LOAD,
6713             .p_flags = PF_X,
6714         },
6715         .shdr = {
6716             [0] = { .sh_type = SHT_NULL },
6717             /* Trick: The contents of code_gen_buffer are not present in
6718                this fake ELF file; they were allocated elsewhere.  Therefore
6719                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6720                will not look for contents.  We can record any address.  */
6721             [1] = { /* .text */
6722                 .sh_type = SHT_NOBITS,
6723                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6724             },
6725             [2] = { /* .debug_info */
6726                 .sh_type = SHT_PROGBITS,
6727                 .sh_offset = offsetof(struct ElfImage, di),
6728                 .sh_size = sizeof(struct DebugInfo),
6729             },
6730             [3] = { /* .debug_abbrev */
6731                 .sh_type = SHT_PROGBITS,
6732                 .sh_offset = offsetof(struct ElfImage, da),
6733                 .sh_size = sizeof(img->da),
6734             },
6735             [4] = { /* .debug_frame */
6736                 .sh_type = SHT_PROGBITS,
6737                 .sh_offset = sizeof(struct ElfImage),
6738             },
6739             [5] = { /* .symtab */
6740                 .sh_type = SHT_SYMTAB,
6741                 .sh_offset = offsetof(struct ElfImage, sym),
6742                 .sh_size = sizeof(img->sym),
6743                 .sh_info = 1,
6744                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6745                 .sh_entsize = sizeof(ElfW(Sym)),
6746             },
6747             [6] = { /* .strtab */
6748                 .sh_type = SHT_STRTAB,
6749                 .sh_offset = offsetof(struct ElfImage, str),
6750                 .sh_size = sizeof(img->str),
6751             }
6752         },
6753         .sym = {
6754             [1] = { /* code_gen_buffer */
6755                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6756                 .st_shndx = 1,
6757             }
6758         },
6759         .di = {
6760             .len = sizeof(struct DebugInfo) - 4,
6761             .version = 2,
6762             .ptr_size = sizeof(void *),
6763             .cu_die = 1,
6764             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6765             .fn_die = 2,
6766             .fn_name = "code_gen_buffer"
6767         },
6768         .da = {
6769             1,          /* abbrev number (the cu) */
6770             0x11, 1,    /* DW_TAG_compile_unit, has children */
6771             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6772             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6773             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6774             0, 0,       /* end of abbrev */
6775             2,          /* abbrev number (the fn) */
6776             0x2e, 0,    /* DW_TAG_subprogram, no children */
6777             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6778             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6779             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6780             0, 0,       /* end of abbrev */
6781             0           /* no more abbrev */
6782         },
6783         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6784                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6785     };
6786 
6787     /* We only need a single jit entry; statically allocate it.  */
6788     static struct jit_code_entry one_entry;
6789 
6790     uintptr_t buf = (uintptr_t)buf_ptr;
6791     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6792     DebugFrameHeader *dfh;
6793 
6794     img = g_malloc(img_size);
6795     *img = img_template;
6796 
6797     img->phdr.p_vaddr = buf;
6798     img->phdr.p_paddr = buf;
6799     img->phdr.p_memsz = buf_size;
6800 
6801     img->shdr[1].sh_name = find_string(img->str, ".text");
6802     img->shdr[1].sh_addr = buf;
6803     img->shdr[1].sh_size = buf_size;
6804 
6805     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6806     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6807 
6808     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6809     img->shdr[4].sh_size = debug_frame_size;
6810 
6811     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6812     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6813 
6814     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6815     img->sym[1].st_value = buf;
6816     img->sym[1].st_size = buf_size;
6817 
6818     img->di.cu_low_pc = buf;
6819     img->di.cu_high_pc = buf + buf_size;
6820     img->di.fn_low_pc = buf;
6821     img->di.fn_high_pc = buf + buf_size;
6822 
6823     dfh = (DebugFrameHeader *)(img + 1);
6824     memcpy(dfh, debug_frame, debug_frame_size);
6825     dfh->fde.func_start = buf;
6826     dfh->fde.func_len = buf_size;
6827 
6828 #ifdef DEBUG_JIT
6829     /* Enable this block to debug creation of the ELF image file,
6830        which can then be inspected with readelf, objdump, etc.  */
6831     {
6832         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6833         FILE *f = fopen(jit, "w+b");
6834         if (f) {
6835             if (fwrite(img, img_size, 1, f) != 1) {
6836                 /* Avoid stupid unused return value warning for fwrite.  */
6837             }
6838             fclose(f);
6839         }
6840     }
6841 #endif
6842 
6843     one_entry.symfile_addr = img;
6844     one_entry.symfile_size = img_size;
6845 
6846     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6847     __jit_debug_descriptor.relevant_entry = &one_entry;
6848     __jit_debug_descriptor.first_entry = &one_entry;
6849     __jit_debug_register_code();
6850 }
6851 #else
6852 /* No support for the feature.  Provide the entry point expected by exec.c,
6853    and implement the internal function we declared earlier.  */
6854 
6855 static void tcg_register_jit_int(const void *buf, size_t size,
6856                                  const void *debug_frame,
6857                                  size_t debug_frame_size)
6858 {
6859 }
6860 
6861 void tcg_register_jit(const void *buf, size_t buf_size)
6862 {
6863 }
6864 #endif /* ELF_HOST_MACHINE */
6865 
6866 #if !TCG_TARGET_MAYBE_vec
6867 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6868 {
6869     g_assert_not_reached();
6870 }
6871 #endif
6872