xref: /openbmc/qemu/tcg/tcg.c (revision 6ca594517ab389f3095c4aab745e168cdd8e8ff5)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */

/*
 * DWARF .debug_frame Common Information Entry header, emitted so GDB
 * can unwind through JIT-generated code (see tcg_register_jit_int).
 * Field layout follows the DWARF .debug_frame format.
 */
typedef struct {
    /* Length of the CIE, excluding this field; aligned so the whole
       record starts on a pointer-sized boundary. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;            /* CIE identifier */
    uint8_t version;        /* .debug_frame format version */
    char augmentation[1];   /* NUL-terminated augmentation string */
    uint8_t code_align;     /* code alignment factor (ULEB128-encodable) */
    uint8_t data_align;     /* data alignment factor (SLEB128-encodable) */
    uint8_t return_column;  /* register number holding the return address */
} DebugFrameCIE;

/* DWARF .debug_frame Frame Description Entry header for one code range. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset of the owning CIE */
    uintptr_t func_start;   /* start address of the covered code */
    uintptr_t func_len;     /* length in bytes of the covered code */
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE, as handed to GDB. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Descriptor for one deferred qemu_ld/qemu_st slow path.  Entries are
 * queued on TCGContext.ldst_labels (see new_ldst_label) and emitted at
 * the end of the TB by tcg_out_ldst_finalize.
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* memory operation + mmu index */
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;  /* link in ldst_labels queue */
};
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector-op emitters: provided by the backend when TCG_TARGET_MAYBE_vec,
 * otherwise stubbed out below so generic code links unconditionally.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Unreachable stubs: no vector opcodes are generated for this host. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* Without vector support, no vector op can ever be emitted. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
/*
 * Backend-supplied parameters for building qemu_ld/st helper calls
 * (consumed by tcg_out_ld_helper_args and friends).
 */
typedef struct TCGLdstHelperParam {
    /* Hook invoked with the ldst label and a target argument register;
       presumably materializes the return-address argument — confirm
       against the backend implementations. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers usable while marshalling */
} TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
/*
 * Out-of-line load helpers for the qemu_ld slow path, indexed by the
 * MemOp size+sign bits (MO_SSIZE).  Marked unused because not every
 * backend references the table.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* These entries are provided only on 64-bit hosts. */
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
220 
/*
 * Out-of-line store helpers for the qemu_st slow path, indexed by the
 * MemOp size bits (MO_SIZE).  Marked unused because not every backend
 * references the table.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* 128-bit stores are handled via helper only on 64-bit hosts. */
    [MO_128] = helper_st16_mmu,
#endif
};
230 
/* Result of atom_and_align_for_opc: atomicity and alignment to honor
   for one memory operation. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream, advancing s->code_ptr. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at *p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
273 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the code stream, advancing s->code_ptr. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Insn unit smaller than the value: byte-copy and advance by
           the equivalent number of units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 16-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
296 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the code stream, advancing s->code_ptr. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        /* Insn unit smaller than the value: byte-copy and advance by
           the equivalent number of units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 32-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
319 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream, advancing s->code_ptr. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        /* Insn unit smaller than the value: byte-copy and advance by
           the equivalent number of units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
/* Resolve label @l to the current output position, recording its
   read-execute address.  A label may be resolved only once. */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
/* Record the current code size as the goto_tb reset point for jump
   slot @which of the TB under construction. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
403 
/* Record the current code size as the location of the patchable jump
   instruction for slot @which of the TB under construction. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
412 
/* Address of the indirect jump target slot @which of the current TB. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
421 
/* Offset of the softmmu TLB descriptor tlb.f[@which], measured from
   the END of CPUNegativeOffsetState — hence a negative value, suitable
   for addressing relative to the env pointer. */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
428 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit back to the sigsetjmp in the translation loop;
       the -2 status is interpreted by that caller (not visible here). */
    siglongjmp(s->jmp_trans, -2);
}
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination: a TCGReg, or an argument slot
                           number (see comment above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        /* 32-bit source data: the operation depends on the register
           types on either side, not just the extension. */
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                /* i32 -> i32: plain move, no extension needed. */
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                /* i64 -> i32: truncate to the low 32 bits. */
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* i32 -> i64: sign- or zero-extend per MO_SIGN. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* i64 -> i64: extend from the low 32 bits in place. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit data requires a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
514 
515 /* Minor variations on a theme, using a structure. */
/* As tcg_out_movext, with parameters from @i but @src overriding i->src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
521 
/* As tcg_out_movext, with all parameters bundled in @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* If i1's destination does not clobber i2's source, the two moves
       are independent when emitted in this order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* Here i1->dst == src2.  If additionally i2->dst == src1, the pair
       forms a swap: prefer a real xchg, else spill src1 to scratch. */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 first: i1->dst (== original src2) is consumed above. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If some destination clobbers neither remaining source, emit that
       move first and reduce to the two-move overlap problem. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg available: break the cycle via the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg available: break the cycle via the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
/* One pending constant-pool entry: the instruction to patch, its
   relocation info, and the data words to emit at finalize time. */
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;  /* sorted singly-linked list */
    tcg_insn_unit *label;           /* instruction requiring patching */
    intptr_t addend;                /* relocation addend */
    int rtype;                      /* relocation type for patch_reloc */
    unsigned nlong;                 /* number of data words */
    tcg_target_ulong data[];        /* the constant data itself */
} TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
/*
 * Insert @n into the pool list, kept sorted by descending nlong and
 * then descending data order, so that identical constants end up
 * adjacent and can be deduplicated by tcg_out_pool_finalize.
 */
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        /* Same size: order by the data bytes themselves. */
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
704 
705 /* The "usual" for generic integer code.  */
/* Queue a one-word pool constant @d, to be referenced by @label with
   relocation @rtype/@addend. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
714 
715 /* For v64 or v128, depending on the host.  */
/* Queue a two-word pool constant (v64 or v128, depending on the host). */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
/*
 * Emit the slow paths for all queued qemu_ld/st labels.
 * Returns 0 on success, -2 if a slow path could not be emitted,
 * -1 on (pending) code buffer overflow.
 */
static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}
791 
/*
 * Emit the accumulated constant pool after the code and patch every
 * instruction that references a pool entry.
 * Returns 0 on success, -1 on (pending) buffer overflow, -2 if a
 * relocation could not be applied.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;   /* last entry actually emitted */
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /* The list is sorted (see new_pool_insert), so identical
           constants are adjacent: emit data only when it differs from
           the previous entry, deduplicating the rest. */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        /* Patch with the read-execute address of the entry just emitted
           (or of the duplicate shared with the previous label). */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
/* Enumeration of all operand-constraint combinations; the body is
   generated by expanding the C_* macros above over
   tcg-target-con-set.h, preceded by two sentinel values. */
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;   /* number of output / input operands */
    /* One constraint-letter string per operand, outputs first. */
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;
894 
/*
 * Second expansion of tcg-target-con-set.h: now each C_*_I* entry
 * becomes a TCGConstraintSet initializer with the operand names
 * stringified.  The "&" prefix on C_N* outputs presumably marks an
 * output that may not overlap an input -- confirm in the constraint
 * string parser.
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

/* Same include as above; this expansion fills the array body, so the
   array index of each entry matches its enumerator. */
static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
918 
/* Undo the second expansion before the final redefinition. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

/* Third expansion: identical to the first but without the trailing
   comma, so each macro use is a plain expression naming an enumerator. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host isa.
966  *   Otherwise:        The register allocation constrains for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    /* Consulted only when static_constraint == C_Dynamic (see above). */
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

/* Binary op: reg = op(reg, reg), and optionally reg = op(reg, imm). */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/*
 * Combined division/remainder (used by INDEX_op_divs2/divu2 below).
 * The a0/a1/a4 naming presumably mirrors the TCGOp argument slots,
 * with the skipped slots being implicit -- confirm at the call site.
 */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

/* Unary op: reg = op(reg). */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/* Subtraction: reg-reg form, plus the form with a constant minuend
   (a1 is an immediate in out_rir). */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1007 
1008 #include "tcg-target.c.inc"
1009 
#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
/* Opcodes without an entry are left NULL by the designated initializers. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP
1051 
1052 /*
1053  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1054  * and registered the target's TCG globals) must register with this function
1055  * before initiating translation.
1056  *
1057  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1058  * of tcg_region_init() for the reasoning behind this.
1059  *
1060  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1061  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
1062  * is not used anymore for translation once this function is called.
1063  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both
 * system/user modes.
1067  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: all threads share the single init context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a bitwise copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /*
     * Relink mem_base.  The copied globals still point their mem_base
     * into tcg_init_ctx.temps[]; redirect each to the temp at the same
     * index within this context's own array.
     */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /*
     * NOTE(review): context 0 appears to reuse the parent's initial
     * region allocation and only later threads allocate here --
     * confirm against tcg_region_init().
     */
    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1102 
1103 /* pool based memory allocation */
1104 void *tcg_malloc_internal(TCGContext *s, int size)
1105 {
1106     TCGPool *p;
1107     int pool_size;
1108 
1109     if (size > TCG_POOL_CHUNK_SIZE) {
1110         /* big malloc: insert a new pool (XXX: could optimize) */
1111         p = g_malloc(sizeof(TCGPool) + size);
1112         p->size = size;
1113         p->next = s->pool_first_large;
1114         s->pool_first_large = p;
1115         return p->data;
1116     } else {
1117         p = s->pool_current;
1118         if (!p) {
1119             p = s->pool_first;
1120             if (!p)
1121                 goto new_pool;
1122         } else {
1123             if (!p->next) {
1124             new_pool:
1125                 pool_size = TCG_POOL_CHUNK_SIZE;
1126                 p = g_malloc(sizeof(TCGPool) + pool_size);
1127                 p->size = pool_size;
1128                 p->next = NULL;
1129                 if (s->pool_current) {
1130                     s->pool_current->next = p;
1131                 } else {
1132                     s->pool_first = p;
1133                 }
1134             } else {
1135                 p = p->next;
1136             }
1137         }
1138     }
1139     s->pool_current = p;
1140     s->pool_cur = p->data + size;
1141     s->pool_end = p->data + p->size;
1142     return p->data;
1143 }
1144 
1145 void tcg_pool_reset(TCGContext *s)
1146 {
1147     TCGPool *p, *t;
1148     for (p = s->pool_first_large; p; p = t) {
1149         t = p->next;
1150         g_free(p);
1151     }
1152     s->pool_first_large = NULL;
1153     s->pool_cur = s->pool_end = NULL;
1154     s->pool_current = NULL;
1155 }
1156 
1157 /*
1158  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1159  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1160  * We only use these for layout in tcg_out_ld_helper_ret and
1161  * tcg_out_st_helper_args, and share them between several of
1162  * the helpers, with the end result that it's easier to build manually.
1163  */
1164 
/* "ttl" (target long) matches the width of tcg_target_ulong. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* Each typemask packs one 3-bit typecode per position, return first. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1227 
1228 #ifdef CONFIG_TCG_INTERPRETER
1229 static ffi_type *typecode_to_ffi(int argmask)
1230 {
1231     /*
1232      * libffi does not support __int128_t, so we have forced Int128
1233      * to use the structure definition instead of the builtin type.
1234      */
1235     static ffi_type *ffi_type_i128_elements[3] = {
1236         &ffi_type_uint64,
1237         &ffi_type_uint64,
1238         NULL
1239     };
1240     static ffi_type ffi_type_i128 = {
1241         .size = 16,
1242         .alignment = __alignof__(Int128),
1243         .type = FFI_TYPE_STRUCT,
1244         .elements = ffi_type_i128_elements,
1245     };
1246 
1247     switch (argmask) {
1248     case dh_typecode_void:
1249         return &ffi_type_void;
1250     case dh_typecode_i32:
1251         return &ffi_type_uint32;
1252     case dh_typecode_s32:
1253         return &ffi_type_sint32;
1254     case dh_typecode_i64:
1255         return &ffi_type_uint64;
1256     case dh_typecode_s64:
1257         return &ffi_type_sint64;
1258     case dh_typecode_ptr:
1259         return &ffi_type_pointer;
1260     case dh_typecode_i128:
1261         return &ffi_type_i128;
1262     }
1263     g_assert_not_reached();
1264 }
1265 
/*
 * Build a libffi call interface (cif) describing one helper's
 * signature, decoded from info->typemask.  The cif and its argument
 * type array are allocated together; ownership passes to the caller
 * (stored via HELPER_INFO_INIT_VAL below).
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];  /* flexible array: one entry per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);  /* highest used bit of arg codes */
    nargs = DIV_ROUND_UP(nargs, 3);     /* 3 bits per argument typecode */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);  /* bits [2:0] = return */
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
/* Without libffi, initialization reduces to setting (I)->init nonzero. */
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1306 
1307 static inline bool arg_slot_reg_p(unsigned arg_slot)
1308 {
1309     /*
1310      * Split the sizeof away from the comparison to avoid Werror from
1311      * "unsigned < 0 is always false", when iarg_regs is empty.
1312      */
1313     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1314     return arg_slot < nreg;
1315 }
1316 
1317 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1318 {
1319     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1320     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1321 
1322     tcg_debug_assert(stk_slot < max);
1323     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1324 }
1325 
/* Running totals while laying out one helper's call arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1332 
1333 static void layout_arg_even(TCGCumulativeArgs *cum)
1334 {
1335     cum->arg_slot += cum->arg_slot & 1;
1336 }
1337 
1338 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1339                          TCGCallArgumentKind kind)
1340 {
1341     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1342 
1343     *loc = (TCGCallArgumentLoc){
1344         .kind = kind,
1345         .arg_idx = cum->arg_idx,
1346         .arg_slot = cum->arg_slot,
1347     };
1348     cum->info_in_idx++;
1349     cum->arg_slot++;
1350 }
1351 
1352 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1353                                 TCGHelperInfo *info, int n)
1354 {
1355     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1356 
1357     for (int i = 0; i < n; ++i) {
1358         /* Layout all using the same arg_idx, adjusting the subindex. */
1359         loc[i] = (TCGCallArgumentLoc){
1360             .kind = TCG_CALL_ARG_NORMAL,
1361             .arg_idx = cum->arg_idx,
1362             .tmp_subindex = i,
1363             .arg_slot = cum->arg_slot + i,
1364         };
1365     }
1366     cum->info_in_idx += n;
1367     cum->arg_slot += n;
1368 }
1369 
1370 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1371 {
1372     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1373     int n = 128 / TCG_TARGET_REG_BITS;
1374 
1375     /* The first subindex carries the pointer. */
1376     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1377 
1378     /*
1379      * The callee is allowed to clobber memory associated with
1380      * structure pass by-reference.  Therefore we must make copies.
1381      * Allocate space from "ref_slot", which will be adjusted to
1382      * follow the parameters on the stack.
1383      */
1384     loc[0].ref_slot = cum->ref_slot;
1385 
1386     /*
1387      * Subsequent words also go into the reference slot, but
1388      * do not accumulate into the regular arguments.
1389      */
1390     for (int i = 1; i < n; ++i) {
1391         loc[i] = (TCGCallArgumentLoc){
1392             .kind = TCG_CALL_ARG_BY_REF_N,
1393             .arg_idx = cum->arg_idx,
1394             .tmp_subindex = i,
1395             .ref_slot = cum->ref_slot + i,
1396         };
1397     }
1398     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1399     cum->ref_slot += n;
1400 }
1401 
/*
 * Decode info->typemask and fill in the call layout (nr_out, out_kind,
 * nr_in, in[]) according to the target's calling convention macros.
 * The TCG_TARGET_CALL_* switches are on build-time constants, so each
 * backend compiles down to a single path.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* Two registers on a 32-bit host, one on a 64-bit host. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * One 3-bit typecode per argument; bits [2:0] held the return.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* typecode & 1 selects the signed extension variant. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Copies go after the spilled stack arguments, aligned
               suitably for Int128. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1582 
1583 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1584 static void process_constraint_sets(void);
1585 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1586                                             TCGReg reg, const char *name);
1587 
/* One-time initialization of the parent TCG context and globals. */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Compute argument layouts for the out-of-line ldst helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    /* Register the fixed "env" global: the CPU state pointer in TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1642 
/* Top-level TCG start-up: initialize the context, then the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1648 
1649 /*
1650  * Allocate TBs right before their corresponding translated code, making
1651  * sure that TBs and code are on different cache lines.
1652  */
1653 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1654 {
1655     uintptr_t align = qemu_icache_linesize;
1656     TranslationBlock *tb;
1657     void *next;
1658 
1659  retry:
1660     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1661     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1662 
1663     if (unlikely(next > s->code_gen_highwater)) {
1664         if (tcg_region_alloc(s)) {
1665             return NULL;
1666         }
1667         goto retry;
1668     }
1669     qatomic_set(&s->code_gen_ptr, next);
1670     return tb;
1671 }
1672 
/* Emit the prologue at the start of the code buffer and publish it. */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry point into generated code is the start of the prologue. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to the execution view. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    /* Optionally log a disassembly of the prologue and constant pool. */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1749 
/*
 * Reset the context for translation of a new function (TB).
 * Discards all non-global temps, cached constants, ops and labels
 * from the previous translation.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all temps above the globals; they are re-allocated per TB.  */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    /* Spill-slot allocation restarts at the bottom of the frame.  */
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    /* These are set up by the front end before translation begins.  */
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}
1781 
1782 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1783 {
1784     int n = s->nb_temps++;
1785 
1786     if (n >= TCG_MAX_TEMPS) {
1787         tcg_raise_tb_overflow(s);
1788     }
1789     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1790 }
1791 
1792 static TCGTemp *tcg_global_alloc(TCGContext *s)
1793 {
1794     TCGTemp *ts;
1795 
1796     tcg_debug_assert(s->nb_globals == s->nb_temps);
1797     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1798     s->nb_globals++;
1799     ts = tcg_temp_alloc(s);
1800     ts->kind = TEMP_GLOBAL;
1801 
1802     return ts;
1803 }
1804 
1805 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1806                                             TCGReg reg, const char *name)
1807 {
1808     TCGTemp *ts;
1809 
1810     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1811 
1812     ts = tcg_global_alloc(s);
1813     ts->base_type = type;
1814     ts->type = type;
1815     ts->kind = TEMP_FIXED;
1816     ts->reg = reg;
1817     ts->name = name;
1818     tcg_regset_set_reg(s->reserved_regs, reg);
1819 
1820     return ts;
1821 }
1822 
1823 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1824 {
1825     s->frame_start = start;
1826     s->frame_end = start + size;
1827     s->frame_temp
1828         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1829 }
1830 
/*
 * Create a global temp of TYPE that lives in memory at BASE + OFFSET.
 * On 32-bit hosts a 64-bit global is represented as two consecutive
 * 32-bit halves named NAME_0 and NAME_1.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit global counts as two indirect accesses.  */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* Low-offset half: NAME_0 at OFFSET.  */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The two halves must occupy consecutive temp slots.  */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        /* Caller-provided NAME must outlive the context.  */
        ts->name = name;
    }
    return ts;
}
1890 
1891 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1892 {
1893     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1894     return temp_tcgv_i32(ts);
1895 }
1896 
1897 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1898 {
1899     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1900     return temp_tcgv_i64(ts);
1901 }
1902 
1903 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1904 {
1905     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1906     return temp_tcgv_ptr(ts);
1907 }
1908 
/*
 * Allocate a temporary of TYPE with lifetime KIND (TEMP_EBB or TEMP_TB).
 * Freed EBB temps are recycled via the per-type free_temps bitmaps;
 * TB temps are always newly allocated.  Types wider than the host
 * register are built from consecutive TCGTemps with increasing
 * temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of consecutive temp slots needed to represent TYPE.  */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Each piece is one host register wide.  */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Pieces must occupy consecutive temp slots.  */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1972 
1973 TCGv_i32 tcg_temp_new_i32(void)
1974 {
1975     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1976 }
1977 
1978 TCGv_i32 tcg_temp_ebb_new_i32(void)
1979 {
1980     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1981 }
1982 
1983 TCGv_i64 tcg_temp_new_i64(void)
1984 {
1985     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1986 }
1987 
1988 TCGv_i64 tcg_temp_ebb_new_i64(void)
1989 {
1990     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1991 }
1992 
1993 TCGv_ptr tcg_temp_new_ptr(void)
1994 {
1995     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1996 }
1997 
1998 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1999 {
2000     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2001 }
2002 
2003 TCGv_i128 tcg_temp_new_i128(void)
2004 {
2005     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2006 }
2007 
2008 TCGv_i128 tcg_temp_ebb_new_i128(void)
2009 {
2010     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2011 }
2012 
2013 TCGv_vec tcg_temp_new_vec(TCGType type)
2014 {
2015     TCGTemp *t;
2016 
2017 #ifdef CONFIG_DEBUG_TCG
2018     switch (type) {
2019     case TCG_TYPE_V64:
2020         assert(TCG_TARGET_HAS_v64);
2021         break;
2022     case TCG_TYPE_V128:
2023         assert(TCG_TARGET_HAS_v128);
2024         break;
2025     case TCG_TYPE_V256:
2026         assert(TCG_TARGET_HAS_v256);
2027         break;
2028     default:
2029         g_assert_not_reached();
2030     }
2031 #endif
2032 
2033     t = tcg_temp_new_internal(type, TEMP_EBB);
2034     return temp_tcgv_vec(t);
2035 }
2036 
2037 /* Create a new temp of the same type as an existing temp.  */
2038 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2039 {
2040     TCGTemp *t = tcgv_vec_temp(match);
2041 
2042     tcg_debug_assert(t->temp_allocated != 0);
2043 
2044     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2045     return temp_tcgv_vec(t);
2046 }
2047 
2048 void tcg_temp_free_internal(TCGTemp *ts)
2049 {
2050     TCGContext *s = tcg_ctx;
2051 
2052     switch (ts->kind) {
2053     case TEMP_CONST:
2054     case TEMP_TB:
2055         /* Silently ignore free. */
2056         break;
2057     case TEMP_EBB:
2058         tcg_debug_assert(ts->temp_allocated != 0);
2059         ts->temp_allocated = 0;
2060         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2061         break;
2062     default:
2063         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2064         g_assert_not_reached();
2065     }
2066 }
2067 
2068 void tcg_temp_free_i32(TCGv_i32 arg)
2069 {
2070     tcg_temp_free_internal(tcgv_i32_temp(arg));
2071 }
2072 
2073 void tcg_temp_free_i64(TCGv_i64 arg)
2074 {
2075     tcg_temp_free_internal(tcgv_i64_temp(arg));
2076 }
2077 
2078 void tcg_temp_free_i128(TCGv_i128 arg)
2079 {
2080     tcg_temp_free_internal(tcgv_i128_temp(arg));
2081 }
2082 
2083 void tcg_temp_free_ptr(TCGv_ptr arg)
2084 {
2085     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2086 }
2087 
2088 void tcg_temp_free_vec(TCGv_vec arg)
2089 {
2090     tcg_temp_free_internal(tcgv_vec_temp(arg));
2091 }
2092 
/*
 * Return the interned TEMP_CONST temp for VAL of TYPE, creating it on
 * first use.  Constants are cached per-type in a hash table so that a
 * given (type, value) pair maps to exactly one temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table; keys are 64-bit values.  */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constants on 32-bit hosts use two temp slots.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key aliases the temp's own val field, which stays live.  */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2147 
2148 TCGv_i32 tcg_constant_i32(int32_t val)
2149 {
2150     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2151 }
2152 
2153 TCGv_i64 tcg_constant_i64(int64_t val)
2154 {
2155     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2156 }
2157 
2158 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2159 {
2160     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2161 }
2162 
2163 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2164 {
2165     val = dup_const(vece, val);
2166     return temp_tcgv_vec(tcg_constant_internal(type, val));
2167 }
2168 
2169 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2170 {
2171     TCGTemp *t = tcgv_vec_temp(match);
2172 
2173     tcg_debug_assert(t->temp_allocated != 0);
2174     return tcg_constant_vec(t->base_type, vece, val);
2175 }
2176 
2177 #ifdef CONFIG_DEBUG_TCG
2178 size_t temp_idx(TCGTemp *ts)
2179 {
2180     ptrdiff_t n = ts - tcg_ctx->temps;
2181     assert(n >= 0 && n < tcg_ctx->nb_temps);
2182     return n;
2183 }
2184 
/*
 * Convert an opaque TCGv_i32 handle back to its TCGTemp.  The handle
 * value is the byte offset of the temp from the start of TCGContext;
 * validate that it falls within temps[] and is element-aligned.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    /* (void *) arithmetic is a GNU extension; offset from the context.  */
    return (void *)tcg_ctx + (uintptr_t)v;
}
2194 #endif /* CONFIG_DEBUG_TCG */
2195 
2196 /*
2197  * Return true if OP may appear in the opcode stream with TYPE.
2198  * Test the runtime variable that controls each opcode.
2199  */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /* First decide whether the host supports TYPE at all.  */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    switch (op) {
    /* Opcodes that every backend must implement.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Type-generic opcodes, gated only by type support.  */
    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* 32-bit opcodes that every backend must implement.  */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* Optional 32-bit opcodes, per-backend.  */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only on 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit opcodes, mandatory on 64-bit hosts.  */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes, per-backend.  */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;

    /* Vector opcodes, gated by vector type support and per-backend flags.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        if (op < INDEX_op_last_generic) {
            /* Remaining generic opcodes are queried via their
               output constraints, possibly computed dynamically.  */
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        /* Backend-specific opcodes are always considered supported.  */
        tcg_debug_assert(op < NB_OPS);
        return true;

    case INDEX_op_last_generic:
        g_assert_not_reached();
    }
}
2452 
2453 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2454 {
2455     unsigned width;
2456 
2457     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2458     width = (type == TCG_TYPE_I32 ? 32 : 64);
2459 
2460     tcg_debug_assert(ofs < width);
2461     tcg_debug_assert(len > 0);
2462     tcg_debug_assert(len <= width - ofs);
2463 
2464     return TCG_TARGET_deposit_valid(type, ofs, len);
2465 }
2466 
2467 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2468 
/*
 * Emit an INDEX_op_call to helper FUNC described by INFO, with result
 * temp RET (NULL for void helpers) and input temps ARGS.  32-bit
 * arguments the ABI requires to be widened are copied into transient
 * EBB temps, which are released once the op has been emitted.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout lazily, exactly once per helper.  */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 for the trailing function pointer and info pointer.  */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Outputs: 0, 1, or 2/4 consecutive register-sized pieces.  */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Inputs, placed according to the precomputed layout.  */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit argument into a fresh 64-bit temp.  */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing arguments: the function pointer and its descriptor.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* The widened copies are dead once the call op is emitted.  */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2561 
/* Emit a helper call taking no arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2566 
2567 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2568 {
2569     tcg_gen_callN(func, info, ret, &t1);
2570 }
2571 
2572 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2573                    TCGTemp *t1, TCGTemp *t2)
2574 {
2575     TCGTemp *args[2] = { t1, t2 };
2576     tcg_gen_callN(func, info, ret, args);
2577 }
2578 
2579 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2580                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2581 {
2582     TCGTemp *args[3] = { t1, t2, t3 };
2583     tcg_gen_callN(func, info, ret, args);
2584 }
2585 
2586 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2587                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2588 {
2589     TCGTemp *args[4] = { t1, t2, t3, t4 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2594                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2595 {
2596     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2597     tcg_gen_callN(func, info, ret, args);
2598 }
2599 
2600 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2601                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2602                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2603 {
2604     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2605     tcg_gen_callN(func, info, ret, args);
2606 }
2607 
2608 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2609                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2610                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2611 {
2612     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2613     tcg_gen_callN(func, info, ret, args);
2614 }
2615 
/*
 * Initialize the value location of every temp before register
 * allocation: constants start as TEMP_VAL_CONST, fixed temps in their
 * register, globals in memory, TB temps in memory and EBB temps dead.
 * Also clears the register-to-temp reverse map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Both EBB and TB temps get a fresh stack slot per TB.  */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2647 
/*
 * Format a human-readable name for TS into BUF, for opcode dumps:
 * globals by their name, TB temps as locN, EBB temps as tmpN, and
 * constants by value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Prefix vector constants with their width in bits.  */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2687 
2688 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2689                              int buf_size, TCGArg arg)
2690 {
2691     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2692 }
2693 
/* Printable names for TCGCond values, used in opcode dumps.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2711 
/* Printable names for MemOp size/sign/endian combinations.  */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2729 
/* Printable prefixes for MemOp alignment requirements.  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2740 
/* Printable names for the MO_ATOM_* field of a MemOp. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2749 
/*
 * Printable names for valid TCG_BSWAP_* flag combinations on bswap ops.
 * Other flag values are printed as a raw hex constant by the dumper.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2757 
#ifdef CONFIG_PLUGIN
/* Printable names for the 'from' argument of plugin_cb, indexed by value. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2766 
2767 static inline bool tcg_regset_single(TCGRegSet d)
2768 {
2769     return (d & (d - 1)) == 0;
2770 }
2771 
2772 static inline TCGReg tcg_regset_first(TCGRegSet d)
2773 {
2774     if (TCG_TARGET_NB_REGS <= 32) {
2775         return ctz32(d);
2776     } else {
2777         return ctz64(d);
2778     }
2779 }
2780 
/*
 * fprintf wrapper returning only the number of characters output;
 * an fprintf error (negative result) is reported as 0 -- no error return.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2784 
/*
 * Dump the current op stream of @s to @f, one op per line, for debugging.
 * If @have_prefs, also print the output-register allocation preferences
 * for each op, alongside the sync/dead liveness annotations.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest insn boundary: print one hex value per start word. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Integer and vector ops carry their type/element width
               in the mnemonic suffix. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k indexes op->args; i counts cargs already printed below. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Print the leading constant argument symbolically, per opcode. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Label and memory-barrier arguments also print symbolically. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier type: acquire/release/sequential. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access pairs: r=load, w=store. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant arguments print as plain hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before the liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Low bits hold per-argument sync flags (SYNC_ARG units). */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Dead-argument flags start at DEAD_ARG, one bit per arg. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3117 
3118 /* we give more priority to constraints with less registers */
3119 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3120 {
3121     int n;
3122 
3123     arg_ct += k;
3124     n = ctpop64(arg_ct->regs);
3125 
3126     /*
3127      * Sort constraints of a single register first, which includes output
3128      * aliases (which must exactly match the input already allocated).
3129      */
3130     if (n == 1 || arg_ct->oalias) {
3131         return INT_MAX;
3132     }
3133 
3134     /*
3135      * Sort register pairs next, first then second immediately after.
3136      * Arbitrarily sort multiple pairs by the index of the first reg;
3137      * there shouldn't be many pairs.
3138      */
3139     switch (arg_ct->pair) {
3140     case 1:
3141     case 3:
3142         return (k + 1) * 2;
3143     case 2:
3144         return (arg_ct->pair_index + 1) * 2 - 1;
3145     }
3146 
3147     /* Finally, sort by decreasing register count. */
3148     assert(n > 1);
3149     return -n;
3150 }
3151 
3152 /* sort from highest priority to lowest */
3153 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3154 {
3155     int i, j;
3156 
3157     for (i = 0; i < n; i++) {
3158         a[start + i].sort_index = start + i;
3159     }
3160     if (n <= 1) {
3161         return;
3162     }
3163     for (i = 0; i < n - 1; i++) {
3164         for (j = i + 1; j < n; j++) {
3165             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3166             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3167             if (p1 < p2) {
3168                 int tmp = a[start + i].sort_index;
3169                 a[start + i].sort_index = a[start + j].sort_index;
3170                 a[start + j].sort_index = tmp;
3171             }
3172         }
3173     }
3174 }
3175 
/* Zeroed constraint set, returned for opcodes marked TCG_OPF_NOT_PRESENT. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Expanded form of each TCGConstraintSet; filled by process_constraint_sets. */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3178 
/*
 * Expand every TCGConstraintSet's per-argument constraint strings into
 * the TCGArgConstraint form used by the register allocator (all_cts):
 * resolve output aliases ('0'..'9'), new-register outputs ('&'),
 * register pairs ('p'/'m') and per-letter register/constant flags,
 * then sort the constraints by allocation priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output register 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output-only modifier: request a new register (newreg). */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters are single-letter constraint flags. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3369 
3370 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3371 {
3372     TCGOpcode opc = op->opc;
3373     TCGType type = TCGOP_TYPE(op);
3374     unsigned flags = TCGOP_FLAGS(op);
3375     const TCGOpDef *def = &tcg_op_defs[opc];
3376     const TCGOutOp *outop = all_outop[opc];
3377     TCGConstraintSetIndex con_set;
3378 
3379     if (def->flags & TCG_OPF_NOT_PRESENT) {
3380         return empty_cts;
3381     }
3382 
3383     if (outop) {
3384         con_set = outop->static_constraint;
3385         if (con_set == C_Dynamic) {
3386             con_set = outop->dynamic_constraint(type, flags);
3387         }
3388     } else {
3389         con_set = tcg_target_op_def(opc, type, flags);
3390     }
3391     tcg_debug_assert(con_set >= 0);
3392     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3393 
3394     /* The constraint arguments must match TCGOpcode arguments. */
3395     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3396     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3397 
3398     return all_cts[con_set];
3399 }
3400 
3401 static void remove_label_use(TCGOp *op, int idx)
3402 {
3403     TCGLabel *label = arg_label(op->args[idx]);
3404     TCGLabelUse *use;
3405 
3406     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3407         if (use->op == op) {
3408             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3409             return;
3410         }
3411     }
3412     g_assert_not_reached();
3413 }
3414 
3415 void tcg_op_remove(TCGContext *s, TCGOp *op)
3416 {
3417     switch (op->opc) {
3418     case INDEX_op_br:
3419         remove_label_use(op, 0);
3420         break;
3421     case INDEX_op_brcond_i32:
3422     case INDEX_op_brcond_i64:
3423         remove_label_use(op, 3);
3424         break;
3425     case INDEX_op_brcond2_i32:
3426         remove_label_use(op, 5);
3427         break;
3428     default:
3429         break;
3430     }
3431 
3432     QTAILQ_REMOVE(&s->ops, op, link);
3433     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3434     s->nb_ops--;
3435 }
3436 
3437 void tcg_remove_ops_after(TCGOp *op)
3438 {
3439     TCGContext *s = tcg_ctx;
3440 
3441     while (true) {
3442         TCGOp *last = tcg_last_op();
3443         if (last == op) {
3444             return;
3445         }
3446         tcg_op_remove(s, last);
3447     }
3448 }
3449 
3450 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3451 {
3452     TCGContext *s = tcg_ctx;
3453     TCGOp *op = NULL;
3454 
3455     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3456         QTAILQ_FOREACH(op, &s->free_ops, link) {
3457             if (nargs <= op->nargs) {
3458                 QTAILQ_REMOVE(&s->free_ops, op, link);
3459                 nargs = op->nargs;
3460                 goto found;
3461             }
3462         }
3463     }
3464 
3465     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3466     nargs = MAX(4, nargs);
3467     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3468 
3469  found:
3470     memset(op, 0, offsetof(TCGOp, link));
3471     op->opc = opc;
3472     op->nargs = nargs;
3473 
3474     /* Check for bitfield overflow. */
3475     tcg_debug_assert(op->nargs == nargs);
3476 
3477     s->nb_ops++;
3478     return op;
3479 }
3480 
3481 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3482 {
3483     TCGOp *op = tcg_op_alloc(opc, nargs);
3484 
3485     if (tcg_ctx->emit_before_op) {
3486         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3487     } else {
3488         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3489     }
3490     return op;
3491 }
3492 
3493 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3494                             TCGOpcode opc, TCGType type, unsigned nargs)
3495 {
3496     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3497 
3498     TCGOP_TYPE(new_op) = type;
3499     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3500     return new_op;
3501 }
3502 
3503 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3504                            TCGOpcode opc, TCGType type, unsigned nargs)
3505 {
3506     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3507 
3508     TCGOP_TYPE(new_op) = type;
3509     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3510     return new_op;
3511 }
3512 
3513 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3514 {
3515     TCGLabelUse *u;
3516 
3517     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3518         TCGOp *op = u->op;
3519         switch (op->opc) {
3520         case INDEX_op_br:
3521             op->args[0] = label_arg(to);
3522             break;
3523         case INDEX_op_brcond_i32:
3524         case INDEX_op_brcond_i64:
3525             op->args[3] = label_arg(to);
3526             break;
3527         case INDEX_op_brcond2_i32:
3528             op->args[5] = label_arg(to);
3529             break;
3530         default:
3531             g_assert_not_reached();
3532         }
3533     }
3534 
3535     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3536 }
3537 
/* Reachable analysis : remove unreachable code.  */
/*
 * Single forward walk over s->ops.  The @dead flag is set when an
 * unconditional control transfer is seen, and cleared when a label
 * with live uses (or a fall-through) is reached; ops encountered
 * while @dead is set are deleted.  Also merges adjacent labels and
 * removes branch-to-next branches.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default an op is removed iff we are in dead code. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3629 
/* Per-temp liveness state bits used by the liveness passes below. */
#define TS_DEAD  1
#define TS_MEM   2

/* Decode the per-argument life data recorded in op->life by liveness_pass_1. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3635 
3636 /* For liveness_pass_1, the register preferences for a given temp.  */
3637 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3638 {
3639     return ts->state_ptr;
3640 }
3641 
3642 /* For liveness_pass_1, reset the preferences for a given temp to the
3643  * maximal regset for its type.
3644  */
3645 static inline void la_reset_pref(TCGTemp *ts)
3646 {
3647     *la_temp_pref(ts)
3648         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3649 }
3650 
3651 /* liveness analysis: end of function: all temps are dead, and globals
3652    should be in memory. */
3653 static void la_func_end(TCGContext *s, int ng, int nt)
3654 {
3655     int i;
3656 
3657     for (i = 0; i < ng; ++i) {
3658         s->temps[i].state = TS_DEAD | TS_MEM;
3659         la_reset_pref(&s->temps[i]);
3660     }
3661     for (i = ng; i < nt; ++i) {
3662         s->temps[i].state = TS_DEAD;
3663         la_reset_pref(&s->temps[i]);
3664     }
3665 }
3666 
3667 /* liveness analysis: end of basic block: all temps are dead, globals
3668    and local temps should be in memory. */
3669 static void la_bb_end(TCGContext *s, int ng, int nt)
3670 {
3671     int i;
3672 
3673     for (i = 0; i < nt; ++i) {
3674         TCGTemp *ts = &s->temps[i];
3675         int state;
3676 
3677         switch (ts->kind) {
3678         case TEMP_FIXED:
3679         case TEMP_GLOBAL:
3680         case TEMP_TB:
3681             state = TS_DEAD | TS_MEM;
3682             break;
3683         case TEMP_EBB:
3684         case TEMP_CONST:
3685             state = TS_DEAD;
3686             break;
3687         default:
3688             g_assert_not_reached();
3689         }
3690         ts->state = state;
3691         la_reset_pref(ts);
3692     }
3693 }
3694 
3695 /* liveness analysis: sync globals back to memory.  */
3696 static void la_global_sync(TCGContext *s, int ng)
3697 {
3698     int i;
3699 
3700     for (i = 0; i < ng; ++i) {
3701         int state = s->temps[i].state;
3702         s->temps[i].state = state | TS_MEM;
3703         if (state == TS_DEAD) {
3704             /* If the global was previously dead, reset prefs.  */
3705             la_reset_pref(&s->temps[i]);
3706         }
3707     }
3708 }
3709 
3710 /*
3711  * liveness analysis: conditional branch: all temps are dead unless
3712  * explicitly live-across-conditional-branch, globals and local temps
3713  * should be synced.
3714  */
3715 static void la_bb_sync(TCGContext *s, int ng, int nt)
3716 {
3717     la_global_sync(s, ng);
3718 
3719     for (int i = ng; i < nt; ++i) {
3720         TCGTemp *ts = &s->temps[i];
3721         int state;
3722 
3723         switch (ts->kind) {
3724         case TEMP_TB:
3725             state = ts->state;
3726             ts->state = state | TS_MEM;
3727             if (state != TS_DEAD) {
3728                 continue;
3729             }
3730             break;
3731         case TEMP_EBB:
3732         case TEMP_CONST:
3733             continue;
3734         default:
3735             g_assert_not_reached();
3736         }
3737         la_reset_pref(&s->temps[i]);
3738     }
3739 }
3740 
3741 /* liveness analysis: sync globals back to memory and kill.  */
3742 static void la_global_kill(TCGContext *s, int ng)
3743 {
3744     int i;
3745 
3746     for (i = 0; i < ng; i++) {
3747         s->temps[i].state = TS_DEAD | TS_MEM;
3748         la_reset_pref(&s->temps[i]);
3749     }
3750 }
3751 
3752 /* liveness analysis: note live globals crossing calls.  */
3753 static void la_cross_call(TCGContext *s, int nt)
3754 {
3755     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3756     int i;
3757 
3758     for (i = 0; i < nt; i++) {
3759         TCGTemp *ts = &s->temps[i];
3760         if (!(ts->state & TS_DEAD)) {
3761             TCGRegSet *pset = la_temp_pref(ts);
3762             TCGRegSet set = *pset;
3763 
3764             set &= mask;
3765             /* If the combination is not possible, restart.  */
3766             if (set == 0) {
3767                 set = tcg_target_available_regs[ts->type] & mask;
3768             }
3769             *pset = set;
3770         }
3771     }
3772 }
3773 
/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel: temp was seen in more than one extended basic block. */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    /* Begin with no recorded use for any non-global temp. */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            /* A label starts a new EBB. */
            ebb = op;
            continue;
        case INDEX_op_discard:
            /* A discard is not a real use. */
            continue;
        case INDEX_op_call:
            /* Calls carry their argument counts in the op itself. */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                /* First use: remember the owning EBB. */
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                /* Used in a second EBB: cannot be reduced. */
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}
3842 
/*
 * Liveness analysis, pass 1: walking backward from the end of the TB,
 * update each op's life data (op->life) to record which input arguments
 * die at that op (DEAD_ARG) and which outputs must be synced to memory
 * (SYNC_ARG).  Ops whose outputs are all dead and which have no side
 * effects are removed.  Also accumulates, per temp, a register
 * preference set (via la_temp_pref) consumed by the register allocator.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference set per temp; state_ptr indexes into the array. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Never removed; required for unwind info. */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2_i32:
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2_i32:
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the life data for this op for later passes. */
        op->life = arg_life;
    }
}
4171 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/*
 * For each global that lives behind an indirect register, allocate a
 * direct TEMP_EBB shadow temp and rewrite the ops to use it, inserting
 * explicit load ops before uses and store ops after definitions as
 * dictated by the life data from liveness_pass_1.  Returns true if any
 * op was changed, in which case liveness must be re-run.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a load from the global's memory slot. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    /* Insert a store back to the global's memory slot. */
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Store directly from the mov source and drop
                           the now-useless mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4363 
/*
 * Assign a slot in the TCG stack frame to @ts (and, for a subdivided
 * base type, to all of its sibling parts).  Raises a TB-overflow
 * restart if the frame is exhausted.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* Apply the stack bias after recording the unbiased next offset. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4432 
4433 /* Assign @reg to @ts, and update reg_to_temp[]. */
4434 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4435 {
4436     if (ts->val_type == TEMP_VAL_REG) {
4437         TCGReg old = ts->reg;
4438         tcg_debug_assert(s->reg_to_temp[old] == ts);
4439         if (old == reg) {
4440             return;
4441         }
4442         s->reg_to_temp[old] = NULL;
4443     }
4444     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4445     s->reg_to_temp[reg] = ts;
4446     ts->val_type = TEMP_VAL_REG;
4447     ts->reg = reg;
4448 }
4449 
4450 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4451 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4452 {
4453     tcg_debug_assert(type != TEMP_VAL_REG);
4454     if (ts->val_type == TEMP_VAL_REG) {
4455         TCGReg reg = ts->reg;
4456         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4457         s->reg_to_temp[reg] = NULL;
4458     }
4459     ts->val_type = type;
4460 }
4461 
4462 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4463 
4464 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4465    mark it free; otherwise mark it dead.  */
4466 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4467 {
4468     TCGTempVal new_type;
4469 
4470     switch (ts->kind) {
4471     case TEMP_FIXED:
4472         return;
4473     case TEMP_GLOBAL:
4474     case TEMP_TB:
4475         new_type = TEMP_VAL_MEM;
4476         break;
4477     case TEMP_EBB:
4478         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4479         break;
4480     case TEMP_CONST:
4481         new_type = TEMP_VAL_CONST;
4482         break;
4483     default:
4484         g_assert_not_reached();
4485     }
4486     set_temp_val_nonreg(s, ts, new_type);
4487 }
4488 
4489 /* Mark a temporary as dead.  */
4490 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4491 {
4492     temp_free_or_dead(s, ts, 1);
4493 }
4494 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; neither do ones whose memory
       slot already matches the current value.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register and
               store from there.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        /* The memory slot now matches the live value. */
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4538 
4539 /* free register 'reg' by spilling the corresponding temporary if necessary */
4540 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4541 {
4542     TCGTemp *ts = s->reg_to_temp[reg];
4543     if (ts != NULL) {
4544         temp_sync(s, ts, allocated_regs, 0, -1);
4545     }
4546 }
4547 
4548 /**
4549  * tcg_reg_alloc:
4550  * @required_regs: Set of registers in which we must allocate.
4551  * @allocated_regs: Set of registers which must be avoided.
4552  * @preferred_regs: Set of registers we should prefer.
4553  * @rev: True if we search the registers in "indirect" order.
4554  *
4555  * The allocated register must be in @required_regs & ~@allocated_regs,
4556  * but if we can put it in @preferred_regs we may save a move later.
4557  */
4558 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4559                             TCGRegSet allocated_regs,
4560                             TCGRegSet preferred_regs, bool rev)
4561 {
4562     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4563     TCGRegSet reg_ct[2];
4564     const int *order;
4565 
4566     reg_ct[1] = required_regs & ~allocated_regs;
4567     tcg_debug_assert(reg_ct[1] != 0);
4568     reg_ct[0] = reg_ct[1] & preferred_regs;
4569 
4570     /* Skip the preferred_regs option if it cannot be satisfied,
4571        or if the preference made no difference.  */
4572     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4573 
4574     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4575 
4576     /* Try free registers, preferences first.  */
4577     for (j = f; j < 2; j++) {
4578         TCGRegSet set = reg_ct[j];
4579 
4580         if (tcg_regset_single(set)) {
4581             /* One register in the set.  */
4582             TCGReg reg = tcg_regset_first(set);
4583             if (s->reg_to_temp[reg] == NULL) {
4584                 return reg;
4585             }
4586         } else {
4587             for (i = 0; i < n; i++) {
4588                 TCGReg reg = order[i];
4589                 if (s->reg_to_temp[reg] == NULL &&
4590                     tcg_regset_test_reg(set, reg)) {
4591                     return reg;
4592                 }
4593             }
4594         }
4595     }
4596 
4597     /* We must spill something.  */
4598     for (j = f; j < 2; j++) {
4599         TCGRegSet set = reg_ct[j];
4600 
4601         if (tcg_regset_single(set)) {
4602             /* One register in the set.  */
4603             TCGReg reg = tcg_regset_first(set);
4604             tcg_reg_free(s, reg, allocated_regs);
4605             return reg;
4606         } else {
4607             for (i = 0; i < n; i++) {
4608                 TCGReg reg = order[i];
4609                 if (tcg_regset_test_reg(set, reg)) {
4610                     tcg_reg_free(s, reg, allocated_regs);
4611                     return reg;
4612                 }
4613             }
4614         }
4615     }
4616 
4617     g_assert_not_reached();
4618 }
4619 
/*
 * As tcg_reg_alloc, but allocate two consecutive host registers
 * (reg, reg + 1), returning the lower-numbered register of the pair.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of currently-free registers in the pair. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op on a free register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4665 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar constant: materialize directly. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register value is newer than any memory copy. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory slot now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4717 
4718 /* Save a temporary to memory. 'allocated_regs' is used in case a
4719    temporary registers needs to be allocated to store a constant.  */
4720 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4721 {
4722     /* The liveness analysis already ensures that globals are back
4723        in memory. Keep an tcg_debug_assert for safety. */
4724     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4725 }
4726 
4727 /* save globals to their canonical location and assume they can be
4728    modified be the following code. 'allocated_regs' is used in case a
4729    temporary registers needs to be allocated to store a constant. */
4730 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4731 {
4732     int i, n;
4733 
4734     for (i = 0, n = s->nb_globals; i < n; i++) {
4735         temp_save(s, &s->temps[i], allocated_regs);
4736     }
4737 }
4738 
4739 /* sync globals to their canonical location and assume they can be
4740    read by the following code. 'allocated_regs' is used in case a
4741    temporary registers needs to be allocated to store a constant. */
4742 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4743 {
4744     int i, n;
4745 
4746     for (i = 0, n = s->nb_globals; i < n; i++) {
4747         TCGTemp *ts = &s->temps[i];
4748         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4749                          || ts->kind == TEMP_FIXED
4750                          || ts->mem_coherent);
4751     }
4752 }
4753 
4754 /* at the end of a basic block, we assume all temporaries are dead and
4755    all globals are stored at their canonical location. */
4756 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4757 {
4758     int i;
4759 
4760     for (i = s->nb_globals; i < s->nb_temps; i++) {
4761         TCGTemp *ts = &s->temps[i];
4762 
4763         switch (ts->kind) {
4764         case TEMP_TB:
4765             temp_save(s, ts, allocated_regs);
4766             break;
4767         case TEMP_EBB:
4768             /* The liveness analysis already ensures that temps are dead.
4769                Keep an tcg_debug_assert for safety. */
4770             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4771             break;
4772         case TEMP_CONST:
4773             /* Similarly, we should have freed any allocated register. */
4774             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4775             break;
4776         default:
4777             g_assert_not_reached();
4778         }
4779     }
4780 
4781     save_globals(s, allocated_regs);
4782 }
4783 
4784 /*
4785  * At a conditional branch, we assume all temporaries are dead unless
4786  * explicitly live-across-conditional-branch; all globals and local
4787  * temps are synced to their location.
4788  */
4789 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4790 {
4791     sync_globals(s, allocated_regs);
4792 
4793     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4794         TCGTemp *ts = &s->temps[i];
4795         /*
4796          * The liveness analysis already ensures that temps are dead.
4797          * Keep tcg_debug_asserts for safety.
4798          */
4799         switch (ts->kind) {
4800         case TEMP_TB:
4801             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4802             break;
4803         case TEMP_EBB:
4804         case TEMP_CONST:
4805             break;
4806         default:
4807             g_assert_not_reached();
4808         }
4809     }
4810 }
4811 
4812 /*
4813  * Specialized code generation for INDEX_op_mov_* with a constant.
4814  */
4815 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4816                                   tcg_target_ulong val, TCGLifeData arg_life,
4817                                   TCGRegSet preferred_regs)
4818 {
4819     /* ENV should not be modified.  */
4820     tcg_debug_assert(!temp_readonly(ots));
4821 
4822     /* The movi is not explicitly generated here.  */
4823     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4824     ots->val = val;
4825     ots->mem_coherent = 0;
4826     if (NEED_SYNC_ARG(0)) {
4827         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4828     } else if (IS_DEAD_ARG(0)) {
4829         temp_dead(s, ots);
4830     }
4831 }
4832 
4833 /*
4834  * Specialized code generation for INDEX_op_mov_*.
4835  */
4836 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4837 {
4838     const TCGLifeData arg_life = op->life;
4839     TCGRegSet allocated_regs, preferred_regs;
4840     TCGTemp *ts, *ots;
4841     TCGType otype, itype;
4842     TCGReg oreg, ireg;
4843 
4844     allocated_regs = s->reserved_regs;
4845     preferred_regs = output_pref(op, 0);
4846     ots = arg_temp(op->args[0]);
4847     ts = arg_temp(op->args[1]);
4848 
4849     /* ENV should not be modified.  */
4850     tcg_debug_assert(!temp_readonly(ots));
4851 
4852     /* Note that otype != itype for no-op truncation.  */
4853     otype = ots->type;
4854     itype = ts->type;
4855 
4856     if (ts->val_type == TEMP_VAL_CONST) {
4857         /* propagate constant or generate sti */
4858         tcg_target_ulong val = ts->val;
4859         if (IS_DEAD_ARG(1)) {
4860             temp_dead(s, ts);
4861         }
4862         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4863         return;
4864     }
4865 
4866     /* If the source value is in memory we're going to be forced
4867        to have it in a register in order to perform the copy.  Copy
4868        the SOURCE value into its own register first, that way we
4869        don't have to reload SOURCE the next time it is used. */
4870     if (ts->val_type == TEMP_VAL_MEM) {
4871         temp_load(s, ts, tcg_target_available_regs[itype],
4872                   allocated_regs, preferred_regs);
4873     }
4874     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4875     ireg = ts->reg;
4876 
4877     if (IS_DEAD_ARG(0)) {
4878         /* mov to a non-saved dead register makes no sense (even with
4879            liveness analysis disabled). */
4880         tcg_debug_assert(NEED_SYNC_ARG(0));
4881         if (!ots->mem_allocated) {
4882             temp_allocate_frame(s, ots);
4883         }
4884         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4885         if (IS_DEAD_ARG(1)) {
4886             temp_dead(s, ts);
4887         }
4888         temp_dead(s, ots);
4889         return;
4890     }
4891 
4892     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4893         /*
4894          * The mov can be suppressed.  Kill input first, so that it
4895          * is unlinked from reg_to_temp, then set the output to the
4896          * reg that we saved from the input.
4897          */
4898         temp_dead(s, ts);
4899         oreg = ireg;
4900     } else {
4901         if (ots->val_type == TEMP_VAL_REG) {
4902             oreg = ots->reg;
4903         } else {
4904             /* Make sure to not spill the input register during allocation. */
4905             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4906                                  allocated_regs | ((TCGRegSet)1 << ireg),
4907                                  preferred_regs, ots->indirect_base);
4908         }
4909         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4910             /*
4911              * Cross register class move not supported.
4912              * Store the source register into the destination slot
4913              * and leave the destination temp as TEMP_VAL_MEM.
4914              */
4915             assert(!temp_readonly(ots));
4916             if (!ts->mem_allocated) {
4917                 temp_allocate_frame(s, ots);
4918             }
4919             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4920             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4921             ots->mem_coherent = 1;
4922             return;
4923         }
4924     }
4925     set_temp_val_reg(s, ots, oreg);
4926     ots->mem_coherent = 0;
4927 
4928     if (NEED_SYNC_ARG(0)) {
4929         temp_sync(s, ots, allocated_regs, 0, 0);
4930     }
4931 }
4932 
4933 /*
4934  * Specialized code generation for INDEX_op_dup_vec.
4935  */
4936 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4937 {
4938     const TCGLifeData arg_life = op->life;
4939     TCGRegSet dup_out_regs, dup_in_regs;
4940     const TCGArgConstraint *dup_args_ct;
4941     TCGTemp *its, *ots;
4942     TCGType itype, vtype;
4943     unsigned vece;
4944     int lowpart_ofs;
4945     bool ok;
4946 
4947     ots = arg_temp(op->args[0]);
4948     its = arg_temp(op->args[1]);
4949 
4950     /* ENV should not be modified.  */
4951     tcg_debug_assert(!temp_readonly(ots));
4952 
4953     itype = its->type;
4954     vece = TCGOP_VECE(op);
4955     vtype = TCGOP_TYPE(op);
4956 
4957     if (its->val_type == TEMP_VAL_CONST) {
4958         /* Propagate constant via movi -> dupi.  */
4959         tcg_target_ulong val = its->val;
4960         if (IS_DEAD_ARG(1)) {
4961             temp_dead(s, its);
4962         }
4963         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4964         return;
4965     }
4966 
4967     dup_args_ct = opcode_args_ct(op);
4968     dup_out_regs = dup_args_ct[0].regs;
4969     dup_in_regs = dup_args_ct[1].regs;
4970 
4971     /* Allocate the output register now.  */
4972     if (ots->val_type != TEMP_VAL_REG) {
4973         TCGRegSet allocated_regs = s->reserved_regs;
4974         TCGReg oreg;
4975 
4976         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4977             /* Make sure to not spill the input register. */
4978             tcg_regset_set_reg(allocated_regs, its->reg);
4979         }
4980         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4981                              output_pref(op, 0), ots->indirect_base);
4982         set_temp_val_reg(s, ots, oreg);
4983     }
4984 
4985     switch (its->val_type) {
4986     case TEMP_VAL_REG:
4987         /*
4988          * The dup constriaints must be broad, covering all possible VECE.
4989          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4990          * to fail, indicating that extra moves are required for that case.
4991          */
4992         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4993             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4994                 goto done;
4995             }
4996             /* Try again from memory or a vector input register.  */
4997         }
4998         if (!its->mem_coherent) {
4999             /*
5000              * The input register is not synced, and so an extra store
5001              * would be required to use memory.  Attempt an integer-vector
5002              * register move first.  We do not have a TCGRegSet for this.
5003              */
5004             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5005                 break;
5006             }
5007             /* Sync the temp back to its slot and load from there.  */
5008             temp_sync(s, its, s->reserved_regs, 0, 0);
5009         }
5010         /* fall through */
5011 
5012     case TEMP_VAL_MEM:
5013         lowpart_ofs = 0;
5014         if (HOST_BIG_ENDIAN) {
5015             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5016         }
5017         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5018                              its->mem_offset + lowpart_ofs)) {
5019             goto done;
5020         }
5021         /* Load the input into the destination vector register. */
5022         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5023         break;
5024 
5025     default:
5026         g_assert_not_reached();
5027     }
5028 
5029     /* We now have a vector input register, so dup must succeed. */
5030     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5031     tcg_debug_assert(ok);
5032 
5033  done:
5034     ots->mem_coherent = 0;
5035     if (IS_DEAD_ARG(1)) {
5036         temp_dead(s, its);
5037     }
5038     if (NEED_SYNC_ARG(0)) {
5039         temp_sync(s, ots, s->reserved_regs, 0, 0);
5040     }
5041     if (IS_DEAD_ARG(0)) {
5042         temp_dead(s, ots);
5043     }
5044 }
5045 
5046 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5047 {
5048     const TCGLifeData arg_life = op->life;
5049     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5050     TCGRegSet i_allocated_regs;
5051     TCGRegSet o_allocated_regs;
5052     int i, k, nb_iargs, nb_oargs;
5053     TCGReg reg;
5054     TCGArg arg;
5055     const TCGArgConstraint *args_ct;
5056     const TCGArgConstraint *arg_ct;
5057     TCGTemp *ts;
5058     TCGArg new_args[TCG_MAX_OP_ARGS];
5059     int const_args[TCG_MAX_OP_ARGS];
5060     TCGCond op_cond;
5061 
5062     nb_oargs = def->nb_oargs;
5063     nb_iargs = def->nb_iargs;
5064 
5065     /* copy constants */
5066     memcpy(new_args + nb_oargs + nb_iargs,
5067            op->args + nb_oargs + nb_iargs,
5068            sizeof(TCGArg) * def->nb_cargs);
5069 
5070     i_allocated_regs = s->reserved_regs;
5071     o_allocated_regs = s->reserved_regs;
5072 
5073     switch (op->opc) {
5074     case INDEX_op_brcond_i32:
5075     case INDEX_op_brcond_i64:
5076         op_cond = op->args[2];
5077         break;
5078     case INDEX_op_setcond_i32:
5079     case INDEX_op_setcond_i64:
5080     case INDEX_op_negsetcond_i32:
5081     case INDEX_op_negsetcond_i64:
5082     case INDEX_op_cmp_vec:
5083         op_cond = op->args[3];
5084         break;
5085     case INDEX_op_brcond2_i32:
5086         op_cond = op->args[4];
5087         break;
5088     case INDEX_op_movcond_i32:
5089     case INDEX_op_movcond_i64:
5090     case INDEX_op_setcond2_i32:
5091     case INDEX_op_cmpsel_vec:
5092         op_cond = op->args[5];
5093         break;
5094     default:
5095         /* No condition within opcode. */
5096         op_cond = TCG_COND_ALWAYS;
5097         break;
5098     }
5099 
5100     args_ct = opcode_args_ct(op);
5101 
5102     /* satisfy input constraints */
5103     for (k = 0; k < nb_iargs; k++) {
5104         TCGRegSet i_preferred_regs, i_required_regs;
5105         bool allocate_new_reg, copyto_new_reg;
5106         TCGTemp *ts2;
5107         int i1, i2;
5108 
5109         i = args_ct[nb_oargs + k].sort_index;
5110         arg = op->args[i];
5111         arg_ct = &args_ct[i];
5112         ts = arg_temp(arg);
5113 
5114         if (ts->val_type == TEMP_VAL_CONST) {
5115 #ifdef TCG_REG_ZERO
5116             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5117                 /* Hardware zero register: indicate register via non-const. */
5118                 const_args[i] = 0;
5119                 new_args[i] = TCG_REG_ZERO;
5120                 continue;
5121             }
5122 #endif
5123 
5124             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5125                                        op_cond, TCGOP_VECE(op))) {
5126                 /* constant is OK for instruction */
5127                 const_args[i] = 1;
5128                 new_args[i] = ts->val;
5129                 continue;
5130             }
5131         }
5132 
5133         reg = ts->reg;
5134         i_preferred_regs = 0;
5135         i_required_regs = arg_ct->regs;
5136         allocate_new_reg = false;
5137         copyto_new_reg = false;
5138 
5139         switch (arg_ct->pair) {
5140         case 0: /* not paired */
5141             if (arg_ct->ialias) {
5142                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5143 
5144                 /*
5145                  * If the input is readonly, then it cannot also be an
5146                  * output and aliased to itself.  If the input is not
5147                  * dead after the instruction, we must allocate a new
5148                  * register and move it.
5149                  */
5150                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5151                     || args_ct[arg_ct->alias_index].newreg) {
5152                     allocate_new_reg = true;
5153                 } else if (ts->val_type == TEMP_VAL_REG) {
5154                     /*
5155                      * Check if the current register has already been
5156                      * allocated for another input.
5157                      */
5158                     allocate_new_reg =
5159                         tcg_regset_test_reg(i_allocated_regs, reg);
5160                 }
5161             }
5162             if (!allocate_new_reg) {
5163                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5164                           i_preferred_regs);
5165                 reg = ts->reg;
5166                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5167             }
5168             if (allocate_new_reg) {
5169                 /*
5170                  * Allocate a new register matching the constraint
5171                  * and move the temporary register into it.
5172                  */
5173                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5174                           i_allocated_regs, 0);
5175                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5176                                     i_preferred_regs, ts->indirect_base);
5177                 copyto_new_reg = true;
5178             }
5179             break;
5180 
5181         case 1:
5182             /* First of an input pair; if i1 == i2, the second is an output. */
5183             i1 = i;
5184             i2 = arg_ct->pair_index;
5185             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5186 
5187             /*
5188              * It is easier to default to allocating a new pair
5189              * and to identify a few cases where it's not required.
5190              */
5191             if (arg_ct->ialias) {
5192                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5193                 if (IS_DEAD_ARG(i1) &&
5194                     IS_DEAD_ARG(i2) &&
5195                     !temp_readonly(ts) &&
5196                     ts->val_type == TEMP_VAL_REG &&
5197                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5198                     tcg_regset_test_reg(i_required_regs, reg) &&
5199                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5200                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5201                     (ts2
5202                      ? ts2->val_type == TEMP_VAL_REG &&
5203                        ts2->reg == reg + 1 &&
5204                        !temp_readonly(ts2)
5205                      : s->reg_to_temp[reg + 1] == NULL)) {
5206                     break;
5207                 }
5208             } else {
5209                 /* Without aliasing, the pair must also be an input. */
5210                 tcg_debug_assert(ts2);
5211                 if (ts->val_type == TEMP_VAL_REG &&
5212                     ts2->val_type == TEMP_VAL_REG &&
5213                     ts2->reg == reg + 1 &&
5214                     tcg_regset_test_reg(i_required_regs, reg)) {
5215                     break;
5216                 }
5217             }
5218             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5219                                      0, ts->indirect_base);
5220             goto do_pair;
5221 
5222         case 2: /* pair second */
5223             reg = new_args[arg_ct->pair_index] + 1;
5224             goto do_pair;
5225 
5226         case 3: /* ialias with second output, no first input */
5227             tcg_debug_assert(arg_ct->ialias);
5228             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5229 
5230             if (IS_DEAD_ARG(i) &&
5231                 !temp_readonly(ts) &&
5232                 ts->val_type == TEMP_VAL_REG &&
5233                 reg > 0 &&
5234                 s->reg_to_temp[reg - 1] == NULL &&
5235                 tcg_regset_test_reg(i_required_regs, reg) &&
5236                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5237                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5238                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5239                 break;
5240             }
5241             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5242                                      i_allocated_regs, 0,
5243                                      ts->indirect_base);
5244             tcg_regset_set_reg(i_allocated_regs, reg);
5245             reg += 1;
5246             goto do_pair;
5247 
5248         do_pair:
5249             /*
5250              * If an aliased input is not dead after the instruction,
5251              * we must allocate a new register and move it.
5252              */
5253             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5254                 TCGRegSet t_allocated_regs = i_allocated_regs;
5255 
5256                 /*
5257                  * Because of the alias, and the continued life, make sure
5258                  * that the temp is somewhere *other* than the reg pair,
5259                  * and we get a copy in reg.
5260                  */
5261                 tcg_regset_set_reg(t_allocated_regs, reg);
5262                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5263                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5264                     /* If ts was already in reg, copy it somewhere else. */
5265                     TCGReg nr;
5266                     bool ok;
5267 
5268                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5269                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5270                                        t_allocated_regs, 0, ts->indirect_base);
5271                     ok = tcg_out_mov(s, ts->type, nr, reg);
5272                     tcg_debug_assert(ok);
5273 
5274                     set_temp_val_reg(s, ts, nr);
5275                 } else {
5276                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5277                               t_allocated_regs, 0);
5278                     copyto_new_reg = true;
5279                 }
5280             } else {
5281                 /* Preferably allocate to reg, otherwise copy. */
5282                 i_required_regs = (TCGRegSet)1 << reg;
5283                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5284                           i_preferred_regs);
5285                 copyto_new_reg = ts->reg != reg;
5286             }
5287             break;
5288 
5289         default:
5290             g_assert_not_reached();
5291         }
5292 
5293         if (copyto_new_reg) {
5294             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5295                 /*
5296                  * Cross register class move not supported.  Sync the
5297                  * temp back to its slot and load from there.
5298                  */
5299                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5300                 tcg_out_ld(s, ts->type, reg,
5301                            ts->mem_base->reg, ts->mem_offset);
5302             }
5303         }
5304         new_args[i] = reg;
5305         const_args[i] = 0;
5306         tcg_regset_set_reg(i_allocated_regs, reg);
5307     }
5308 
5309     /* mark dead temporaries and free the associated registers */
5310     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5311         if (IS_DEAD_ARG(i)) {
5312             temp_dead(s, arg_temp(op->args[i]));
5313         }
5314     }
5315 
5316     if (def->flags & TCG_OPF_COND_BRANCH) {
5317         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5318     } else if (def->flags & TCG_OPF_BB_END) {
5319         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5320     } else {
5321         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5322             /* XXX: permit generic clobber register list ? */
5323             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5324                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5325                     tcg_reg_free(s, i, i_allocated_regs);
5326                 }
5327             }
5328         }
5329         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5330             /* sync globals if the op has side effects and might trigger
5331                an exception. */
5332             sync_globals(s, i_allocated_regs);
5333         }
5334 
5335         /* satisfy the output constraints */
5336         for (k = 0; k < nb_oargs; k++) {
5337             i = args_ct[k].sort_index;
5338             arg = op->args[i];
5339             arg_ct = &args_ct[i];
5340             ts = arg_temp(arg);
5341 
5342             /* ENV should not be modified.  */
5343             tcg_debug_assert(!temp_readonly(ts));
5344 
5345             switch (arg_ct->pair) {
5346             case 0: /* not paired */
5347                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5348                     reg = new_args[arg_ct->alias_index];
5349                 } else if (arg_ct->newreg) {
5350                     reg = tcg_reg_alloc(s, arg_ct->regs,
5351                                         i_allocated_regs | o_allocated_regs,
5352                                         output_pref(op, k), ts->indirect_base);
5353                 } else {
5354                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5355                                         output_pref(op, k), ts->indirect_base);
5356                 }
5357                 break;
5358 
5359             case 1: /* first of pair */
5360                 if (arg_ct->oalias) {
5361                     reg = new_args[arg_ct->alias_index];
5362                 } else if (arg_ct->newreg) {
5363                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5364                                              i_allocated_regs | o_allocated_regs,
5365                                              output_pref(op, k),
5366                                              ts->indirect_base);
5367                 } else {
5368                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5369                                              output_pref(op, k),
5370                                              ts->indirect_base);
5371                 }
5372                 break;
5373 
5374             case 2: /* second of pair */
5375                 if (arg_ct->oalias) {
5376                     reg = new_args[arg_ct->alias_index];
5377                 } else {
5378                     reg = new_args[arg_ct->pair_index] + 1;
5379                 }
5380                 break;
5381 
5382             case 3: /* first of pair, aliasing with a second input */
5383                 tcg_debug_assert(!arg_ct->newreg);
5384                 reg = new_args[arg_ct->pair_index] - 1;
5385                 break;
5386 
5387             default:
5388                 g_assert_not_reached();
5389             }
5390             tcg_regset_set_reg(o_allocated_regs, reg);
5391             set_temp_val_reg(s, ts, reg);
5392             ts->mem_coherent = 0;
5393             new_args[i] = reg;
5394         }
5395     }
5396 
5397     /* emit instruction */
5398     TCGType type = TCGOP_TYPE(op);
5399     switch (op->opc) {
5400     case INDEX_op_ext_i32_i64:
5401         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5402         break;
5403     case INDEX_op_extu_i32_i64:
5404         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5405         break;
5406     case INDEX_op_extrl_i64_i32:
5407         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5408         break;
5409 
5410     case INDEX_op_add:
5411     case INDEX_op_and:
5412     case INDEX_op_andc:
5413     case INDEX_op_divs:
5414     case INDEX_op_divu:
5415     case INDEX_op_eqv:
5416     case INDEX_op_mul:
5417     case INDEX_op_mulsh:
5418     case INDEX_op_muluh:
5419     case INDEX_op_nand:
5420     case INDEX_op_nor:
5421     case INDEX_op_or:
5422     case INDEX_op_orc:
5423     case INDEX_op_rems:
5424     case INDEX_op_remu:
5425     case INDEX_op_shl:
5426     case INDEX_op_xor:
5427         {
5428             const TCGOutOpBinary *out =
5429                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5430 
5431             /* Constants should never appear in the first source operand. */
5432             tcg_debug_assert(!const_args[1]);
5433             if (const_args[2]) {
5434                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5435             } else {
5436                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5437             }
5438         }
5439         break;
5440 
5441     case INDEX_op_sub:
5442         {
5443             const TCGOutOpSubtract *out = &outop_sub;
5444 
5445             /*
5446              * Constants should never appear in the second source operand.
5447              * These are folded to add with negative constant.
5448              */
5449             tcg_debug_assert(!const_args[2]);
5450             if (const_args[1]) {
5451                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5452             } else {
5453                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5454             }
5455         }
5456         break;
5457 
5458     case INDEX_op_neg:
5459     case INDEX_op_not:
5460         {
5461             const TCGOutOpUnary *out =
5462                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5463 
5464             /* Constants should have been folded. */
5465             tcg_debug_assert(!const_args[1]);
5466             out->out_rr(s, type, new_args[0], new_args[1]);
5467         }
5468         break;
5469 
5470     case INDEX_op_divs2:
5471     case INDEX_op_divu2:
5472         {
5473             const TCGOutOpDivRem *out =
5474                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5475 
5476             /* Only used by x86 and s390x, which use matching constraints. */
5477             tcg_debug_assert(new_args[0] == new_args[2]);
5478             tcg_debug_assert(new_args[1] == new_args[3]);
5479             tcg_debug_assert(!const_args[4]);
5480             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5481         }
5482         break;
5483 
5484     default:
5485         if (def->flags & TCG_OPF_VECTOR) {
5486             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5487                            TCGOP_VECE(op), new_args, const_args);
5488         } else {
5489             tcg_out_op(s, op->opc, type, new_args, const_args);
5490         }
5491         break;
5492     }
5493 
5494     /* move the outputs in the correct register if needed */
5495     for(i = 0; i < nb_oargs; i++) {
5496         ts = arg_temp(op->args[i]);
5497 
5498         /* ENV should not be modified.  */
5499         tcg_debug_assert(!temp_readonly(ts));
5500 
5501         if (NEED_SYNC_ARG(i)) {
5502             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5503         } else if (IS_DEAD_ARG(i)) {
5504             temp_dead(s, ts);
5505         }
5506     }
5507 }
5508 
/*
 * Register allocation for dup2_vec: build a vector whose 64-bit elements
 * are composed from two 32-bit register-pair inputs (32-bit hosts only).
 *
 * Returns true if the operation was emitted here (via dupi_vec or
 * dupm_vec); returns false to request generic expansion by the caller.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low half input */
    itsh = arg_temp(op->args[2]);   /* high half input */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size whose replication yields val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    /* The halves must be adjacent subindices of the same base temp,
       ordered to match host endianness in memory. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Ensure both halves are in their memory slots before loading. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register now holds the result; memory copy is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5595 
5596 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5597                          TCGRegSet allocated_regs)
5598 {
5599     if (ts->val_type == TEMP_VAL_REG) {
5600         if (ts->reg != reg) {
5601             tcg_reg_free(s, reg, allocated_regs);
5602             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5603                 /*
5604                  * Cross register class move not supported.  Sync the
5605                  * temp back to its slot and load from there.
5606                  */
5607                 temp_sync(s, ts, allocated_regs, 0, 0);
5608                 tcg_out_ld(s, ts->type, reg,
5609                            ts->mem_base->reg, ts->mem_offset);
5610             }
5611         }
5612     } else {
5613         TCGRegSet arg_set = 0;
5614 
5615         tcg_reg_free(s, reg, allocated_regs);
5616         tcg_regset_set_reg(arg_set, reg);
5617         temp_load(s, ts, arg_set, allocated_regs, 0);
5618     }
5619 }
5620 
5621 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5622                          TCGRegSet allocated_regs)
5623 {
5624     /*
5625      * When the destination is on the stack, load up the temp and store.
5626      * If there are many call-saved registers, the temp might live to
5627      * see another use; otherwise it'll be discarded.
5628      */
5629     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5630     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5631                arg_slot_stk_ofs(arg_slot));
5632 }
5633 
5634 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5635                             TCGTemp *ts, TCGRegSet *allocated_regs)
5636 {
5637     if (arg_slot_reg_p(l->arg_slot)) {
5638         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5639         load_arg_reg(s, reg, ts, *allocated_regs);
5640         tcg_regset_set_reg(*allocated_regs, reg);
5641     } else {
5642         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5643     }
5644 }
5645 
5646 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5647                          intptr_t ref_off, TCGRegSet *allocated_regs)
5648 {
5649     TCGReg reg;
5650 
5651     if (arg_slot_reg_p(arg_slot)) {
5652         reg = tcg_target_call_iarg_regs[arg_slot];
5653         tcg_reg_free(s, reg, *allocated_regs);
5654         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5655         tcg_regset_set_reg(*allocated_regs, reg);
5656     } else {
5657         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5658                             *allocated_regs, 0, false);
5659         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5660         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5661                    arg_slot_stk_ofs(arg_slot));
5662     }
5663 }
5664 
/*
 * Register allocation and code emission for a helper-call op:
 * move inputs into their ABI argument locations, free the call-clobbered
 * registers, sync/save globals as the helper flags require, emit the
 * call, then bind the outputs to their return locations.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Continuation word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part is in the ABI's return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* An I128 result returned in a vector register: spill to memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5795 
5796 /**
5797  * atom_and_align_for_opc:
5798  * @s: tcg context
5799  * @opc: memory operation code
5800  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5801  * @allow_two_ops: true if we are prepared to issue two operations
5802  *
5803  * Return the alignment and atomicity to use for the inline fast path
5804  * for the given memory operation.  The alignment may be larger than
5805  * that specified in @opc, and the correct alignment will be diagnosed
5806  * by the slow path helper.
5807  *
5808  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5809  * and issue two loads or stores for subalignment.
5810  */
5811 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5812                                            MemOp host_atom, bool allow_two_ops)
5813 {
5814     MemOp align = memop_alignment_bits(opc);
5815     MemOp size = opc & MO_SIZE;
5816     MemOp half = size ? size - 1 : 0;
5817     MemOp atom = opc & MO_ATOM_MASK;
5818     MemOp atmax;
5819 
5820     switch (atom) {
5821     case MO_ATOM_NONE:
5822         /* The operation requires no specific atomicity. */
5823         atmax = MO_8;
5824         break;
5825 
5826     case MO_ATOM_IFALIGN:
5827         atmax = size;
5828         break;
5829 
5830     case MO_ATOM_IFALIGN_PAIR:
5831         atmax = half;
5832         break;
5833 
5834     case MO_ATOM_WITHIN16:
5835         atmax = size;
5836         if (size == MO_128) {
5837             /* Misalignment implies !within16, and therefore no atomicity. */
5838         } else if (host_atom != MO_ATOM_WITHIN16) {
5839             /* The host does not implement within16, so require alignment. */
5840             align = MAX(align, size);
5841         }
5842         break;
5843 
5844     case MO_ATOM_WITHIN16_PAIR:
5845         atmax = size;
5846         /*
5847          * Misalignment implies !within16, and therefore half atomicity.
5848          * Any host prepared for two operations can implement this with
5849          * half alignment.
5850          */
5851         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5852             align = MAX(align, half);
5853         }
5854         break;
5855 
5856     case MO_ATOM_SUBALIGN:
5857         atmax = size;
5858         if (host_atom != MO_ATOM_SUBALIGN) {
5859             /* If unaligned but not odd, there are subobjects up to half. */
5860             if (allow_two_ops) {
5861                 align = MAX(align, half);
5862             } else {
5863                 align = MAX(align, size);
5864             }
5865         }
5866         break;
5867 
5868     default:
5869         g_assert_not_reached();
5870     }
5871 
5872     return (TCGAtomAlign){ .atom = atmax, .align = align };
5873 }
5874 
5875 /*
5876  * Similarly for qemu_ld/st slow path helpers.
5877  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5878  * using only the provided backend tcg_out_* functions.
5879  */
5880 
5881 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5882 {
5883     int ofs = arg_slot_stk_ofs(slot);
5884 
5885     /*
5886      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5887      * require extension to uint64_t, adjust the address for uint32_t.
5888      */
5889     if (HOST_BIG_ENDIAN &&
5890         TCG_TARGET_REG_BITS == 64 &&
5891         type == TCG_TYPE_I32) {
5892         ofs += 4;
5893     }
5894     return ofs;
5895 }
5896 
5897 static void tcg_out_helper_load_slots(TCGContext *s,
5898                                       unsigned nmov, TCGMovExtend *mov,
5899                                       const TCGLdstHelperParam *parm)
5900 {
5901     unsigned i;
5902     TCGReg dst3;
5903 
5904     /*
5905      * Start from the end, storing to the stack first.
5906      * This frees those registers, so we need not consider overlap.
5907      */
5908     for (i = nmov; i-- > 0; ) {
5909         unsigned slot = mov[i].dst;
5910 
5911         if (arg_slot_reg_p(slot)) {
5912             goto found_reg;
5913         }
5914 
5915         TCGReg src = mov[i].src;
5916         TCGType dst_type = mov[i].dst_type;
5917         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5918 
5919         /* The argument is going onto the stack; extend into scratch. */
5920         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5921             tcg_debug_assert(parm->ntmp != 0);
5922             mov[i].dst = src = parm->tmp[0];
5923             tcg_out_movext1(s, &mov[i]);
5924         }
5925 
5926         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5927                    tcg_out_helper_stk_ofs(dst_type, slot));
5928     }
5929     return;
5930 
5931  found_reg:
5932     /*
5933      * The remaining arguments are in registers.
5934      * Convert slot numbers to argument registers.
5935      */
5936     nmov = i + 1;
5937     for (i = 0; i < nmov; ++i) {
5938         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5939     }
5940 
5941     switch (nmov) {
5942     case 4:
5943         /* The backend must have provided enough temps for the worst case. */
5944         tcg_debug_assert(parm->ntmp >= 2);
5945 
5946         dst3 = mov[3].dst;
5947         for (unsigned j = 0; j < 3; ++j) {
5948             if (dst3 == mov[j].src) {
5949                 /*
5950                  * Conflict. Copy the source to a temporary, perform the
5951                  * remaining moves, then the extension from our scratch
5952                  * on the way out.
5953                  */
5954                 TCGReg scratch = parm->tmp[1];
5955 
5956                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5957                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5958                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5959                 break;
5960             }
5961         }
5962 
5963         /* No conflicts: perform this move and continue. */
5964         tcg_out_movext1(s, &mov[3]);
5965         /* fall through */
5966 
5967     case 3:
5968         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5969                         parm->ntmp ? parm->tmp[0] : -1);
5970         break;
5971     case 2:
5972         tcg_out_movext2(s, mov, mov + 1,
5973                         parm->ntmp ? parm->tmp[0] : -1);
5974         break;
5975     case 1:
5976         tcg_out_movext1(s, mov);
5977         break;
5978     default:
5979         g_assert_not_reached();
5980     }
5981 }
5982 
5983 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5984                                     TCGType type, tcg_target_long imm,
5985                                     const TCGLdstHelperParam *parm)
5986 {
5987     if (arg_slot_reg_p(slot)) {
5988         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5989     } else {
5990         int ofs = tcg_out_helper_stk_ofs(type, slot);
5991         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5992             tcg_debug_assert(parm->ntmp != 0);
5993             tcg_out_movi(s, type, parm->tmp[0], imm);
5994             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5995         }
5996     }
5997 }
5998 
/*
 * Load the arguments common to all ld/st slow-path helpers:
 * env (always first), then oi (MemOpIdx), then ra (return address),
 * where oi and ra start at argument index @next_arg of @info->in[].
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* The backend computes ra at runtime (e.g. from a link register). */
        int arg_reg = -1;
        TCGReg ra_reg;

        /* Tell the generator the preferred destination, if it is a reg. */
        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* ra is a compile-time constant: the slow-path return address. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6065 
6066 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6067                                        const TCGCallArgumentLoc *loc,
6068                                        TCGType dst_type, TCGType src_type,
6069                                        TCGReg lo, TCGReg hi)
6070 {
6071     MemOp reg_mo;
6072 
6073     if (dst_type <= TCG_TYPE_REG) {
6074         MemOp src_ext;
6075 
6076         switch (loc->kind) {
6077         case TCG_CALL_ARG_NORMAL:
6078             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6079             break;
6080         case TCG_CALL_ARG_EXTEND_U:
6081             dst_type = TCG_TYPE_REG;
6082             src_ext = MO_UL;
6083             break;
6084         case TCG_CALL_ARG_EXTEND_S:
6085             dst_type = TCG_TYPE_REG;
6086             src_ext = MO_SL;
6087             break;
6088         default:
6089             g_assert_not_reached();
6090         }
6091 
6092         mov[0].dst = loc->arg_slot;
6093         mov[0].dst_type = dst_type;
6094         mov[0].src = lo;
6095         mov[0].src_type = src_type;
6096         mov[0].src_ext = src_ext;
6097         return 1;
6098     }
6099 
6100     if (TCG_TARGET_REG_BITS == 32) {
6101         assert(dst_type == TCG_TYPE_I64);
6102         reg_mo = MO_32;
6103     } else {
6104         assert(dst_type == TCG_TYPE_I128);
6105         reg_mo = MO_64;
6106     }
6107 
6108     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6109     mov[0].src = lo;
6110     mov[0].dst_type = TCG_TYPE_REG;
6111     mov[0].src_type = TCG_TYPE_REG;
6112     mov[0].src_ext = reg_mo;
6113 
6114     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6115     mov[1].src = hi;
6116     mov[1].dst_type = TCG_TYPE_REG;
6117     mov[1].src_type = TCG_TYPE_REG;
6118     mov[1].src_ext = reg_mo;
6119 
6120     return 2;
6121 }
6122 
/*
 * Emit host code that loads the arguments for a guest-load helper call
 * described by @ldst.  The helper signature is selected from the access
 * size in ldst->oi; the env, oi and retaddr arguments common to loads
 * and stores are handled by tcg_out_helper_load_common_args at the end.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper descriptor by the size of the access. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    /* Handle the guest address argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                /* Pass the address of the scratch area in a register. */
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Compute the address into a temp, then store to the
                   stack argument slot. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Finally load env, oi and the return address. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6203 
/*
 * Emit host code that moves the return value of a guest-load helper
 * from the ABI return location(s) into ldst->datalo_reg/datahi_reg,
 * applying any required sign- or zero-extension.  If @load_sign, the
 * helper has already performed the MO_SIGN extension to
 * tcg_target_ulong and only a plain move is required.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On a 32-bit host, I64 is returned as a register pair below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair; handled below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload as
               two I64 halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value is already in the stack scratch area. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Remaining cases: the value is returned as a register pair. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    /* Perform both moves, resolving any overlap via a temp register. */
    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6289 
/*
 * Emit host code that loads the arguments for a guest-store helper call
 * described by @ldst: the guest address, the data value, and then the
 * common env/oi/retaddr arguments.  The helper signature is selected
 * from the access size in ldst->oi.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper descriptor by the size of the access. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passes in registers/stack slots like any other value. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 data passes by reference to a stack scratch area. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Pass the address of the spilled data as the argument. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    /* Finally load env, oi and the return address. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6396 
/*
 * Generate host code for @tb from the op stream accumulated in @s.
 *
 * Returns the number of bytes of code emitted on success.  Negative
 * returns request a restart with a smaller TB:
 *   -1: the code buffer high-water mark was exceeded;
 *   -2: code offsets no longer fit the 16-bit gen_insn_end_off entries,
 *       or relocations could not be resolved;
 *   other negatives are propagated from ldst/pool finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Optional dump of the op stream before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    /* Optimization and liveness passes over the op stream. */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* One uint64_t per insn_start word, per guest instruction. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Main loop: allocate registers and emit code for each op. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record where the previous guest instruction's code ends. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6588 
6589 #ifdef ELF_HOST_MACHINE
6590 /* In order to use this feature, the backend needs to do three things:
6591 
6592    (1) Define ELF_HOST_MACHINE to indicate both what value to
6593        put into the ELF image and to indicate support for the feature.
6594 
6595    (2) Define tcg_register_jit.  This should create a buffer containing
6596        the contents of a .debug_frame section that describes the post-
6597        prologue unwind info for the tcg machine.
6598 
6599    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6600 */
6601 
6602 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag, per the GDB JIT interface. */
typedef enum {
    JIT_NOACTION = 0,   /* no change to the entry list */
    JIT_REGISTER_FN,    /* relevant_entry has been added */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
6608 
/* One registered symbol file, linked into the descriptor's list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
6615 
/* Root descriptor that GDB inspects; layout fixed by the JIT interface. */
struct jit_descriptor {
    uint32_t version;                       /* interface version; always 1 */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry being (un)registered */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
6622 
/*
 * GDB sets a breakpoint on this function to learn of new registrations.
 * The noinline attribute and the empty asm keep the compiler from
 * inlining or eliminating the function and its call sites.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6628 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  Version 1 is the only version
   defined by the GDB JIT interface.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6632 
6633 /* End GDB interface.  */
6634 
/*
 * Return the byte offset of @str within the ELF string table @strtab.
 * The table's first byte is the empty string, so the scan starts at
 * offset 1.  @str must be present: there is no bound against the end
 * of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        /* advance past this NUL-terminated entry */
    }
    return p - strtab;
}
6646 
/*
 * Construct an in-memory ELF image that describes the generated code in
 * [buf_ptr, buf_ptr + buf_size), append the backend-supplied
 * .debug_frame unwind info, and register the image with GDB through the
 * __jit_debug_* interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Minimal hand-rolled DWARF .debug_info contents: one compile unit
       containing one subprogram, both spanning the code buffer. */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    /* Complete layout of the fake ELF file; .debug_frame follows it. */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): sizes Shdr rather than Ehdr -- looks like a
               latent typo, but GDB evidently tolerates it; confirm before
               changing. */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Patch in everything that depends on the buffer's address/size. */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* The caller's .debug_frame blob is appended after the ElfImage. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (here 0 or 1),
               not bytes, so the comparison against img_size is moot; the
               body is deliberately empty anyway. */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the image and notify GDB via the breakpoint function. */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
6846 #else
6847 /* No support for the feature.  Provide the entry point expected by exec.c,
6848    and implement the internal function we declared earlier.  */
6849 
/* Stub: without ELF_HOST_MACHINE there is no GDB JIT registration. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6855 
/* Stub entry point expected by exec.c when the feature is unsupported. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6859 #endif /* ELF_HOST_MACHINE */
6860 
6861 #if !TCG_TARGET_MAYBE_vec
/*
 * Backends without vector support never create vector ops, so this
 * expansion hook must never be called.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6866 #endif
6867