xref: /openbmc/qemu/tcg/tcg.c (revision a363e1e179445102d7940e92d394d6c00c126f13)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
/* Vector operations are provided by the backend in tcg-target.c.inc. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Without vector support no vector opcode should ever reach the
 * backend, so these stubs must be unreachable at run time.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* Report that no vector operation can be emitted on this host. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte at the current code generation point. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Rewrite a previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
273 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/*
 * Emit a 16-bit value at the current code generation point.  When the
 * insn unit is smaller than 16 bits, use memcpy to avoid alignment
 * assumptions and advance code_ptr by the equivalent unit count.
 */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Rewrite a previously emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
296 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/*
 * Emit a 32-bit value at the current code generation point.  When the
 * insn unit is smaller than 32 bits, use memcpy to avoid alignment
 * assumptions and advance code_ptr by the equivalent unit count.
 */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Rewrite a previously emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
319 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/*
 * Emit a 64-bit value at the current code generation point.  When the
 * insn unit is smaller than 64 bits, use memcpy to avoid alignment
 * assumptions and advance code_ptr by the equivalent unit count.
 */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Rewrite a previously emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
/*
 * Bind label @l to the current output position, recording the
 * read-execute view of the address for use by relocations.
 * A label may be bound only once.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
/* Record the current code offset as jmp_reset_offset[@which] of the TB. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
403 
/* Record the current code offset as jmp_insn_offset[@which] of the TB. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
412 
/* Address of the TB's jmp_target_addr[@which] slot, in the RX mapping. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
421 
/*
 * Offset of the softmmu TLB entry tlb.f[@which], measured from the end
 * of CPUNegativeOffsetState (hence the result is negative).
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
428 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp in the translation loop; -2 marks overflow. */
    siglongjmp(s->jmp_trans, -2);
}
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
514 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, but take the source register from @src rather
   than from @i->src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
521 
/* As tcg_out_movext, using all fields of the @i descriptor. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
526 
/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* No overlap: emit the moves in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /*
     * i1->dst would clobber src2.  If i2->dst also clobbers src1, the
     * two moves form a cycle: break it with an xchg if the backend has
     * one, else copy src1 aside into @scratch.
     */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 before i1, since i1->dst overlaps i2's source. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
566 
/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If some move's destination is not another move's source, that
     * move can go first, reducing the problem to the two-move case.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in @scratch and rotate through the cycle. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in @scratch and rotate the other way. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
/*
 * Insert @n into the pool list, keeping the list sorted: larger
 * entries first, equal-sized entries in descending data order.
 * Sorting makes duplicate entries adjacent, so that they can be
 * merged when the pool is emitted by tcg_out_pool_finalize.
 */
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
704 
/* The "usual" for generic integer code.  */
/* Add a single-word constant @d to the pool, patched into @label. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
714 
/* For v64 or v128, depending on the host.  */
/* Add a two-word constant to the pool, patched into @label. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
/*
 * Emit the accumulated constant pool after the generated code and
 * patch each pool label to point at its constant.  Returns 0 on
 * success, -1 on (pending) buffer overflow, -2 on relocation failure.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    /* Align to the size of the first (largest) entry; pad with nops. */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /*
         * The pool is sorted, so duplicate entries are adjacent; emit
         * data only when it differs from the previous emitted entry @l.
         */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        /* Patch the relocation to the (just or previously) emitted data. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;               /* output / input arg counts */
    const char *args_ct_str[TCG_MAX_OP_ARGS]; /* constraint string per arg */
} TCGConstraintSet;
894 
/*
 * Re-expand the same macros so that the second inclusion of
 * "tcg-target-con-set.h" produces a TCGConstraintSet initializer for
 * each set, in the same order as the enumerators above.  The N-variants
 * prefix "&" to outputs that they mark as "new" in the constraint string.
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
918 
/* Remove the table-expansion definitions before the final redefinition. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
935 
/* Expand the enumerator to be returned from tcg_target_op_def(). */

/* Note: no trailing comma here — these expand to a single expression. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constrains for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    /* Constraint set for this opcode, or C_Dynamic / C_NotImplemented. */
    TCGConstraintSetIndex static_constraint;
    /* Used only when static_constraint is C_Dynamic. */
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;
980 
/* Two-operand ops: reg = reg op reg, or reg = reg op immediate. */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    /* Emit a0 = a1 op a2, all registers. */
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    /* Emit a0 = a1 op a2, with a2 an immediate. */
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;
988 
/* Combined division/remainder (divs2/divu2) output. */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    /*
     * NOTE(review): the argument names suggest operands 2/3 of the op
     * are implied by the constraint set (a4 is the fifth op argument);
     * confirm against the backend implementations.
     */
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;
994 
/* Double-width multiply (muls2/mulu2): two outputs, two inputs. */
typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;
1000 
/* One-operand ops (neg, not, ctpop): reg = op reg. */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;
1005 
/* setcond/negsetcond: compare a1 against a2 under @cond into @ret. */
typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    /* Both comparands in registers. */
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    /* Second comparand is an immediate. */
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;
1013 
/* Subtraction, which is not commutative: supports an immediate minuend. */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    /* Emit a0 = a1 - a2, all registers. */
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    /* Emit a0 = a1 - a2, with a1 an immediate. */
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1021 
1022 #include "tcg-target.c.inc"
1023 
#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
/*
 * NOTE(review): this checks that the (negative) offset of tlb.f[0] from
 * the end of CPUNegativeOffsetState is not below the backend's
 * MIN_TLB_MASK_TABLE_OFS — presumably the limit of the backend's
 * addressing mode for env-relative TLB accesses; confirm per backend.
 */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
1030 
/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
/* Opcodes not listed here have a NULL entry (designated initializers). */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP
1076 
1077 /*
1078  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1079  * and registered the target's TCG globals) must register with this function
1080  * before initiating translation.
1081  *
1082  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1083  * of tcg_region_init() for the reasoning behind this.
1084  *
1085  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1086  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
1087  * is not used anymore for translation once this function is called.
1088  *
1089  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1090  * iterates over the array (e.g. tcg_code_size() the same for both system/user
1091  * modes.
1092  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a full copy of the parent (initial) context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /*
             * mem_base pointed into the parent's temps[]; redirect it
             * to the element at the same index within our copy.
             */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Index 0 already received its region allocation at init time. */
    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1127 
1128 /* pool based memory allocation */
1129 void *tcg_malloc_internal(TCGContext *s, int size)
1130 {
1131     TCGPool *p;
1132     int pool_size;
1133 
1134     if (size > TCG_POOL_CHUNK_SIZE) {
1135         /* big malloc: insert a new pool (XXX: could optimize) */
1136         p = g_malloc(sizeof(TCGPool) + size);
1137         p->size = size;
1138         p->next = s->pool_first_large;
1139         s->pool_first_large = p;
1140         return p->data;
1141     } else {
1142         p = s->pool_current;
1143         if (!p) {
1144             p = s->pool_first;
1145             if (!p)
1146                 goto new_pool;
1147         } else {
1148             if (!p->next) {
1149             new_pool:
1150                 pool_size = TCG_POOL_CHUNK_SIZE;
1151                 p = g_malloc(sizeof(TCGPool) + pool_size);
1152                 p->size = pool_size;
1153                 p->next = NULL;
1154                 if (s->pool_current) {
1155                     s->pool_current->next = p;
1156                 } else {
1157                     s->pool_first = p;
1158                 }
1159             } else {
1160                 p = p->next;
1161             }
1162         }
1163     }
1164     s->pool_current = p;
1165     s->pool_cur = p->data + size;
1166     s->pool_end = p->data + p->size;
1167     return p->data;
1168 }
1169 
1170 void tcg_pool_reset(TCGContext *s)
1171 {
1172     TCGPool *p, *t;
1173     for (p = s->pool_first_large; p; p = t) {
1174         t = p->next;
1175         g_free(p);
1176     }
1177     s->pool_first_large = NULL;
1178     s->pool_cur = s->pool_end = NULL;
1179     s->pool_current = NULL;
1180 }
1181 
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

/* "ttl" (tcg_target_ulong) aliases i32 or i64 per the host register size. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
1195 
/*
 * Call layouts for the out-of-line qemu_ld/st slow-path helpers.
 * Loads return the data; stores take the data as an extra argument.
 * All are marked TCG_CALL_NO_WG.
 */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1252 
#ifdef CONFIG_TCG_INTERPRETER
/* Map one dh_typecode_* value to the corresponding libffi type. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                /* element list is NULL-terminated */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
1290 
/*
 * Build and return the libffi call interface (ffi_cif) for @info,
 * decoding its packed typemask: 3 bits per slot, slot 0 is the
 * return type, slots 1..n are the arguments.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];   /* arg_types storage, co-allocated with the cif */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);     /* bits -> 3-bit typecode slots */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1324 
/*
 * One-time lazy initialization of a TCGHelperInfo: HELPER_INFO_INIT
 * names the field that records it, HELPER_INFO_INIT_VAL produces the
 * value to store.  With the interpreter this is the ffi_cif pointer;
 * otherwise a simple flag suffices.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1331 
1332 static inline bool arg_slot_reg_p(unsigned arg_slot)
1333 {
1334     /*
1335      * Split the sizeof away from the comparison to avoid Werror from
1336      * "unsigned < 0 is always false", when iarg_regs is empty.
1337      */
1338     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1339     return arg_slot < nreg;
1340 }
1341 
1342 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1343 {
1344     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1345     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1346 
1347     tcg_debug_assert(stk_slot < max);
1348     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1349 }
1350 
/* Running totals while laying out a helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1357 
1358 static void layout_arg_even(TCGCumulativeArgs *cum)
1359 {
1360     cum->arg_slot += cum->arg_slot & 1;
1361 }
1362 
1363 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1364                          TCGCallArgumentKind kind)
1365 {
1366     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1367 
1368     *loc = (TCGCallArgumentLoc){
1369         .kind = kind,
1370         .arg_idx = cum->arg_idx,
1371         .arg_slot = cum->arg_slot,
1372     };
1373     cum->info_in_idx++;
1374     cum->arg_slot++;
1375 }
1376 
1377 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1378                                 TCGHelperInfo *info, int n)
1379 {
1380     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1381 
1382     for (int i = 0; i < n; ++i) {
1383         /* Layout all using the same arg_idx, adjusting the subindex. */
1384         loc[i] = (TCGCallArgumentLoc){
1385             .kind = TCG_CALL_ARG_NORMAL,
1386             .arg_idx = cum->arg_idx,
1387             .tmp_subindex = i,
1388             .arg_slot = cum->arg_slot + i,
1389         };
1390     }
1391     cum->info_in_idx += n;
1392     cum->arg_slot += n;
1393 }
1394 
/*
 * Lay out one by-reference argument: a pointer in the normal argument
 * sequence, plus space for a copy of the value in the "ref_slot" area
 * (relocated to its final stack position by init_call_layout).
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;   /* words needed for the copy */

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1426 
/*
 * Decode @info->typemask and fill in the call layout: where the return
 * value lives (info->nr_out, info->out_kind) and where each input word
 * goes (info->in[], info->nr_in), according to the backend's calling
 * convention macros (TCG_TARGET_CALL_ARG_*, TCG_TARGET_CALL_RET_I128).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /*
                 * NOTE(review): relies on signed typecodes being the odd
                 * values, selecting EXTEND_S — confirm against the
                 * dh_typecode_* definitions.
                 */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1607 
/* Allocation order used for indirect temps; filled by tcg_context_init. */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1612 
/*
 * One-time initialization of the initial TCG context: helper call
 * layouts, backend init, constraint processing, register allocation
 * orders, the tcg_ctxs[] bookkeeping, and the "env" global.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Compute the call layouts for the qemu_ld/st slow-path helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1667 
/* Public entry point: initialize the TCG context and the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1673 
1674 /*
1675  * Allocate TBs right before their corresponding translated code, making
1676  * sure that TBs and code are on different cache lines.
1677  */
1678 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1679 {
1680     uintptr_t align = qemu_icache_linesize;
1681     TranslationBlock *tb;
1682     void *next;
1683 
1684  retry:
1685     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1686     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1687 
1688     if (unlikely(next > s->code_gen_highwater)) {
1689         if (tcg_region_alloc(s)) {
1690             return NULL;
1691         }
1692         goto retry;
1693     }
1694     qatomic_set(&s->code_gen_ptr, next);
1695     return tb;
1696 }
1697 
/*
 * Generate the host prologue/epilogue at the start of the code buffer,
 * set tcg_qemu_tb_exec, flush the icache over the generated code, and
 * optionally dump the disassembly to the log.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    /* The prologue is emitted at the very start of the region. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution happens through the read-execute mapping. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush from the write address; invalidate at the execute address. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool data section. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1774 
/*
 * Reset per-translation state in S before generating a new TB.
 * Globals (and their count) are kept; all other temps, ops, labels,
 * and cached constants from the previous translation are discarded.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}
1806 
1807 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1808 {
1809     int n = s->nb_temps++;
1810 
1811     if (n >= TCG_MAX_TEMPS) {
1812         tcg_raise_tb_overflow(s);
1813     }
1814     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1815 }
1816 
1817 static TCGTemp *tcg_global_alloc(TCGContext *s)
1818 {
1819     TCGTemp *ts;
1820 
1821     tcg_debug_assert(s->nb_globals == s->nb_temps);
1822     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1823     s->nb_globals++;
1824     ts = tcg_temp_alloc(s);
1825     ts->kind = TEMP_GLOBAL;
1826 
1827     return ts;
1828 }
1829 
1830 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1831                                             TCGReg reg, const char *name)
1832 {
1833     TCGTemp *ts;
1834 
1835     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1836 
1837     ts = tcg_global_alloc(s);
1838     ts->base_type = type;
1839     ts->type = type;
1840     ts->kind = TEMP_FIXED;
1841     ts->reg = reg;
1842     ts->name = name;
1843     tcg_regset_set_reg(s->reserved_regs, reg);
1844 
1845     return ts;
1846 }
1847 
1848 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1849 {
1850     s->frame_start = start;
1851     s->frame_end = start + size;
1852     s->frame_temp
1853         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1854 }
1855 
/*
 * Create a global temp that lives in memory at BASE + OFFSET.
 * On 32-bit hosts a TCG_TYPE_I64 global is split into two I32 halves
 * (NAME_0 at OFFSET, NAME_1 at OFFSET+4) in consecutive temp slots.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* First half, at OFFSET. */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* Second half, at OFFSET+4, in the adjacent temp slot. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1915 
1916 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1917 {
1918     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1919     return temp_tcgv_i32(ts);
1920 }
1921 
1922 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1923 {
1924     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1925     return temp_tcgv_i64(ts);
1926 }
1927 
1928 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1929 {
1930     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1931     return temp_tcgv_ptr(ts);
1932 }
1933 
/*
 * Allocate a new temporary of TYPE with lifetime KIND (TEMP_TB or
 * TEMP_EBB).  EBB temps are recycled via the per-type free bitmap;
 * types wider than the host register are split into consecutive
 * TCG_TYPE_REG parts sharing the same base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized parts needed to hold TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Multi-part value: each part is one host register wide. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Parts must occupy consecutive temp slots. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1997 
1998 TCGv_i32 tcg_temp_new_i32(void)
1999 {
2000     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2001 }
2002 
2003 TCGv_i32 tcg_temp_ebb_new_i32(void)
2004 {
2005     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2006 }
2007 
2008 TCGv_i64 tcg_temp_new_i64(void)
2009 {
2010     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2011 }
2012 
2013 TCGv_i64 tcg_temp_ebb_new_i64(void)
2014 {
2015     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2016 }
2017 
2018 TCGv_ptr tcg_temp_new_ptr(void)
2019 {
2020     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2021 }
2022 
2023 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2024 {
2025     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2026 }
2027 
2028 TCGv_i128 tcg_temp_new_i128(void)
2029 {
2030     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2031 }
2032 
2033 TCGv_i128 tcg_temp_ebb_new_i128(void)
2034 {
2035     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2036 }
2037 
/*
 * Allocate an EBB-lifetime vector temporary of TYPE.  In debug builds,
 * assert that the host actually supports the requested vector size.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
2061 
2062 /* Create a new temp of the same type as an existing temp.  */
2063 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2064 {
2065     TCGTemp *t = tcgv_vec_temp(match);
2066 
2067     tcg_debug_assert(t->temp_allocated != 0);
2068 
2069     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2070     return temp_tcgv_vec(t);
2071 }
2072 
2073 void tcg_temp_free_internal(TCGTemp *ts)
2074 {
2075     TCGContext *s = tcg_ctx;
2076 
2077     switch (ts->kind) {
2078     case TEMP_CONST:
2079     case TEMP_TB:
2080         /* Silently ignore free. */
2081         break;
2082     case TEMP_EBB:
2083         tcg_debug_assert(ts->temp_allocated != 0);
2084         ts->temp_allocated = 0;
2085         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2086         break;
2087     default:
2088         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2089         g_assert_not_reached();
2090     }
2091 }
2092 
2093 void tcg_temp_free_i32(TCGv_i32 arg)
2094 {
2095     tcg_temp_free_internal(tcgv_i32_temp(arg));
2096 }
2097 
2098 void tcg_temp_free_i64(TCGv_i64 arg)
2099 {
2100     tcg_temp_free_internal(tcgv_i64_temp(arg));
2101 }
2102 
2103 void tcg_temp_free_i128(TCGv_i128 arg)
2104 {
2105     tcg_temp_free_internal(tcgv_i128_temp(arg));
2106 }
2107 
2108 void tcg_temp_free_ptr(TCGv_ptr arg)
2109 {
2110     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2111 }
2112 
2113 void tcg_temp_free_vec(TCGv_vec arg)
2114 {
2115     tcg_temp_free_internal(tcgv_vec_temp(arg));
2116 }
2117 
/*
 * Return the canonical TEMP_CONST temp of TYPE holding VAL, creating
 * it (and the lazily-built per-type hash table) on first use.  On
 * 32-bit hosts an I64 constant occupies two consecutive I32 parts.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* The two halves must occupy consecutive temp slots. */
            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp itself, so it outlives this call. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2172 
2173 TCGv_i32 tcg_constant_i32(int32_t val)
2174 {
2175     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2176 }
2177 
2178 TCGv_i64 tcg_constant_i64(int64_t val)
2179 {
2180     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2181 }
2182 
2183 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2184 {
2185     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2186 }
2187 
2188 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2189 {
2190     val = dup_const(vece, val);
2191     return temp_tcgv_vec(tcg_constant_internal(type, val));
2192 }
2193 
2194 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2195 {
2196     TCGTemp *t = tcgv_vec_temp(match);
2197 
2198     tcg_debug_assert(t->temp_allocated != 0);
2199     return tcg_constant_vec(t->base_type, vece, val);
2200 }
2201 
2202 #ifdef CONFIG_DEBUG_TCG
2203 size_t temp_idx(TCGTemp *ts)
2204 {
2205     ptrdiff_t n = ts - tcg_ctx->temps;
2206     assert(n >= 0 && n < tcg_ctx->nb_temps);
2207     return n;
2208 }
2209 
/*
 * Debug-build validation of a TCGv_i32: the handle is encoded as the
 * byte offset of a TCGTemp from tcg_ctx.  Assert that it lies within
 * the allocated temps array and is properly aligned, then decode it.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2219 #endif /* CONFIG_DEBUG_TCG */
2220 
2221 /*
2222  * Return true if OP may appear in the opcode stream with TYPE.
2223  * Test the runtime variable that controls each opcode.
2224  */
2225 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2226 {
2227     bool has_type;
2228 
2229     switch (type) {
2230     case TCG_TYPE_I32:
2231         has_type = true;
2232         break;
2233     case TCG_TYPE_I64:
2234         has_type = TCG_TARGET_REG_BITS == 64;
2235         break;
2236     case TCG_TYPE_V64:
2237         has_type = TCG_TARGET_HAS_v64;
2238         break;
2239     case TCG_TYPE_V128:
2240         has_type = TCG_TARGET_HAS_v128;
2241         break;
2242     case TCG_TYPE_V256:
2243         has_type = TCG_TARGET_HAS_v256;
2244         break;
2245     default:
2246         has_type = false;
2247         break;
2248     }
2249 
2250     switch (op) {
2251     case INDEX_op_discard:
2252     case INDEX_op_set_label:
2253     case INDEX_op_call:
2254     case INDEX_op_br:
2255     case INDEX_op_mb:
2256     case INDEX_op_insn_start:
2257     case INDEX_op_exit_tb:
2258     case INDEX_op_goto_tb:
2259     case INDEX_op_goto_ptr:
2260     case INDEX_op_qemu_ld_i32:
2261     case INDEX_op_qemu_st_i32:
2262     case INDEX_op_qemu_ld_i64:
2263     case INDEX_op_qemu_st_i64:
2264         return true;
2265 
2266     case INDEX_op_qemu_st8_i32:
2267         return TCG_TARGET_HAS_qemu_st8_i32;
2268 
2269     case INDEX_op_qemu_ld_i128:
2270     case INDEX_op_qemu_st_i128:
2271         return TCG_TARGET_HAS_qemu_ldst_i128;
2272 
2273     case INDEX_op_add:
2274     case INDEX_op_and:
2275     case INDEX_op_mov:
2276     case INDEX_op_negsetcond:
2277     case INDEX_op_or:
2278     case INDEX_op_setcond:
2279     case INDEX_op_xor:
2280         return has_type;
2281 
2282     case INDEX_op_brcond_i32:
2283     case INDEX_op_movcond_i32:
2284     case INDEX_op_ld8u_i32:
2285     case INDEX_op_ld8s_i32:
2286     case INDEX_op_ld16u_i32:
2287     case INDEX_op_ld16s_i32:
2288     case INDEX_op_ld_i32:
2289     case INDEX_op_st8_i32:
2290     case INDEX_op_st16_i32:
2291     case INDEX_op_st_i32:
2292     case INDEX_op_extract_i32:
2293     case INDEX_op_sextract_i32:
2294     case INDEX_op_deposit_i32:
2295         return true;
2296 
2297     case INDEX_op_extract2_i32:
2298         return TCG_TARGET_HAS_extract2_i32;
2299     case INDEX_op_add2_i32:
2300         return TCG_TARGET_HAS_add2_i32;
2301     case INDEX_op_sub2_i32:
2302         return TCG_TARGET_HAS_sub2_i32;
2303     case INDEX_op_bswap16_i32:
2304         return TCG_TARGET_HAS_bswap16_i32;
2305     case INDEX_op_bswap32_i32:
2306         return TCG_TARGET_HAS_bswap32_i32;
2307 
2308     case INDEX_op_brcond2_i32:
2309     case INDEX_op_setcond2_i32:
2310         return TCG_TARGET_REG_BITS == 32;
2311 
2312     case INDEX_op_brcond_i64:
2313     case INDEX_op_movcond_i64:
2314     case INDEX_op_ld8u_i64:
2315     case INDEX_op_ld8s_i64:
2316     case INDEX_op_ld16u_i64:
2317     case INDEX_op_ld16s_i64:
2318     case INDEX_op_ld32u_i64:
2319     case INDEX_op_ld32s_i64:
2320     case INDEX_op_ld_i64:
2321     case INDEX_op_st8_i64:
2322     case INDEX_op_st16_i64:
2323     case INDEX_op_st32_i64:
2324     case INDEX_op_st_i64:
2325     case INDEX_op_ext_i32_i64:
2326     case INDEX_op_extu_i32_i64:
2327     case INDEX_op_extract_i64:
2328     case INDEX_op_sextract_i64:
2329     case INDEX_op_deposit_i64:
2330         return TCG_TARGET_REG_BITS == 64;
2331 
2332     case INDEX_op_extract2_i64:
2333         return TCG_TARGET_HAS_extract2_i64;
2334     case INDEX_op_extrl_i64_i32:
2335     case INDEX_op_extrh_i64_i32:
2336         return TCG_TARGET_HAS_extr_i64_i32;
2337     case INDEX_op_bswap16_i64:
2338         return TCG_TARGET_HAS_bswap16_i64;
2339     case INDEX_op_bswap32_i64:
2340         return TCG_TARGET_HAS_bswap32_i64;
2341     case INDEX_op_bswap64_i64:
2342         return TCG_TARGET_HAS_bswap64_i64;
2343     case INDEX_op_add2_i64:
2344         return TCG_TARGET_HAS_add2_i64;
2345     case INDEX_op_sub2_i64:
2346         return TCG_TARGET_HAS_sub2_i64;
2347 
2348     case INDEX_op_mov_vec:
2349     case INDEX_op_dup_vec:
2350     case INDEX_op_dupm_vec:
2351     case INDEX_op_ld_vec:
2352     case INDEX_op_st_vec:
2353     case INDEX_op_add_vec:
2354     case INDEX_op_sub_vec:
2355     case INDEX_op_and_vec:
2356     case INDEX_op_or_vec:
2357     case INDEX_op_xor_vec:
2358     case INDEX_op_cmp_vec:
2359         return has_type;
2360     case INDEX_op_dup2_vec:
2361         return has_type && TCG_TARGET_REG_BITS == 32;
2362     case INDEX_op_not_vec:
2363         return has_type && TCG_TARGET_HAS_not_vec;
2364     case INDEX_op_neg_vec:
2365         return has_type && TCG_TARGET_HAS_neg_vec;
2366     case INDEX_op_abs_vec:
2367         return has_type && TCG_TARGET_HAS_abs_vec;
2368     case INDEX_op_andc_vec:
2369         return has_type && TCG_TARGET_HAS_andc_vec;
2370     case INDEX_op_orc_vec:
2371         return has_type && TCG_TARGET_HAS_orc_vec;
2372     case INDEX_op_nand_vec:
2373         return has_type && TCG_TARGET_HAS_nand_vec;
2374     case INDEX_op_nor_vec:
2375         return has_type && TCG_TARGET_HAS_nor_vec;
2376     case INDEX_op_eqv_vec:
2377         return has_type && TCG_TARGET_HAS_eqv_vec;
2378     case INDEX_op_mul_vec:
2379         return has_type && TCG_TARGET_HAS_mul_vec;
2380     case INDEX_op_shli_vec:
2381     case INDEX_op_shri_vec:
2382     case INDEX_op_sari_vec:
2383         return has_type && TCG_TARGET_HAS_shi_vec;
2384     case INDEX_op_shls_vec:
2385     case INDEX_op_shrs_vec:
2386     case INDEX_op_sars_vec:
2387         return has_type && TCG_TARGET_HAS_shs_vec;
2388     case INDEX_op_shlv_vec:
2389     case INDEX_op_shrv_vec:
2390     case INDEX_op_sarv_vec:
2391         return has_type && TCG_TARGET_HAS_shv_vec;
2392     case INDEX_op_rotli_vec:
2393         return has_type && TCG_TARGET_HAS_roti_vec;
2394     case INDEX_op_rotls_vec:
2395         return has_type && TCG_TARGET_HAS_rots_vec;
2396     case INDEX_op_rotlv_vec:
2397     case INDEX_op_rotrv_vec:
2398         return has_type && TCG_TARGET_HAS_rotv_vec;
2399     case INDEX_op_ssadd_vec:
2400     case INDEX_op_usadd_vec:
2401     case INDEX_op_sssub_vec:
2402     case INDEX_op_ussub_vec:
2403         return has_type && TCG_TARGET_HAS_sat_vec;
2404     case INDEX_op_smin_vec:
2405     case INDEX_op_umin_vec:
2406     case INDEX_op_smax_vec:
2407     case INDEX_op_umax_vec:
2408         return has_type && TCG_TARGET_HAS_minmax_vec;
2409     case INDEX_op_bitsel_vec:
2410         return has_type && TCG_TARGET_HAS_bitsel_vec;
2411     case INDEX_op_cmpsel_vec:
2412         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2413 
2414     default:
2415         if (op < INDEX_op_last_generic) {
2416             const TCGOutOp *outop;
2417             TCGConstraintSetIndex con_set;
2418 
2419             if (!has_type) {
2420                 return false;
2421             }
2422 
2423             outop = all_outop[op];
2424             tcg_debug_assert(outop != NULL);
2425 
2426             con_set = outop->static_constraint;
2427             if (con_set == C_Dynamic) {
2428                 con_set = outop->dynamic_constraint(type, flags);
2429             }
2430             if (con_set >= 0) {
2431                 return true;
2432             }
2433             tcg_debug_assert(con_set == C_NotImplemented);
2434             return false;
2435         }
2436         tcg_debug_assert(op < NB_OPS);
2437         return true;
2438 
2439     case INDEX_op_last_generic:
2440         g_assert_not_reached();
2441     }
2442 }
2443 
2444 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2445 {
2446     unsigned width;
2447 
2448     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2449     width = (type == TCG_TYPE_I32 ? 32 : 64);
2450 
2451     tcg_debug_assert(ofs < width);
2452     tcg_debug_assert(len > 0);
2453     tcg_debug_assert(len <= width - ofs);
2454 
2455     return TCG_TARGET_deposit_valid(type, ofs, len);
2456 }
2457 
2458 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2459 
/*
 * Emit an INDEX_op_call op calling FUNC as described by INFO, with
 * result RET (NULL when the helper returns void) and input temps ARGS.
 * 32-bit inputs that must be widened to 64 bits are copied through
 * EBB temps, which are freed again once the op has been emitted.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout once per helper, thread-safely. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 for the trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Outputs: 0, 1, or 2/4 consecutive parts of RET. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Inputs, placed per the precomputed argument locations. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit input through a scratch i64. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* Release the scratch temps used for extension. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2552 
/* Emit a helper call taking no arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2557 
2558 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2559 {
2560     tcg_gen_callN(func, info, ret, &t1);
2561 }
2562 
2563 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2564                    TCGTemp *t1, TCGTemp *t2)
2565 {
2566     TCGTemp *args[2] = { t1, t2 };
2567     tcg_gen_callN(func, info, ret, args);
2568 }
2569 
2570 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2571                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2572 {
2573     TCGTemp *args[3] = { t1, t2, t3 };
2574     tcg_gen_callN(func, info, ret, args);
2575 }
2576 
2577 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2578                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2579 {
2580     TCGTemp *args[4] = { t1, t2, t3, t4 };
2581     tcg_gen_callN(func, info, ret, args);
2582 }
2583 
2584 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2585                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2586 {
2587     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2588     tcg_gen_callN(func, info, ret, args);
2589 }
2590 
2591 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2592                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2593                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2594 {
2595     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2600                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2601                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2602 {
2603     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
2606 
/*
 * Initialize val_type for every temp at the start of register
 * allocation: constants are TEMP_VAL_CONST, fixed temps are in
 * registers, EBB temps start dead, everything else starts in memory.
 * Also clears the reg->temp reverse map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2638 
/*
 * Format a human-readable name for TS into BUF (buf_size bytes):
 * fixed/global temps by name, TB temps as locN, EBB temps as tmpN,
 * constants by value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        /* Numbering starts after the globals. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Prefix with the vector width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2678 
2679 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2680                              int buf_size, TCGArg arg)
2681 {
2682     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2683 }
2684 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2702 
/* Printable names for MemOp size/sign/byte-order combinations. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2720 
/* Printable prefixes for the MemOp alignment field. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2731 
/* Printable prefixes for MemOp atomicity requirements (MO_ATOM_MASK field). */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2740 
/*
 * Printable names for bswap op flag combinations.  Fixed-width rows
 * ([6]) so the whole table is one const array; unset combinations are
 * empty strings, treated as "not named" by the dumper.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2748 
#ifdef CONFIG_PLUGIN
/*
 * Printable names for the "from" argument of plugin_cb ops.
 * NOTE(review): assumes the order matches the plugin 'from' enum
 * declared elsewhere — confirm against the plugin headers.
 */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2757 
2758 static inline bool tcg_regset_single(TCGRegSet d)
2759 {
2760     return (d & (d - 1)) == 0;
2761 }
2762 
2763 static inline TCGReg tcg_regset_first(TCGRegSet d)
2764 {
2765     if (TCG_TARGET_NB_REGS <= 32) {
2766         return ctz32(d);
2767     } else {
2768         return ctz64(d);
2769     }
2770 }
2771 
/* Return only the number of characters output -- no error return. */
/*
 * fprintf wrapper that clamps a negative (error) result to zero, so
 * callers can accumulate a column count without per-call error checks.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2775 
/*
 * Dump every op of the current TB in @s to stream @f, one line per op.
 * Constant arguments are decoded symbolically where a name table
 * applies (conditions, memops, bswap flags, barriers, labels).
 * Liveness info (op->life) is appended when present; when @have_prefs
 * is true, register-allocation preferences for the outputs follow.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        /* Characters printed so far; used to pad before life/prefs. */
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the saved start params. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Generic op: name with integer width or vector shape suffix. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k walks op->args across outputs, inputs, then constants. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * Decode leading constant args symbolically where possible.
             * On exit, i holds the number of constant args consumed.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond:
            case INDEX_op_negsetcond:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Second pass for ops whose trailing constant is a label/barrier. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier type: acquire / release / sequential. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access combination (r = load, w = store). */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant args, printed numerically. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column so life/prefs annotations line up. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Args that must be synced to memory before this op. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Remaining bits: args dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3106 
3107 /* we give more priority to constraints with less registers */
3108 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3109 {
3110     int n;
3111 
3112     arg_ct += k;
3113     n = ctpop64(arg_ct->regs);
3114 
3115     /*
3116      * Sort constraints of a single register first, which includes output
3117      * aliases (which must exactly match the input already allocated).
3118      */
3119     if (n == 1 || arg_ct->oalias) {
3120         return INT_MAX;
3121     }
3122 
3123     /*
3124      * Sort register pairs next, first then second immediately after.
3125      * Arbitrarily sort multiple pairs by the index of the first reg;
3126      * there shouldn't be many pairs.
3127      */
3128     switch (arg_ct->pair) {
3129     case 1:
3130     case 3:
3131         return (k + 1) * 2;
3132     case 2:
3133         return (arg_ct->pair_index + 1) * 2 - 1;
3134     }
3135 
3136     /* Finally, sort by decreasing register count. */
3137     assert(n > 1);
3138     return -n;
3139 }
3140 
3141 /* sort from highest priority to lowest */
3142 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3143 {
3144     int i, j;
3145 
3146     for (i = 0; i < n; i++) {
3147         a[start + i].sort_index = start + i;
3148     }
3149     if (n <= 1) {
3150         return;
3151     }
3152     for (i = 0; i < n - 1; i++) {
3153         for (j = i + 1; j < n; j++) {
3154             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3155             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3156             if (p1 < p2) {
3157                 int tmp = a[start + i].sort_index;
3158                 a[start + i].sort_index = a[start + j].sort_index;
3159                 a[start + j].sort_index = tmp;
3160             }
3161         }
3162     }
3163 }
3164 
/* All-zero constraint set, returned for TCG_OPF_NOT_PRESENT opcodes. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Expanded form of constraint_sets[], filled by process_constraint_sets(). */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3167 
/*
 * Expand every string-form TCGConstraintSet into the corresponding
 * row of all_cts[], resolving output aliases ("0"-"9"), new-register
 * markers ("&"), register-pair markers ("p"/"m"), and per-target
 * register/constant letters, then sorting each row's outputs and
 * inputs by allocation priority.  Run once at startup.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct_str, inputs after. */
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output register 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate remaining letters: each adds regs or const bits. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3358 
3359 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3360 {
3361     TCGOpcode opc = op->opc;
3362     TCGType type = TCGOP_TYPE(op);
3363     unsigned flags = TCGOP_FLAGS(op);
3364     const TCGOpDef *def = &tcg_op_defs[opc];
3365     const TCGOutOp *outop = all_outop[opc];
3366     TCGConstraintSetIndex con_set;
3367 
3368     if (def->flags & TCG_OPF_NOT_PRESENT) {
3369         return empty_cts;
3370     }
3371 
3372     if (outop) {
3373         con_set = outop->static_constraint;
3374         if (con_set == C_Dynamic) {
3375             con_set = outop->dynamic_constraint(type, flags);
3376         }
3377     } else {
3378         con_set = tcg_target_op_def(opc, type, flags);
3379     }
3380     tcg_debug_assert(con_set >= 0);
3381     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3382 
3383     /* The constraint arguments must match TCGOpcode arguments. */
3384     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3385     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3386 
3387     return all_cts[con_set];
3388 }
3389 
3390 static void remove_label_use(TCGOp *op, int idx)
3391 {
3392     TCGLabel *label = arg_label(op->args[idx]);
3393     TCGLabelUse *use;
3394 
3395     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3396         if (use->op == op) {
3397             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3398             return;
3399         }
3400     }
3401     g_assert_not_reached();
3402 }
3403 
3404 void tcg_op_remove(TCGContext *s, TCGOp *op)
3405 {
3406     switch (op->opc) {
3407     case INDEX_op_br:
3408         remove_label_use(op, 0);
3409         break;
3410     case INDEX_op_brcond_i32:
3411     case INDEX_op_brcond_i64:
3412         remove_label_use(op, 3);
3413         break;
3414     case INDEX_op_brcond2_i32:
3415         remove_label_use(op, 5);
3416         break;
3417     default:
3418         break;
3419     }
3420 
3421     QTAILQ_REMOVE(&s->ops, op, link);
3422     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3423     s->nb_ops--;
3424 }
3425 
3426 void tcg_remove_ops_after(TCGOp *op)
3427 {
3428     TCGContext *s = tcg_ctx;
3429 
3430     while (true) {
3431         TCGOp *last = tcg_last_op();
3432         if (last == op) {
3433             return;
3434         }
3435         tcg_op_remove(s, last);
3436     }
3437 }
3438 
3439 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3440 {
3441     TCGContext *s = tcg_ctx;
3442     TCGOp *op = NULL;
3443 
3444     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3445         QTAILQ_FOREACH(op, &s->free_ops, link) {
3446             if (nargs <= op->nargs) {
3447                 QTAILQ_REMOVE(&s->free_ops, op, link);
3448                 nargs = op->nargs;
3449                 goto found;
3450             }
3451         }
3452     }
3453 
3454     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3455     nargs = MAX(4, nargs);
3456     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3457 
3458  found:
3459     memset(op, 0, offsetof(TCGOp, link));
3460     op->opc = opc;
3461     op->nargs = nargs;
3462 
3463     /* Check for bitfield overflow. */
3464     tcg_debug_assert(op->nargs == nargs);
3465 
3466     s->nb_ops++;
3467     return op;
3468 }
3469 
3470 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3471 {
3472     TCGOp *op = tcg_op_alloc(opc, nargs);
3473 
3474     if (tcg_ctx->emit_before_op) {
3475         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3476     } else {
3477         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3478     }
3479     return op;
3480 }
3481 
3482 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3483                             TCGOpcode opc, TCGType type, unsigned nargs)
3484 {
3485     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3486 
3487     TCGOP_TYPE(new_op) = type;
3488     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3489     return new_op;
3490 }
3491 
3492 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3493                            TCGOpcode opc, TCGType type, unsigned nargs)
3494 {
3495     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3496 
3497     TCGOP_TYPE(new_op) = type;
3498     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3499     return new_op;
3500 }
3501 
3502 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3503 {
3504     TCGLabelUse *u;
3505 
3506     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3507         TCGOp *op = u->op;
3508         switch (op->opc) {
3509         case INDEX_op_br:
3510             op->args[0] = label_arg(to);
3511             break;
3512         case INDEX_op_brcond_i32:
3513         case INDEX_op_brcond_i64:
3514             op->args[3] = label_arg(to);
3515             break;
3516         case INDEX_op_brcond2_i32:
3517             op->args[5] = label_arg(to);
3518             break;
3519         default:
3520             g_assert_not_reached();
3521         }
3522     }
3523 
3524     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3525 }
3526 
/* Reachable analysis : remove unreachable code.  */
/*
 * Single forward pass: 'dead' tracks whether the current op follows an
 * unconditional control transfer; dead ops are removed unless a label
 * with live uses (or insn_start) intervenes.  Also merges adjacent
 * labels and deletes branch-to-next branches.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    /* _SAFE variant: ops may be removed while iterating. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3618 
3619 #define TS_DEAD  1
3620 #define TS_MEM   2
3621 
3622 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3623 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3624 
3625 /* For liveness_pass_1, the register preferences for a given temp.  */
3626 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3627 {
3628     return ts->state_ptr;
3629 }
3630 
3631 /* For liveness_pass_1, reset the preferences for a given temp to the
3632  * maximal regset for its type.
3633  */
3634 static inline void la_reset_pref(TCGTemp *ts)
3635 {
3636     *la_temp_pref(ts)
3637         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3638 }
3639 
3640 /* liveness analysis: end of function: all temps are dead, and globals
3641    should be in memory. */
3642 static void la_func_end(TCGContext *s, int ng, int nt)
3643 {
3644     int i;
3645 
3646     for (i = 0; i < ng; ++i) {
3647         s->temps[i].state = TS_DEAD | TS_MEM;
3648         la_reset_pref(&s->temps[i]);
3649     }
3650     for (i = ng; i < nt; ++i) {
3651         s->temps[i].state = TS_DEAD;
3652         la_reset_pref(&s->temps[i]);
3653     }
3654 }
3655 
3656 /* liveness analysis: end of basic block: all temps are dead, globals
3657    and local temps should be in memory. */
3658 static void la_bb_end(TCGContext *s, int ng, int nt)
3659 {
3660     int i;
3661 
3662     for (i = 0; i < nt; ++i) {
3663         TCGTemp *ts = &s->temps[i];
3664         int state;
3665 
3666         switch (ts->kind) {
3667         case TEMP_FIXED:
3668         case TEMP_GLOBAL:
3669         case TEMP_TB:
3670             state = TS_DEAD | TS_MEM;
3671             break;
3672         case TEMP_EBB:
3673         case TEMP_CONST:
3674             state = TS_DEAD;
3675             break;
3676         default:
3677             g_assert_not_reached();
3678         }
3679         ts->state = state;
3680         la_reset_pref(ts);
3681     }
3682 }
3683 
3684 /* liveness analysis: sync globals back to memory.  */
3685 static void la_global_sync(TCGContext *s, int ng)
3686 {
3687     int i;
3688 
3689     for (i = 0; i < ng; ++i) {
3690         int state = s->temps[i].state;
3691         s->temps[i].state = state | TS_MEM;
3692         if (state == TS_DEAD) {
3693             /* If the global was previously dead, reset prefs.  */
3694             la_reset_pref(&s->temps[i]);
3695         }
3696     }
3697 }
3698 
3699 /*
3700  * liveness analysis: conditional branch: all temps are dead unless
3701  * explicitly live-across-conditional-branch, globals and local temps
3702  * should be synced.
3703  */
3704 static void la_bb_sync(TCGContext *s, int ng, int nt)
3705 {
3706     la_global_sync(s, ng);
3707 
3708     for (int i = ng; i < nt; ++i) {
3709         TCGTemp *ts = &s->temps[i];
3710         int state;
3711 
3712         switch (ts->kind) {
3713         case TEMP_TB:
3714             state = ts->state;
3715             ts->state = state | TS_MEM;
3716             if (state != TS_DEAD) {
3717                 continue;
3718             }
3719             break;
3720         case TEMP_EBB:
3721         case TEMP_CONST:
3722             continue;
3723         default:
3724             g_assert_not_reached();
3725         }
3726         la_reset_pref(&s->temps[i]);
3727     }
3728 }
3729 
3730 /* liveness analysis: sync globals back to memory and kill.  */
3731 static void la_global_kill(TCGContext *s, int ng)
3732 {
3733     int i;
3734 
3735     for (i = 0; i < ng; i++) {
3736         s->temps[i].state = TS_DEAD | TS_MEM;
3737         la_reset_pref(&s->temps[i]);
3738     }
3739 }
3740 
3741 /* liveness analysis: note live globals crossing calls.  */
3742 static void la_cross_call(TCGContext *s, int nt)
3743 {
3744     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3745     int i;
3746 
3747     for (i = 0; i < nt; i++) {
3748         TCGTemp *ts = &s->temps[i];
3749         if (!(ts->state & TS_DEAD)) {
3750             TCGRegSet *pset = la_temp_pref(ts);
3751             TCGRegSet set = *pset;
3752 
3753             set &= mask;
3754             /* If the combination is not possible, restart.  */
3755             if (set == 0) {
3756                 set = tcg_target_available_regs[ts->type] & mask;
3757             }
3758             *pset = set;
3759         }
3760     }
3761 }
3762 
3763 /*
3764  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3765  * to TEMP_EBB, if possible.
3766  */
3767 static void __attribute__((noinline))
3768 liveness_pass_0(TCGContext *s)
3769 {
3770     void * const multiple_ebb = (void *)(uintptr_t)-1;
3771     int nb_temps = s->nb_temps;
3772     TCGOp *op, *ebb;
3773 
3774     for (int i = s->nb_globals; i < nb_temps; ++i) {
3775         s->temps[i].state_ptr = NULL;
3776     }
3777 
3778     /*
3779      * Represent each EBB by the op at which it begins.  In the case of
3780      * the first EBB, this is the first op, otherwise it is a label.
3781      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3782      * within a single EBB, else MULTIPLE_EBB.
3783      */
3784     ebb = QTAILQ_FIRST(&s->ops);
3785     QTAILQ_FOREACH(op, &s->ops, link) {
3786         const TCGOpDef *def;
3787         int nb_oargs, nb_iargs;
3788 
3789         switch (op->opc) {
3790         case INDEX_op_set_label:
3791             ebb = op;
3792             continue;
3793         case INDEX_op_discard:
3794             continue;
3795         case INDEX_op_call:
3796             nb_oargs = TCGOP_CALLO(op);
3797             nb_iargs = TCGOP_CALLI(op);
3798             break;
3799         default:
3800             def = &tcg_op_defs[op->opc];
3801             nb_oargs = def->nb_oargs;
3802             nb_iargs = def->nb_iargs;
3803             break;
3804         }
3805 
3806         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3807             TCGTemp *ts = arg_temp(op->args[i]);
3808 
3809             if (ts->kind != TEMP_TB) {
3810                 continue;
3811             }
3812             if (ts->state_ptr == NULL) {
3813                 ts->state_ptr = ebb;
3814             } else if (ts->state_ptr != ebb) {
3815                 ts->state_ptr = multiple_ebb;
3816             }
3817         }
3818     }
3819 
3820     /*
3821      * For TEMP_TB that turned out not to be used beyond one EBB,
3822      * reduce the liveness to TEMP_EBB.
3823      */
3824     for (int i = s->nb_globals; i < nb_temps; ++i) {
3825         TCGTemp *ts = &s->temps[i];
3826         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3827             ts->kind = TEMP_EBB;
3828         }
3829     }
3830 }
3831 
/*
 * Liveness analysis: update the opc_arg_life array to tell if a given
 * input argument is dead.  Instructions updating dead temporaries are
 * removed.  This is a single backward walk over the op list, so temp
 * state always describes the uses *after* the current op.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reached via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
4158 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 * Each indirect global gets a shadow TEMP_EBB temp; explicit ld/st ops
 * are inserted to move values between the shadow temp and the global's
 * canonical memory slot.  Returns true if any op was rewritten.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp not yet loaded: insert a load from the
                   global's memory slot before this use. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov is dead after the sync: store the
                           source directly and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4350 
4351 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4352 {
4353     intptr_t off;
4354     int size, align;
4355 
4356     /* When allocating an object, look at the full type. */
4357     size = tcg_type_size(ts->base_type);
4358     switch (ts->base_type) {
4359     case TCG_TYPE_I32:
4360         align = 4;
4361         break;
4362     case TCG_TYPE_I64:
4363     case TCG_TYPE_V64:
4364         align = 8;
4365         break;
4366     case TCG_TYPE_I128:
4367     case TCG_TYPE_V128:
4368     case TCG_TYPE_V256:
4369         /*
4370          * Note that we do not require aligned storage for V256,
4371          * and that we provide alignment for I128 to match V128,
4372          * even if that's above what the host ABI requires.
4373          */
4374         align = 16;
4375         break;
4376     default:
4377         g_assert_not_reached();
4378     }
4379 
4380     /*
4381      * Assume the stack is sufficiently aligned.
4382      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4383      * and do not require 16 byte vector alignment.  This seems slightly
4384      * easier than fully parameterizing the above switch statement.
4385      */
4386     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4387     off = ROUND_UP(s->current_frame_offset, align);
4388 
4389     /* If we've exhausted the stack frame, restart with a smaller TB. */
4390     if (off + size > s->frame_end) {
4391         tcg_raise_tb_overflow(s);
4392     }
4393     s->current_frame_offset = off + size;
4394 #if defined(__sparc__)
4395     off += TCG_TARGET_STACK_BIAS;
4396 #endif
4397 
4398     /* If the object was subdivided, assign memory to all the parts. */
4399     if (ts->base_type != ts->type) {
4400         int part_size = tcg_type_size(ts->type);
4401         int part_count = size / part_size;
4402 
4403         /*
4404          * Each part is allocated sequentially in tcg_temp_new_internal.
4405          * Jump back to the first part by subtracting the current index.
4406          */
4407         ts -= ts->temp_subindex;
4408         for (int i = 0; i < part_count; ++i) {
4409             ts[i].mem_offset = off + i * part_size;
4410             ts[i].mem_base = s->frame_temp;
4411             ts[i].mem_allocated = 1;
4412         }
4413     } else {
4414         ts->mem_offset = off;
4415         ts->mem_base = s->frame_temp;
4416         ts->mem_allocated = 1;
4417     }
4418 }
4419 
4420 /* Assign @reg to @ts, and update reg_to_temp[]. */
4421 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4422 {
4423     if (ts->val_type == TEMP_VAL_REG) {
4424         TCGReg old = ts->reg;
4425         tcg_debug_assert(s->reg_to_temp[old] == ts);
4426         if (old == reg) {
4427             return;
4428         }
4429         s->reg_to_temp[old] = NULL;
4430     }
4431     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4432     s->reg_to_temp[reg] = ts;
4433     ts->val_type = TEMP_VAL_REG;
4434     ts->reg = reg;
4435 }
4436 
4437 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4438 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4439 {
4440     tcg_debug_assert(type != TEMP_VAL_REG);
4441     if (ts->val_type == TEMP_VAL_REG) {
4442         TCGReg reg = ts->reg;
4443         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4444         s->reg_to_temp[reg] = NULL;
4445     }
4446     ts->val_type = type;
4447 }
4448 
4449 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4450 
4451 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4452    mark it free; otherwise mark it dead.  */
4453 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4454 {
4455     TCGTempVal new_type;
4456 
4457     switch (ts->kind) {
4458     case TEMP_FIXED:
4459         return;
4460     case TEMP_GLOBAL:
4461     case TEMP_TB:
4462         new_type = TEMP_VAL_MEM;
4463         break;
4464     case TEMP_EBB:
4465         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4466         break;
4467     case TEMP_CONST:
4468         new_type = TEMP_VAL_CONST;
4469         break;
4470     default:
4471         g_assert_not_reached();
4472     }
4473     set_temp_val_nonreg(s, ts, new_type);
4474 }
4475 
/* Mark a temporary as dead: its current value will no longer be used.
   (A positive argument selects the "dead" behavior of temp_free_or_dead.) */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
4481 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps are already
       up to date in memory. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        /* Lazily allocate the backing stack slot. */
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register ... */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            /* ... and store that register to the memory slot. */
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Value already lives (only) in memory; nothing to store. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4525 
4526 /* free register 'reg' by spilling the corresponding temporary if necessary */
4527 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4528 {
4529     TCGTemp *ts = s->reg_to_temp[reg];
4530     if (ts != NULL) {
4531         temp_sync(s, ts, allocated_regs, 0, -1);
4532     }
4533 }
4534 
4535 /**
4536  * tcg_reg_alloc:
4537  * @required_regs: Set of registers in which we must allocate.
4538  * @allocated_regs: Set of registers which must be avoided.
4539  * @preferred_regs: Set of registers we should prefer.
4540  * @rev: True if we search the registers in "indirect" order.
4541  *
4542  * The allocated register must be in @required_regs & ~@allocated_regs,
4543  * but if we can put it in @preferred_regs we may save a move later.
4544  */
4545 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4546                             TCGRegSet allocated_regs,
4547                             TCGRegSet preferred_regs, bool rev)
4548 {
4549     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4550     TCGRegSet reg_ct[2];
4551     const int *order;
4552 
4553     reg_ct[1] = required_regs & ~allocated_regs;
4554     tcg_debug_assert(reg_ct[1] != 0);
4555     reg_ct[0] = reg_ct[1] & preferred_regs;
4556 
4557     /* Skip the preferred_regs option if it cannot be satisfied,
4558        or if the preference made no difference.  */
4559     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4560 
4561     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4562 
4563     /* Try free registers, preferences first.  */
4564     for (j = f; j < 2; j++) {
4565         TCGRegSet set = reg_ct[j];
4566 
4567         if (tcg_regset_single(set)) {
4568             /* One register in the set.  */
4569             TCGReg reg = tcg_regset_first(set);
4570             if (s->reg_to_temp[reg] == NULL) {
4571                 return reg;
4572             }
4573         } else {
4574             for (i = 0; i < n; i++) {
4575                 TCGReg reg = order[i];
4576                 if (s->reg_to_temp[reg] == NULL &&
4577                     tcg_regset_test_reg(set, reg)) {
4578                     return reg;
4579                 }
4580             }
4581         }
4582     }
4583 
4584     /* We must spill something.  */
4585     for (j = f; j < 2; j++) {
4586         TCGRegSet set = reg_ct[j];
4587 
4588         if (tcg_regset_single(set)) {
4589             /* One register in the set.  */
4590             TCGReg reg = tcg_regset_first(set);
4591             tcg_reg_free(s, reg, allocated_regs);
4592             return reg;
4593         } else {
4594             for (i = 0; i < n; i++) {
4595                 TCGReg reg = order[i];
4596                 if (tcg_regset_test_reg(set, reg)) {
4597                     tcg_reg_free(s, reg, allocated_regs);
4598                     return reg;
4599                 }
4600             }
4601         }
4602     }
4603 
4604     g_assert_not_reached();
4605 }
4606 
/*
 * Allocate a pair of consecutive registers (reg, reg + 1) from
 * @required_regs & ~@allocated_regs, preferring @preferred_regs, with
 * @rev selecting the "indirect" allocation order.  Returns the lower
 * register of the pair; both registers are spilled as needed.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of free registers in this pair (0..2). */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op on a free register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4652 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar constant: emit a direct move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            /* Vector constant: emit a dup-immediate. */
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register now holds the newest value; memory is stale. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded: register and memory slot agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4704 
/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   NOTE(review): this now only asserts — liveness analysis is expected
   to have already placed the value in memory. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4713 
4714 /* save globals to their canonical location and assume they can be
4715    modified be the following code. 'allocated_regs' is used in case a
4716    temporary registers needs to be allocated to store a constant. */
4717 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4718 {
4719     int i, n;
4720 
4721     for (i = 0, n = s->nb_globals; i < n; i++) {
4722         temp_save(s, &s->temps[i], allocated_regs);
4723     }
4724 }
4725 
4726 /* sync globals to their canonical location and assume they can be
4727    read by the following code. 'allocated_regs' is used in case a
4728    temporary registers needs to be allocated to store a constant. */
4729 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4730 {
4731     int i, n;
4732 
4733     for (i = 0, n = s->nb_globals; i < n; i++) {
4734         TCGTemp *ts = &s->temps[i];
4735         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4736                          || ts->kind == TEMP_FIXED
4737                          || ts->mem_coherent);
4738     }
4739 }
4740 
4741 /* at the end of a basic block, we assume all temporaries are dead and
4742    all globals are stored at their canonical location. */
4743 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4744 {
4745     int i;
4746 
4747     for (i = s->nb_globals; i < s->nb_temps; i++) {
4748         TCGTemp *ts = &s->temps[i];
4749 
4750         switch (ts->kind) {
4751         case TEMP_TB:
4752             temp_save(s, ts, allocated_regs);
4753             break;
4754         case TEMP_EBB:
4755             /* The liveness analysis already ensures that temps are dead.
4756                Keep an tcg_debug_assert for safety. */
4757             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4758             break;
4759         case TEMP_CONST:
4760             /* Similarly, we should have freed any allocated register. */
4761             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4762             break;
4763         default:
4764             g_assert_not_reached();
4765         }
4766     }
4767 
4768     save_globals(s, allocated_regs);
4769 }
4770 
4771 /*
4772  * At a conditional branch, we assume all temporaries are dead unless
4773  * explicitly live-across-conditional-branch; all globals and local
4774  * temps are synced to their location.
4775  */
4776 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4777 {
4778     sync_globals(s, allocated_regs);
4779 
4780     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4781         TCGTemp *ts = &s->temps[i];
4782         /*
4783          * The liveness analysis already ensures that temps are dead.
4784          * Keep tcg_debug_asserts for safety.
4785          */
4786         switch (ts->kind) {
4787         case TEMP_TB:
4788             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4789             break;
4790         case TEMP_EBB:
4791         case TEMP_CONST:
4792             break;
4793         default:
4794             g_assert_not_reached();
4795         }
4796     }
4797 }
4798 
4799 /*
4800  * Specialized code generation for INDEX_op_mov_* with a constant.
4801  */
4802 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4803                                   tcg_target_ulong val, TCGLifeData arg_life,
4804                                   TCGRegSet preferred_regs)
4805 {
4806     /* ENV should not be modified.  */
4807     tcg_debug_assert(!temp_readonly(ots));
4808 
4809     /* The movi is not explicitly generated here.  */
4810     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4811     ots->val = val;
4812     ots->mem_coherent = 0;
4813     if (NEED_SYNC_ARG(0)) {
4814         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4815     } else if (IS_DEAD_ARG(0)) {
4816         temp_dead(s, ots);
4817     }
4818 }
4819 
4820 /*
4821  * Specialized code generation for INDEX_op_mov_*.
4822  */
4823 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4824 {
4825     const TCGLifeData arg_life = op->life;
4826     TCGRegSet allocated_regs, preferred_regs;
4827     TCGTemp *ts, *ots;
4828     TCGType otype, itype;
4829     TCGReg oreg, ireg;
4830 
4831     allocated_regs = s->reserved_regs;
4832     preferred_regs = output_pref(op, 0);
4833     ots = arg_temp(op->args[0]);
4834     ts = arg_temp(op->args[1]);
4835 
4836     /* ENV should not be modified.  */
4837     tcg_debug_assert(!temp_readonly(ots));
4838 
4839     /* Note that otype != itype for no-op truncation.  */
4840     otype = ots->type;
4841     itype = ts->type;
4842 
4843     if (ts->val_type == TEMP_VAL_CONST) {
4844         /* propagate constant or generate sti */
4845         tcg_target_ulong val = ts->val;
4846         if (IS_DEAD_ARG(1)) {
4847             temp_dead(s, ts);
4848         }
4849         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4850         return;
4851     }
4852 
4853     /* If the source value is in memory we're going to be forced
4854        to have it in a register in order to perform the copy.  Copy
4855        the SOURCE value into its own register first, that way we
4856        don't have to reload SOURCE the next time it is used. */
4857     if (ts->val_type == TEMP_VAL_MEM) {
4858         temp_load(s, ts, tcg_target_available_regs[itype],
4859                   allocated_regs, preferred_regs);
4860     }
4861     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4862     ireg = ts->reg;
4863 
4864     if (IS_DEAD_ARG(0)) {
4865         /* mov to a non-saved dead register makes no sense (even with
4866            liveness analysis disabled). */
4867         tcg_debug_assert(NEED_SYNC_ARG(0));
4868         if (!ots->mem_allocated) {
4869             temp_allocate_frame(s, ots);
4870         }
4871         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4872         if (IS_DEAD_ARG(1)) {
4873             temp_dead(s, ts);
4874         }
4875         temp_dead(s, ots);
4876         return;
4877     }
4878 
4879     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4880         /*
4881          * The mov can be suppressed.  Kill input first, so that it
4882          * is unlinked from reg_to_temp, then set the output to the
4883          * reg that we saved from the input.
4884          */
4885         temp_dead(s, ts);
4886         oreg = ireg;
4887     } else {
4888         if (ots->val_type == TEMP_VAL_REG) {
4889             oreg = ots->reg;
4890         } else {
4891             /* Make sure to not spill the input register during allocation. */
4892             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4893                                  allocated_regs | ((TCGRegSet)1 << ireg),
4894                                  preferred_regs, ots->indirect_base);
4895         }
4896         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4897             /*
4898              * Cross register class move not supported.
4899              * Store the source register into the destination slot
4900              * and leave the destination temp as TEMP_VAL_MEM.
4901              */
4902             assert(!temp_readonly(ots));
4903             if (!ts->mem_allocated) {
4904                 temp_allocate_frame(s, ots);
4905             }
4906             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4907             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4908             ots->mem_coherent = 1;
4909             return;
4910         }
4911     }
4912     set_temp_val_reg(s, ots, oreg);
4913     ots->mem_coherent = 0;
4914 
4915     if (NEED_SYNC_ARG(0)) {
4916         temp_sync(s, ots, allocated_regs, 0, 0);
4917     }
4918 }
4919 
4920 /*
4921  * Specialized code generation for INDEX_op_dup_vec.
4922  */
4923 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4924 {
4925     const TCGLifeData arg_life = op->life;
4926     TCGRegSet dup_out_regs, dup_in_regs;
4927     const TCGArgConstraint *dup_args_ct;
4928     TCGTemp *its, *ots;
4929     TCGType itype, vtype;
4930     unsigned vece;
4931     int lowpart_ofs;
4932     bool ok;
4933 
4934     ots = arg_temp(op->args[0]);
4935     its = arg_temp(op->args[1]);
4936 
4937     /* ENV should not be modified.  */
4938     tcg_debug_assert(!temp_readonly(ots));
4939 
4940     itype = its->type;
4941     vece = TCGOP_VECE(op);
4942     vtype = TCGOP_TYPE(op);
4943 
4944     if (its->val_type == TEMP_VAL_CONST) {
4945         /* Propagate constant via movi -> dupi.  */
4946         tcg_target_ulong val = its->val;
4947         if (IS_DEAD_ARG(1)) {
4948             temp_dead(s, its);
4949         }
4950         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4951         return;
4952     }
4953 
4954     dup_args_ct = opcode_args_ct(op);
4955     dup_out_regs = dup_args_ct[0].regs;
4956     dup_in_regs = dup_args_ct[1].regs;
4957 
4958     /* Allocate the output register now.  */
4959     if (ots->val_type != TEMP_VAL_REG) {
4960         TCGRegSet allocated_regs = s->reserved_regs;
4961         TCGReg oreg;
4962 
4963         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4964             /* Make sure to not spill the input register. */
4965             tcg_regset_set_reg(allocated_regs, its->reg);
4966         }
4967         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4968                              output_pref(op, 0), ots->indirect_base);
4969         set_temp_val_reg(s, ots, oreg);
4970     }
4971 
4972     switch (its->val_type) {
4973     case TEMP_VAL_REG:
4974         /*
4975          * The dup constriaints must be broad, covering all possible VECE.
4976          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4977          * to fail, indicating that extra moves are required for that case.
4978          */
4979         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4980             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4981                 goto done;
4982             }
4983             /* Try again from memory or a vector input register.  */
4984         }
4985         if (!its->mem_coherent) {
4986             /*
4987              * The input register is not synced, and so an extra store
4988              * would be required to use memory.  Attempt an integer-vector
4989              * register move first.  We do not have a TCGRegSet for this.
4990              */
4991             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4992                 break;
4993             }
4994             /* Sync the temp back to its slot and load from there.  */
4995             temp_sync(s, its, s->reserved_regs, 0, 0);
4996         }
4997         /* fall through */
4998 
4999     case TEMP_VAL_MEM:
5000         lowpart_ofs = 0;
5001         if (HOST_BIG_ENDIAN) {
5002             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5003         }
5004         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5005                              its->mem_offset + lowpart_ofs)) {
5006             goto done;
5007         }
5008         /* Load the input into the destination vector register. */
5009         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5010         break;
5011 
5012     default:
5013         g_assert_not_reached();
5014     }
5015 
5016     /* We now have a vector input register, so dup must succeed. */
5017     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5018     tcg_debug_assert(ok);
5019 
5020  done:
5021     ots->mem_coherent = 0;
5022     if (IS_DEAD_ARG(1)) {
5023         temp_dead(s, its);
5024     }
5025     if (NEED_SYNC_ARG(0)) {
5026         temp_sync(s, ots, s->reserved_regs, 0, 0);
5027     }
5028     if (IS_DEAD_ARG(0)) {
5029         temp_dead(s, ots);
5030     }
5031 }
5032 
5033 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5034 {
5035     const TCGLifeData arg_life = op->life;
5036     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5037     TCGRegSet i_allocated_regs;
5038     TCGRegSet o_allocated_regs;
5039     int i, k, nb_iargs, nb_oargs;
5040     TCGReg reg;
5041     TCGArg arg;
5042     const TCGArgConstraint *args_ct;
5043     const TCGArgConstraint *arg_ct;
5044     TCGTemp *ts;
5045     TCGArg new_args[TCG_MAX_OP_ARGS];
5046     int const_args[TCG_MAX_OP_ARGS];
5047     TCGCond op_cond;
5048 
5049     nb_oargs = def->nb_oargs;
5050     nb_iargs = def->nb_iargs;
5051 
5052     /* copy constants */
5053     memcpy(new_args + nb_oargs + nb_iargs,
5054            op->args + nb_oargs + nb_iargs,
5055            sizeof(TCGArg) * def->nb_cargs);
5056 
5057     i_allocated_regs = s->reserved_regs;
5058     o_allocated_regs = s->reserved_regs;
5059 
5060     switch (op->opc) {
5061     case INDEX_op_brcond_i32:
5062     case INDEX_op_brcond_i64:
5063         op_cond = op->args[2];
5064         break;
5065     case INDEX_op_setcond:
5066     case INDEX_op_negsetcond:
5067     case INDEX_op_cmp_vec:
5068         op_cond = op->args[3];
5069         break;
5070     case INDEX_op_brcond2_i32:
5071         op_cond = op->args[4];
5072         break;
5073     case INDEX_op_movcond_i32:
5074     case INDEX_op_movcond_i64:
5075     case INDEX_op_setcond2_i32:
5076     case INDEX_op_cmpsel_vec:
5077         op_cond = op->args[5];
5078         break;
5079     default:
5080         /* No condition within opcode. */
5081         op_cond = TCG_COND_ALWAYS;
5082         break;
5083     }
5084 
5085     args_ct = opcode_args_ct(op);
5086 
5087     /* satisfy input constraints */
5088     for (k = 0; k < nb_iargs; k++) {
5089         TCGRegSet i_preferred_regs, i_required_regs;
5090         bool allocate_new_reg, copyto_new_reg;
5091         TCGTemp *ts2;
5092         int i1, i2;
5093 
5094         i = args_ct[nb_oargs + k].sort_index;
5095         arg = op->args[i];
5096         arg_ct = &args_ct[i];
5097         ts = arg_temp(arg);
5098 
5099         if (ts->val_type == TEMP_VAL_CONST) {
5100 #ifdef TCG_REG_ZERO
5101             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5102                 /* Hardware zero register: indicate register via non-const. */
5103                 const_args[i] = 0;
5104                 new_args[i] = TCG_REG_ZERO;
5105                 continue;
5106             }
5107 #endif
5108 
5109             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5110                                        op_cond, TCGOP_VECE(op))) {
5111                 /* constant is OK for instruction */
5112                 const_args[i] = 1;
5113                 new_args[i] = ts->val;
5114                 continue;
5115             }
5116         }
5117 
5118         reg = ts->reg;
5119         i_preferred_regs = 0;
5120         i_required_regs = arg_ct->regs;
5121         allocate_new_reg = false;
5122         copyto_new_reg = false;
5123 
5124         switch (arg_ct->pair) {
5125         case 0: /* not paired */
5126             if (arg_ct->ialias) {
5127                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5128 
5129                 /*
5130                  * If the input is readonly, then it cannot also be an
5131                  * output and aliased to itself.  If the input is not
5132                  * dead after the instruction, we must allocate a new
5133                  * register and move it.
5134                  */
5135                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5136                     || args_ct[arg_ct->alias_index].newreg) {
5137                     allocate_new_reg = true;
5138                 } else if (ts->val_type == TEMP_VAL_REG) {
5139                     /*
5140                      * Check if the current register has already been
5141                      * allocated for another input.
5142                      */
5143                     allocate_new_reg =
5144                         tcg_regset_test_reg(i_allocated_regs, reg);
5145                 }
5146             }
5147             if (!allocate_new_reg) {
5148                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5149                           i_preferred_regs);
5150                 reg = ts->reg;
5151                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5152             }
5153             if (allocate_new_reg) {
5154                 /*
5155                  * Allocate a new register matching the constraint
5156                  * and move the temporary register into it.
5157                  */
5158                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5159                           i_allocated_regs, 0);
5160                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5161                                     i_preferred_regs, ts->indirect_base);
5162                 copyto_new_reg = true;
5163             }
5164             break;
5165 
5166         case 1:
5167             /* First of an input pair; if i1 == i2, the second is an output. */
5168             i1 = i;
5169             i2 = arg_ct->pair_index;
5170             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5171 
5172             /*
5173              * It is easier to default to allocating a new pair
5174              * and to identify a few cases where it's not required.
5175              */
5176             if (arg_ct->ialias) {
5177                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5178                 if (IS_DEAD_ARG(i1) &&
5179                     IS_DEAD_ARG(i2) &&
5180                     !temp_readonly(ts) &&
5181                     ts->val_type == TEMP_VAL_REG &&
5182                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5183                     tcg_regset_test_reg(i_required_regs, reg) &&
5184                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5185                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5186                     (ts2
5187                      ? ts2->val_type == TEMP_VAL_REG &&
5188                        ts2->reg == reg + 1 &&
5189                        !temp_readonly(ts2)
5190                      : s->reg_to_temp[reg + 1] == NULL)) {
5191                     break;
5192                 }
5193             } else {
5194                 /* Without aliasing, the pair must also be an input. */
5195                 tcg_debug_assert(ts2);
5196                 if (ts->val_type == TEMP_VAL_REG &&
5197                     ts2->val_type == TEMP_VAL_REG &&
5198                     ts2->reg == reg + 1 &&
5199                     tcg_regset_test_reg(i_required_regs, reg)) {
5200                     break;
5201                 }
5202             }
5203             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5204                                      0, ts->indirect_base);
5205             goto do_pair;
5206 
5207         case 2: /* pair second */
5208             reg = new_args[arg_ct->pair_index] + 1;
5209             goto do_pair;
5210 
5211         case 3: /* ialias with second output, no first input */
5212             tcg_debug_assert(arg_ct->ialias);
5213             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5214 
5215             if (IS_DEAD_ARG(i) &&
5216                 !temp_readonly(ts) &&
5217                 ts->val_type == TEMP_VAL_REG &&
5218                 reg > 0 &&
5219                 s->reg_to_temp[reg - 1] == NULL &&
5220                 tcg_regset_test_reg(i_required_regs, reg) &&
5221                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5222                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5223                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5224                 break;
5225             }
5226             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5227                                      i_allocated_regs, 0,
5228                                      ts->indirect_base);
5229             tcg_regset_set_reg(i_allocated_regs, reg);
5230             reg += 1;
5231             goto do_pair;
5232 
5233         do_pair:
5234             /*
5235              * If an aliased input is not dead after the instruction,
5236              * we must allocate a new register and move it.
5237              */
5238             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5239                 TCGRegSet t_allocated_regs = i_allocated_regs;
5240 
5241                 /*
5242                  * Because of the alias, and the continued life, make sure
5243                  * that the temp is somewhere *other* than the reg pair,
5244                  * and we get a copy in reg.
5245                  */
5246                 tcg_regset_set_reg(t_allocated_regs, reg);
5247                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5248                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5249                     /* If ts was already in reg, copy it somewhere else. */
5250                     TCGReg nr;
5251                     bool ok;
5252 
5253                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5254                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5255                                        t_allocated_regs, 0, ts->indirect_base);
5256                     ok = tcg_out_mov(s, ts->type, nr, reg);
5257                     tcg_debug_assert(ok);
5258 
5259                     set_temp_val_reg(s, ts, nr);
5260                 } else {
5261                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5262                               t_allocated_regs, 0);
5263                     copyto_new_reg = true;
5264                 }
5265             } else {
5266                 /* Preferably allocate to reg, otherwise copy. */
5267                 i_required_regs = (TCGRegSet)1 << reg;
5268                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5269                           i_preferred_regs);
5270                 copyto_new_reg = ts->reg != reg;
5271             }
5272             break;
5273 
5274         default:
5275             g_assert_not_reached();
5276         }
5277 
5278         if (copyto_new_reg) {
5279             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5280                 /*
5281                  * Cross register class move not supported.  Sync the
5282                  * temp back to its slot and load from there.
5283                  */
5284                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5285                 tcg_out_ld(s, ts->type, reg,
5286                            ts->mem_base->reg, ts->mem_offset);
5287             }
5288         }
5289         new_args[i] = reg;
5290         const_args[i] = 0;
5291         tcg_regset_set_reg(i_allocated_regs, reg);
5292     }
5293 
5294     /* mark dead temporaries and free the associated registers */
5295     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5296         if (IS_DEAD_ARG(i)) {
5297             temp_dead(s, arg_temp(op->args[i]));
5298         }
5299     }
5300 
5301     if (def->flags & TCG_OPF_COND_BRANCH) {
5302         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5303     } else if (def->flags & TCG_OPF_BB_END) {
5304         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5305     } else {
5306         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5307             /* XXX: permit generic clobber register list ? */
5308             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5309                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5310                     tcg_reg_free(s, i, i_allocated_regs);
5311                 }
5312             }
5313         }
5314         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5315             /* sync globals if the op has side effects and might trigger
5316                an exception. */
5317             sync_globals(s, i_allocated_regs);
5318         }
5319 
5320         /* satisfy the output constraints */
5321         for (k = 0; k < nb_oargs; k++) {
5322             i = args_ct[k].sort_index;
5323             arg = op->args[i];
5324             arg_ct = &args_ct[i];
5325             ts = arg_temp(arg);
5326 
5327             /* ENV should not be modified.  */
5328             tcg_debug_assert(!temp_readonly(ts));
5329 
5330             switch (arg_ct->pair) {
5331             case 0: /* not paired */
5332                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5333                     reg = new_args[arg_ct->alias_index];
5334                 } else if (arg_ct->newreg) {
5335                     reg = tcg_reg_alloc(s, arg_ct->regs,
5336                                         i_allocated_regs | o_allocated_regs,
5337                                         output_pref(op, k), ts->indirect_base);
5338                 } else {
5339                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5340                                         output_pref(op, k), ts->indirect_base);
5341                 }
5342                 break;
5343 
5344             case 1: /* first of pair */
5345                 if (arg_ct->oalias) {
5346                     reg = new_args[arg_ct->alias_index];
5347                 } else if (arg_ct->newreg) {
5348                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5349                                              i_allocated_regs | o_allocated_regs,
5350                                              output_pref(op, k),
5351                                              ts->indirect_base);
5352                 } else {
5353                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5354                                              output_pref(op, k),
5355                                              ts->indirect_base);
5356                 }
5357                 break;
5358 
5359             case 2: /* second of pair */
5360                 if (arg_ct->oalias) {
5361                     reg = new_args[arg_ct->alias_index];
5362                 } else {
5363                     reg = new_args[arg_ct->pair_index] + 1;
5364                 }
5365                 break;
5366 
5367             case 3: /* first of pair, aliasing with a second input */
5368                 tcg_debug_assert(!arg_ct->newreg);
5369                 reg = new_args[arg_ct->pair_index] - 1;
5370                 break;
5371 
5372             default:
5373                 g_assert_not_reached();
5374             }
5375             tcg_regset_set_reg(o_allocated_regs, reg);
5376             set_temp_val_reg(s, ts, reg);
5377             ts->mem_coherent = 0;
5378             new_args[i] = reg;
5379         }
5380     }
5381 
5382     /* emit instruction */
5383     TCGType type = TCGOP_TYPE(op);
5384     switch (op->opc) {
5385     case INDEX_op_ext_i32_i64:
5386         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5387         break;
5388     case INDEX_op_extu_i32_i64:
5389         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5390         break;
5391     case INDEX_op_extrl_i64_i32:
5392         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5393         break;
5394 
5395     case INDEX_op_add:
5396     case INDEX_op_and:
5397     case INDEX_op_andc:
5398     case INDEX_op_clz:
5399     case INDEX_op_ctz:
5400     case INDEX_op_divs:
5401     case INDEX_op_divu:
5402     case INDEX_op_eqv:
5403     case INDEX_op_mul:
5404     case INDEX_op_mulsh:
5405     case INDEX_op_muluh:
5406     case INDEX_op_nand:
5407     case INDEX_op_nor:
5408     case INDEX_op_or:
5409     case INDEX_op_orc:
5410     case INDEX_op_rems:
5411     case INDEX_op_remu:
5412     case INDEX_op_rotl:
5413     case INDEX_op_rotr:
5414     case INDEX_op_sar:
5415     case INDEX_op_shl:
5416     case INDEX_op_shr:
5417     case INDEX_op_xor:
5418         {
5419             const TCGOutOpBinary *out =
5420                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5421 
5422             /* Constants should never appear in the first source operand. */
5423             tcg_debug_assert(!const_args[1]);
5424             if (const_args[2]) {
5425                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5426             } else {
5427                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5428             }
5429         }
5430         break;
5431 
5432     case INDEX_op_sub:
5433         {
5434             const TCGOutOpSubtract *out = &outop_sub;
5435 
5436             /*
5437              * Constants should never appear in the second source operand.
5438              * These are folded to add with negative constant.
5439              */
5440             tcg_debug_assert(!const_args[2]);
5441             if (const_args[1]) {
5442                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5443             } else {
5444                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5445             }
5446         }
5447         break;
5448 
5449     case INDEX_op_ctpop:
5450     case INDEX_op_neg:
5451     case INDEX_op_not:
5452         {
5453             const TCGOutOpUnary *out =
5454                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5455 
5456             /* Constants should have been folded. */
5457             tcg_debug_assert(!const_args[1]);
5458             out->out_rr(s, type, new_args[0], new_args[1]);
5459         }
5460         break;
5461 
5462     case INDEX_op_divs2:
5463     case INDEX_op_divu2:
5464         {
5465             const TCGOutOpDivRem *out =
5466                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5467 
5468             /* Only used by x86 and s390x, which use matching constraints. */
5469             tcg_debug_assert(new_args[0] == new_args[2]);
5470             tcg_debug_assert(new_args[1] == new_args[3]);
5471             tcg_debug_assert(!const_args[4]);
5472             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5473         }
5474         break;
5475 
5476     case INDEX_op_muls2:
5477     case INDEX_op_mulu2:
5478         {
5479             const TCGOutOpMul2 *out =
5480                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5481 
5482             tcg_debug_assert(!const_args[2]);
5483             tcg_debug_assert(!const_args[3]);
5484             out->out_rrrr(s, type, new_args[0], new_args[1],
5485                           new_args[2], new_args[3]);
5486         }
5487         break;
5488 
5489     case INDEX_op_setcond:
5490     case INDEX_op_negsetcond:
5491         {
5492             const TCGOutOpSetcond *out =
5493                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5494             TCGCond cond = new_args[3];
5495 
5496             tcg_debug_assert(!const_args[1]);
5497             if (const_args[2]) {
5498                 out->out_rri(s, type, cond,
5499                              new_args[0], new_args[1], new_args[2]);
5500             } else {
5501                 out->out_rrr(s, type, cond,
5502                              new_args[0], new_args[1], new_args[2]);
5503             }
5504         }
5505         break;
5506 
5507     default:
5508         if (def->flags & TCG_OPF_VECTOR) {
5509             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5510                            TCGOP_VECE(op), new_args, const_args);
5511         } else {
5512             tcg_out_op(s, op->opc, type, new_args, const_args);
5513         }
5514         break;
5515     }
5516 
5517     /* move the outputs in the correct register if needed */
5518     for(i = 0; i < nb_oargs; i++) {
5519         ts = arg_temp(op->args[i]);
5520 
5521         /* ENV should not be modified.  */
5522         tcg_debug_assert(!temp_readonly(ts));
5523 
5524         if (NEED_SYNC_ARG(i)) {
5525             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5526         } else if (IS_DEAD_ARG(i)) {
5527             temp_dead(s, ts);
5528         }
5529     }
5530 }
5531 
/*
 * Allocate registers and emit code for a dup2 vector operation, which
 * combines two 32-bit input temps into a replicated 64-bit vector
 * element.  Returns true if the operation was emitted directly (via
 * dupi_vec or dupm_vec); returns false to request generic expansion
 * by the caller.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);        /* output vector temp */
    itsl = arg_temp(op->args[1]);       /* low 32-bit input */
    itsh = arg_temp(op->args[2]);       /* high 32-bit input */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Join the two 32-bit constants into one 64-bit value. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that replicates to val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    /*
     * The subindex/adjacency tests establish that itsl and itsh are
     * the two halves of the same 64-bit temp, in host memory order.
     */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to their memory slot so dupm can load it. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5618 
5619 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5620                          TCGRegSet allocated_regs)
5621 {
5622     if (ts->val_type == TEMP_VAL_REG) {
5623         if (ts->reg != reg) {
5624             tcg_reg_free(s, reg, allocated_regs);
5625             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5626                 /*
5627                  * Cross register class move not supported.  Sync the
5628                  * temp back to its slot and load from there.
5629                  */
5630                 temp_sync(s, ts, allocated_regs, 0, 0);
5631                 tcg_out_ld(s, ts->type, reg,
5632                            ts->mem_base->reg, ts->mem_offset);
5633             }
5634         }
5635     } else {
5636         TCGRegSet arg_set = 0;
5637 
5638         tcg_reg_free(s, reg, allocated_regs);
5639         tcg_regset_set_reg(arg_set, reg);
5640         temp_load(s, ts, arg_set, allocated_regs, 0);
5641     }
5642 }
5643 
5644 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5645                          TCGRegSet allocated_regs)
5646 {
5647     /*
5648      * When the destination is on the stack, load up the temp and store.
5649      * If there are many call-saved registers, the temp might live to
5650      * see another use; otherwise it'll be discarded.
5651      */
5652     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5653     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5654                arg_slot_stk_ofs(arg_slot));
5655 }
5656 
5657 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5658                             TCGTemp *ts, TCGRegSet *allocated_regs)
5659 {
5660     if (arg_slot_reg_p(l->arg_slot)) {
5661         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5662         load_arg_reg(s, reg, ts, *allocated_regs);
5663         tcg_regset_set_reg(*allocated_regs, reg);
5664     } else {
5665         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5666     }
5667 }
5668 
5669 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5670                          intptr_t ref_off, TCGRegSet *allocated_regs)
5671 {
5672     TCGReg reg;
5673 
5674     if (arg_slot_reg_p(arg_slot)) {
5675         reg = tcg_target_call_iarg_regs[arg_slot];
5676         tcg_reg_free(s, reg, *allocated_regs);
5677         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5678         tcg_regset_set_reg(*allocated_regs, reg);
5679     } else {
5680         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5681                             *allocated_regs, 0, false);
5682         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5683         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5684                    arg_slot_stk_ofs(arg_slot));
5685     }
5686 }
5687 
/*
 * Allocate registers and emit code for a helper call op.  Inputs are
 * moved into their ABI argument locations, call-clobbered registers
 * are freed, globals are saved or synced per the helper's flags, and
 * outputs are bound to the ABI return registers (or marked in memory,
 * for values returned by reference or in a vector register).
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            /*
             * NOTE(review): EXTEND_{U,S} are placed exactly like NORMAL
             * here; presumably any widening happened when the argument
             * temp was produced -- confirm against the call setup.
             */
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Stash the value in its stack slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Additional word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part lives in its ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Spill the vector return register to the temp's home slot. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5818 
5819 /**
5820  * atom_and_align_for_opc:
5821  * @s: tcg context
5822  * @opc: memory operation code
5823  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5824  * @allow_two_ops: true if we are prepared to issue two operations
5825  *
5826  * Return the alignment and atomicity to use for the inline fast path
5827  * for the given memory operation.  The alignment may be larger than
5828  * that specified in @opc, and the correct alignment will be diagnosed
5829  * by the slow path helper.
5830  *
5831  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5832  * and issue two loads or stores for subalignment.
5833  */
5834 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5835                                            MemOp host_atom, bool allow_two_ops)
5836 {
5837     MemOp align = memop_alignment_bits(opc);
5838     MemOp size = opc & MO_SIZE;
5839     MemOp half = size ? size - 1 : 0;
5840     MemOp atom = opc & MO_ATOM_MASK;
5841     MemOp atmax;
5842 
5843     switch (atom) {
5844     case MO_ATOM_NONE:
5845         /* The operation requires no specific atomicity. */
5846         atmax = MO_8;
5847         break;
5848 
5849     case MO_ATOM_IFALIGN:
5850         atmax = size;
5851         break;
5852 
5853     case MO_ATOM_IFALIGN_PAIR:
5854         atmax = half;
5855         break;
5856 
5857     case MO_ATOM_WITHIN16:
5858         atmax = size;
5859         if (size == MO_128) {
5860             /* Misalignment implies !within16, and therefore no atomicity. */
5861         } else if (host_atom != MO_ATOM_WITHIN16) {
5862             /* The host does not implement within16, so require alignment. */
5863             align = MAX(align, size);
5864         }
5865         break;
5866 
5867     case MO_ATOM_WITHIN16_PAIR:
5868         atmax = size;
5869         /*
5870          * Misalignment implies !within16, and therefore half atomicity.
5871          * Any host prepared for two operations can implement this with
5872          * half alignment.
5873          */
5874         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5875             align = MAX(align, half);
5876         }
5877         break;
5878 
5879     case MO_ATOM_SUBALIGN:
5880         atmax = size;
5881         if (host_atom != MO_ATOM_SUBALIGN) {
5882             /* If unaligned but not odd, there are subobjects up to half. */
5883             if (allow_two_ops) {
5884                 align = MAX(align, half);
5885             } else {
5886                 align = MAX(align, size);
5887             }
5888         }
5889         break;
5890 
5891     default:
5892         g_assert_not_reached();
5893     }
5894 
5895     return (TCGAtomAlign){ .atom = atmax, .align = align };
5896 }
5897 
5898 /*
5899  * Similarly for qemu_ld/st slow path helpers.
5900  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5901  * using only the provided backend tcg_out_* functions.
5902  */
5903 
5904 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5905 {
5906     int ofs = arg_slot_stk_ofs(slot);
5907 
5908     /*
5909      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5910      * require extension to uint64_t, adjust the address for uint32_t.
5911      */
5912     if (HOST_BIG_ENDIAN &&
5913         TCG_TARGET_REG_BITS == 64 &&
5914         type == TCG_TYPE_I32) {
5915         ofs += 4;
5916     }
5917     return ofs;
5918 }
5919 
5920 static void tcg_out_helper_load_slots(TCGContext *s,
5921                                       unsigned nmov, TCGMovExtend *mov,
5922                                       const TCGLdstHelperParam *parm)
5923 {
5924     unsigned i;
5925     TCGReg dst3;
5926 
5927     /*
5928      * Start from the end, storing to the stack first.
5929      * This frees those registers, so we need not consider overlap.
5930      */
5931     for (i = nmov; i-- > 0; ) {
5932         unsigned slot = mov[i].dst;
5933 
5934         if (arg_slot_reg_p(slot)) {
5935             goto found_reg;
5936         }
5937 
5938         TCGReg src = mov[i].src;
5939         TCGType dst_type = mov[i].dst_type;
5940         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5941 
5942         /* The argument is going onto the stack; extend into scratch. */
5943         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5944             tcg_debug_assert(parm->ntmp != 0);
5945             mov[i].dst = src = parm->tmp[0];
5946             tcg_out_movext1(s, &mov[i]);
5947         }
5948 
5949         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5950                    tcg_out_helper_stk_ofs(dst_type, slot));
5951     }
5952     return;
5953 
5954  found_reg:
5955     /*
5956      * The remaining arguments are in registers.
5957      * Convert slot numbers to argument registers.
5958      */
5959     nmov = i + 1;
5960     for (i = 0; i < nmov; ++i) {
5961         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5962     }
5963 
5964     switch (nmov) {
5965     case 4:
5966         /* The backend must have provided enough temps for the worst case. */
5967         tcg_debug_assert(parm->ntmp >= 2);
5968 
5969         dst3 = mov[3].dst;
5970         for (unsigned j = 0; j < 3; ++j) {
5971             if (dst3 == mov[j].src) {
5972                 /*
5973                  * Conflict. Copy the source to a temporary, perform the
5974                  * remaining moves, then the extension from our scratch
5975                  * on the way out.
5976                  */
5977                 TCGReg scratch = parm->tmp[1];
5978 
5979                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5980                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5981                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5982                 break;
5983             }
5984         }
5985 
5986         /* No conflicts: perform this move and continue. */
5987         tcg_out_movext1(s, &mov[3]);
5988         /* fall through */
5989 
5990     case 3:
5991         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5992                         parm->ntmp ? parm->tmp[0] : -1);
5993         break;
5994     case 2:
5995         tcg_out_movext2(s, mov, mov + 1,
5996                         parm->ntmp ? parm->tmp[0] : -1);
5997         break;
5998     case 1:
5999         tcg_out_movext1(s, mov);
6000         break;
6001     default:
6002         g_assert_not_reached();
6003     }
6004 }
6005 
6006 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6007                                     TCGType type, tcg_target_long imm,
6008                                     const TCGLdstHelperParam *parm)
6009 {
6010     if (arg_slot_reg_p(slot)) {
6011         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6012     } else {
6013         int ofs = tcg_out_helper_stk_ofs(type, slot);
6014         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6015             tcg_debug_assert(parm->ntmp != 0);
6016             tcg_out_movi(s, type, parm->tmp[0], imm);
6017             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6018         }
6019     }
6020 }
6021 
/*
 * Load the arguments common to all qemu_ld/st slow-path helpers:
 * env (always the first argument), the MemOpIdx from @ldst->oi, and
 * the return address.  @next_arg indexes info->in[] just past the
 * address/data arguments already placed by the caller.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        /* The value fits in 32 bits, so pass it at register width. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    /*
     * If the backend provides ra_gen, let it synthesize the return
     * address (possibly directly into the argument register);
     * otherwise pass ldst->raddr as an immediate.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6088 
6089 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6090                                        const TCGCallArgumentLoc *loc,
6091                                        TCGType dst_type, TCGType src_type,
6092                                        TCGReg lo, TCGReg hi)
6093 {
6094     MemOp reg_mo;
6095 
6096     if (dst_type <= TCG_TYPE_REG) {
6097         MemOp src_ext;
6098 
6099         switch (loc->kind) {
6100         case TCG_CALL_ARG_NORMAL:
6101             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6102             break;
6103         case TCG_CALL_ARG_EXTEND_U:
6104             dst_type = TCG_TYPE_REG;
6105             src_ext = MO_UL;
6106             break;
6107         case TCG_CALL_ARG_EXTEND_S:
6108             dst_type = TCG_TYPE_REG;
6109             src_ext = MO_SL;
6110             break;
6111         default:
6112             g_assert_not_reached();
6113         }
6114 
6115         mov[0].dst = loc->arg_slot;
6116         mov[0].dst_type = dst_type;
6117         mov[0].src = lo;
6118         mov[0].src_type = src_type;
6119         mov[0].src_ext = src_ext;
6120         return 1;
6121     }
6122 
6123     if (TCG_TARGET_REG_BITS == 32) {
6124         assert(dst_type == TCG_TYPE_I64);
6125         reg_mo = MO_32;
6126     } else {
6127         assert(dst_type == TCG_TYPE_I128);
6128         reg_mo = MO_64;
6129     }
6130 
6131     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6132     mov[0].src = lo;
6133     mov[0].dst_type = TCG_TYPE_REG;
6134     mov[0].src_type = TCG_TYPE_REG;
6135     mov[0].src_ext = reg_mo;
6136 
6137     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6138     mov[1].src = hi;
6139     mov[1].dst_type = TCG_TYPE_REG;
6140     mov[1].src_type = TCG_TYPE_REG;
6141     mov[1].src_ext = reg_mo;
6142 
6143     return 2;
6144 }
6145 
/*
 * Emit the argument loads for a qemu_ld slow-path helper call: the
 * guest address, plus the common env/oi/ra arguments.  The helper
 * (and thus its TCGHelperInfo) is selected by the access size alone;
 * sub-word loads share the 32-bit helper.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Slot 0 is on the stack: compute the address into a
                   temp, then store it into the argument slot. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6226 
/*
 * Move a qemu_ld slow-path helper's return value from the ABI return
 * location into ldst->datalo_reg (and datahi_reg for two-register
 * results), applying any extension required by the memory op.
 * @load_sign indicates the helper itself performed the sign extension
 * for sub-word loads, in which case a plain move suffices.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* I64 on a 32-bit host is a two-register result: see below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair: handled below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload the
               two halves as integers (shares the BY_REF reload). */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result in the normal return registers, ordered for
       host endianness; movext2 handles any dst/src overlap. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6312 
/*
 * Emit the argument loads for a qemu_st slow-path helper call: the
 * guest address, the data to store, and the common env/oi/ra
 * arguments.  As for loads, the helper is chosen by access size,
 * with sub-word stores sharing the 32-bit helper.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed in registers/stack: queue its moves alongside
           the address moves and load all slots at once. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 data passed by reference: store both halves to the
           reserved stack slots, then pass their address. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6419 
/*
 * Translate the ops accumulated in @s into host code for @tb.
 * Returns the number of bytes of code generated on success;
 * -1 if the code buffer high-water mark was exceeded (caller must
 * flush the buffer and retry); -2 if the TB exceeded the 16-bit
 * insn-offset limit or relocations could not be resolved.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    /* Optimization and liveness passes over the op stream. */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* Per-insn start data: start_words words for each guest insn. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* num_insns counts guest insns; -1 until the first insn_start op. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                /* Record where the previous guest insn's code ended. */
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6611 
6612 #ifdef ELF_HOST_MACHINE
6613 /* In order to use this feature, the backend needs to do three things:
6614 
6615    (1) Define ELF_HOST_MACHINE to indicate both what value to
6616        put into the ELF image and to indicate support for the feature.
6617 
6618    (2) Define tcg_register_jit.  This should create a buffer containing
6619        the contents of a .debug_frame section that describes the post-
6620        prologue unwind info for the tcg machine.
6621 
6622    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6623 */
6624 
6625 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB places a breakpoint on this function; the empty asm keeps the
   call from being optimized away despite the empty body. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6655 
6656 /* End GDB interface.  */
6657 
/*
 * Return the offset of @str within the ELF string table @strtab.
 * The string must be present: entry 0 (the empty string) is skipped
 * and the search does not terminate on a miss.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
6669 
/*
 * Build an in-memory ELF image describing the generated code in
 * [buf_ptr, buf_ptr + buf_size): a NOBITS .text section, a minimal
 * DWARF .debug_info/.debug_abbrev pair, the caller-supplied
 * .debug_frame, and a one-symbol symtab/strtab -- then register it
 * with GDB via the __jit_debug_* interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Fixed-layout .debug_info contents; address fields patched below. */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): e_ehsize is the ELF header size;
               sizeof(ElfW(Ehdr)) was presumably intended.  The two
               sizes coincide for ELFCLASS64 -- confirm for 32-bit
               hosts. */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                /* Placed immediately after the template; see memcpy below. */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Patch the buffer address and size into all the places that
       reference the (absent) .text contents. */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Append the backend's .debug_frame and patch its FDE range. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (1 here),
               not bytes, so this comparison is always true; it exists
               only to consume the warn_unused_result value. */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    /* Hand the image to GDB: set up the descriptor, then hit the
       registration breakpoint function. */
    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
6869 #else
6870 /* No support for the feature.  Provide the entry point expected by exec.c,
6871    and implement the internal function we declared earlier.  */
6872 
/* Without ELF_HOST_MACHINE, debugger registration is a no-op. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6878 
/* Public entry point expected by exec.c; no-op without ELF support. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6882 #endif /* ELF_HOST_MACHINE */
6883 
6884 #if !TCG_TARGET_MAYBE_vec
/* Stub for hosts without vector support; must never be reached,
   since no vector ops are created for such hosts. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6889 #endif
6890