/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
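
/*
 * Life cycle of a forward branch: tcg_out_reloc records the branch
 * site against a not-yet-bound label, tcg_out_label binds the label
 * to the current output position, and tcg_resolve_relocs walks every
 * label once code generation finishes, patching each recorded site
 * via the backend's patch_reloc.
 */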

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
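
/*
 * Example (illustrative only): a backend wanting a sign-extended byte
 * widened into a 64-bit register could write
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SB, src);
 * which dispatches to tcg_out_ext8s with TCG_TYPE_I64 as the
 * destination type.
 */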

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
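
/*
 * E.g. if @i1 moves A -> B while @i2 moves B -> A, neither move can
 * go first without clobbering the other's source; the code above
 * swaps the registers via tcg_out_xchg when the backend provides it,
 * and otherwise routes one value through @scratch before extending.
 */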

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
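
/*
 * The list is kept sorted: larger entries first, equal-sized entries
 * in descending order of data.  Duplicate constants therefore end up
 * adjacent, which lets tcg_out_pool_finalize below deduplicate by
 * comparing each entry only against the one it just emitted.
 */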

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
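
/*
 * As an illustration: an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri;
 * below, after the macros are redefined, the same entry expands to
 * { 1, 2, { "r", "r", "ri" } } in constraint_sets[], and finally to
 * the bare enumerator again for returns from tcg_target_op_def().
 */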

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host ISA.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
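
/*
 * In short: requests up to TCG_POOL_CHUNK_SIZE are carved out of
 * chunked pools that persist across resets, while larger requests get
 * a dedicated entry on pool_first_large that tcg_pool_reset frees.
 * A reset only rewinds pool_current, so the chunks themselves are
 * reused for the next translation.
 */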

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
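
/*
 * In the typemasks above, each value occupies a 3-bit typecode:
 * bits [2:0] describe the return value and bits [3n+2:3n] describe
 * argument n.  The layout code below recovers them with shifts and
 * masks, e.g. "typemask & 7" for the return type.
 */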

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
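
/*
 * E.g. for info_helper_ld32_mmu above, the highest used slot is the
 * "uintptr_t ra" argument in slot 4, so the clz32 computation above
 * resolves nargs to 4 and one ffi_type is filled in per argument.
 */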

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
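
/*
 * A sketch of the mapping, assuming a host with six integer argument
 * registers: slots 0-5 satisfy arg_slot_reg_p and correspond to
 * tcg_target_call_iarg_regs[0..5], while slot 6 is the first stack
 * slot, located TCG_TARGET_CALL_STACK_OFFSET bytes into the stack
 * argument area.
 */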

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
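
/*
 * Used for TCG_CALL_ARG_EVEN, where the ABI requires a multi-word
 * value to start on an even slot (e.g. an even register pair on a
 * 32-bit host); rounding the slot index up to even inserts the
 * required padding slot.
 */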

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
1459      * Allocate space from "ref_slot", which will be adjusted to
1460      * follow the parameters on the stack.
1461      */
1462     loc[0].ref_slot = cum->ref_slot;
1463 
1464     /*
1465      * Subsequent words also go into the reference slot, but
1466      * do not accumulate into the regular arguments.
1467      */
1468     for (int i = 1; i < n; ++i) {
1469         loc[i] = (TCGCallArgumentLoc){
1470             .kind = TCG_CALL_ARG_BY_REF_N,
1471             .arg_idx = cum->arg_idx,
1472             .tmp_subindex = i,
1473             .ref_slot = cum->ref_slot + i,
1474         };
1475     }
1476     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1477     cum->ref_slot += n;
1478 }
1479 
1480 static void init_call_layout(TCGHelperInfo *info)
1481 {
1482     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1483     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1484     unsigned typemask = info->typemask;
1485     unsigned typecode;
1486     TCGCumulativeArgs cum = { };
1487 
1488     /*
1489      * Parse and place any function return value.
1490      */
1491     typecode = typemask & 7;
1492     switch (typecode) {
1493     case dh_typecode_void:
1494         info->nr_out = 0;
1495         break;
1496     case dh_typecode_i32:
1497     case dh_typecode_s32:
1498     case dh_typecode_ptr:
1499         info->nr_out = 1;
1500         info->out_kind = TCG_CALL_RET_NORMAL;
1501         break;
1502     case dh_typecode_i64:
1503     case dh_typecode_s64:
1504         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1505         info->out_kind = TCG_CALL_RET_NORMAL;
1506         /* Query the last register now to trigger any assert early. */
1507         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1508         break;
1509     case dh_typecode_i128:
1510         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1511         info->out_kind = TCG_TARGET_CALL_RET_I128;
1512         switch (TCG_TARGET_CALL_RET_I128) {
1513         case TCG_CALL_RET_NORMAL:
1514             /* Query the last register now to trigger any assert early. */
1515             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1516             break;
1517         case TCG_CALL_RET_BY_VEC:
1518             /* Query the single register now to trigger any assert early. */
1519             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1520             break;
1521         case TCG_CALL_RET_BY_REF:
1522             /*
1523              * Allocate the first argument slot to the output.
1524              * We don't need to store this anywhere, just make it
1525              * unavailable for use in the input loop below.
1526              */
1527             cum.arg_slot = 1;
1528             break;
1529         default:
1530             qemu_build_not_reached();
1531         }
1532         break;
1533     default:
1534         g_assert_not_reached();
1535     }
1536 
1537     /*
1538      * Parse and place function arguments.
1539      */
1540     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1541         TCGCallArgumentKind kind;
1542         TCGType type;
1543 
1544         typecode = typemask & 7;
1545         switch (typecode) {
1546         case dh_typecode_i32:
1547         case dh_typecode_s32:
1548             type = TCG_TYPE_I32;
1549             break;
1550         case dh_typecode_i64:
1551         case dh_typecode_s64:
1552             type = TCG_TYPE_I64;
1553             break;
1554         case dh_typecode_ptr:
1555             type = TCG_TYPE_PTR;
1556             break;
1557         case dh_typecode_i128:
1558             type = TCG_TYPE_I128;
1559             break;
1560         default:
1561             g_assert_not_reached();
1562         }
1563 
1564         switch (type) {
1565         case TCG_TYPE_I32:
1566             switch (TCG_TARGET_CALL_ARG_I32) {
1567             case TCG_CALL_ARG_EVEN:
1568                 layout_arg_even(&cum);
1569                 /* fall through */
1570             case TCG_CALL_ARG_NORMAL:
1571                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1572                 break;
1573             case TCG_CALL_ARG_EXTEND:
1574                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1575                 layout_arg_1(&cum, info, kind);
1576                 break;
1577             default:
1578                 qemu_build_not_reached();
1579             }
1580             break;
1581 
1582         case TCG_TYPE_I64:
1583             switch (TCG_TARGET_CALL_ARG_I64) {
1584             case TCG_CALL_ARG_EVEN:
1585                 layout_arg_even(&cum);
1586                 /* fall through */
1587             case TCG_CALL_ARG_NORMAL:
1588                 if (TCG_TARGET_REG_BITS == 32) {
1589                     layout_arg_normal_n(&cum, info, 2);
1590                 } else {
1591                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1592                 }
1593                 break;
1594             default:
1595                 qemu_build_not_reached();
1596             }
1597             break;
1598 
1599         case TCG_TYPE_I128:
1600             switch (TCG_TARGET_CALL_ARG_I128) {
1601             case TCG_CALL_ARG_EVEN:
1602                 layout_arg_even(&cum);
1603                 /* fall through */
1604             case TCG_CALL_ARG_NORMAL:
1605                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1606                 break;
1607             case TCG_CALL_ARG_BY_REF:
1608                 layout_arg_by_ref(&cum, info);
1609                 break;
1610             default:
1611                 qemu_build_not_reached();
1612             }
1613             break;
1614 
1615         default:
1616             g_assert_not_reached();
1617         }
1618     }
1619     info->nr_in = cum.info_in_idx;
1620 
1621     /* Validate that we didn't overrun the input array. */
1622     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1623     /* Validate the backend has enough argument space. */
1624     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1625 
1626     /*
1627      * Relocate the "ref_slot" area to the end of the parameters.
1628      * Minimizing this stack offset helps code size for x86,
1629      * which has a signed 8-bit offset encoding.
1630      */
1631     if (cum.ref_slot != 0) {
1632         int ref_base = 0;
1633 
1634         if (cum.arg_slot > max_reg_slots) {
1635             int align = __alignof(Int128) / sizeof(tcg_target_long);
1636 
1637             ref_base = cum.arg_slot - max_reg_slots;
1638             if (align > 1) {
1639                 ref_base = ROUND_UP(ref_base, align);
1640             }
1641         }
1642         assert(ref_base + cum.ref_slot <= max_stk_slots);
1643         ref_base += max_reg_slots;
1644 
1645         if (ref_base != 0) {
1646             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1647                 TCGCallArgumentLoc *loc = &info->in[i];
1648                 switch (loc->kind) {
1649                 case TCG_CALL_ARG_BY_REF:
1650                 case TCG_CALL_ARG_BY_REF_N:
1651                     loc->ref_slot += ref_base;
1652                     break;
1653                 default:
1654                     break;
1655                 }
1656             }
1657         }
1658     }
1659 }
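
/*
 * Worked example of the relocation above: assume 6 register slots,
 * cum.arg_slot == 8 (two parameters spilled to the stack), and a
 * 64-bit host where __alignof(Int128) == 16, so align == 2.  Then
 * ref_base starts as 8 - 6 == 2, is already aligned, and becomes
 * 2 + 6 == 8: the by-reference copies land immediately after the
 * two stack parameters.
 */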
1660 
1661 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1662 static void process_constraint_sets(void);
1663 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1664                                             TCGReg reg, const char *name);
1665 
1666 static void tcg_context_init(unsigned max_threads)
1667 {
1668     TCGContext *s = &tcg_init_ctx;
1669     int n, i;
1670     TCGTemp *ts;
1671 
1672     memset(s, 0, sizeof(*s));
1673     s->nb_globals = 0;
1674 
1675     init_call_layout(&info_helper_ld32_mmu);
1676     init_call_layout(&info_helper_ld64_mmu);
1677     init_call_layout(&info_helper_ld128_mmu);
1678     init_call_layout(&info_helper_st32_mmu);
1679     init_call_layout(&info_helper_st64_mmu);
1680     init_call_layout(&info_helper_st128_mmu);
1681 
1682     tcg_target_init(s);
1683     process_constraint_sets();
1684 
1685     /* Reverse the order of the saved registers, assuming they're all at
1686        the start of tcg_target_reg_alloc_order.  */
1687     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1688         int r = tcg_target_reg_alloc_order[n];
1689         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1690             break;
1691         }
1692     }
1693     for (i = 0; i < n; ++i) {
1694         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1695     }
1696     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1697         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1698     }
1699 
1700     tcg_ctx = s;
1701     /*
1702      * In user-mode we simply share the init context among threads, since we
1703      * use a single region.  See the documentation of tcg_region_init()
1704      * for the reasoning behind this.
1705      * In system-mode we will have at most max_threads TCG threads.
1706      */
1707 #ifdef CONFIG_USER_ONLY
1708     tcg_ctxs = &tcg_ctx;
1709     tcg_cur_ctxs = 1;
1710     tcg_max_ctxs = 1;
1711 #else
1712     tcg_max_ctxs = max_threads;
1713     tcg_ctxs = g_new0(TCGContext *, max_threads);
1714 #endif
1715 
1716     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1717     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1718     tcg_env = temp_tcgv_ptr(ts);
1719 }
1720 
1721 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1722 {
1723     tcg_context_init(max_threads);
1724     tcg_region_init(tb_size, splitwx, max_threads);
1725 }
1726 
1727 /*
1728  * Allocate TBs right before their corresponding translated code, making
1729  * sure that TBs and code are on different cache lines.
1730  */
1731 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1732 {
1733     uintptr_t align = qemu_icache_linesize;
1734     TranslationBlock *tb;
1735     void *next;
1736 
1737  retry:
1738     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1739     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1740 
1741     if (unlikely(next > s->code_gen_highwater)) {
1742         if (tcg_region_alloc(s)) {
1743             return NULL;
1744         }
1745         goto retry;
1746     }
1747     qatomic_set(&s->code_gen_ptr, next);
1748     return tb;
1749 }
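
/*
 * E.g. with a 64-byte instruction cache line and code_gen_ptr at
 * 0x1010, the TB is placed at 0x1040 and the translated code begins
 * at the next 64-byte boundary past the end of the structure, so
 * the two never share a cache line.
 */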
1750 
1751 void tcg_prologue_init(void)
1752 {
1753     TCGContext *s = tcg_ctx;
1754     size_t prologue_size;
1755 
1756     s->code_ptr = s->code_gen_ptr;
1757     s->code_buf = s->code_gen_ptr;
1758     s->data_gen_ptr = NULL;
1759 
1760 #ifndef CONFIG_TCG_INTERPRETER
1761     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1762 #endif
1763 
1764     s->pool_labels = NULL;
1765 
1766     qemu_thread_jit_write();
1767     /* Generate the prologue.  */
1768     tcg_target_qemu_prologue(s);
1769 
1770     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1771     {
1772         int result = tcg_out_pool_finalize(s);
1773         tcg_debug_assert(result == 0);
1774     }
1775 
1776     prologue_size = tcg_current_code_size(s);
1777     perf_report_prologue(s->code_gen_ptr, prologue_size);
1778 
1779 #ifndef CONFIG_TCG_INTERPRETER
1780     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1781                         (uintptr_t)s->code_buf, prologue_size);
1782 #endif
1783 
1784     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1785         FILE *logfile = qemu_log_trylock();
1786         if (logfile) {
1787             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1788             if (s->data_gen_ptr) {
1789                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1790                 size_t data_size = prologue_size - code_size;
1791                 size_t i;
1792 
1793                 disas(logfile, s->code_gen_ptr, code_size);
1794 
1795                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1796                     if (sizeof(tcg_target_ulong) == 8) {
1797                         fprintf(logfile,
1798                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1799                                 (uintptr_t)s->data_gen_ptr + i,
1800                                 *(uint64_t *)(s->data_gen_ptr + i));
1801                     } else {
1802                         fprintf(logfile,
1803                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1804                                 (uintptr_t)s->data_gen_ptr + i,
1805                                 *(uint32_t *)(s->data_gen_ptr + i));
1806                     }
1807                 }
1808             } else {
1809                 disas(logfile, s->code_gen_ptr, prologue_size);
1810             }
1811             fprintf(logfile, "\n");
1812             qemu_log_unlock(logfile);
1813         }
1814     }
1815 
1816 #ifndef CONFIG_TCG_INTERPRETER
1817     /*
1818      * Assert that goto_ptr is implemented completely, by setting an epilogue.
1819      * For tci, we use NULL as the signal to return from the interpreter,
1820      * so skip this check.
1821      */
1822     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1823 #endif
1824 
1825     tcg_region_prologue_set(s);
1826 }
1827 
1828 void tcg_func_start(TCGContext *s)
1829 {
1830     tcg_pool_reset(s);
1831     s->nb_temps = s->nb_globals;
1832 
1833     /* No temps of any size or locality have been previously allocated.  */
1834     tcg_temp_ebb_reset_freed(s);
1835 
1836     /* No constant temps have been previously allocated. */
1837     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1838         if (s->const_table[i]) {
1839             g_hash_table_remove_all(s->const_table[i]);
1840         }
1841     }
1842 
1843     s->nb_ops = 0;
1844     s->nb_labels = 0;
1845     s->current_frame_offset = s->frame_start;
1846 
1847 #ifdef CONFIG_DEBUG_TCG
1848     s->goto_tb_issue_mask = 0;
1849 #endif
1850 
1851     QTAILQ_INIT(&s->ops);
1852     QTAILQ_INIT(&s->free_ops);
1853     s->emit_before_op = NULL;
1854     QSIMPLEQ_INIT(&s->labels);
1855 
1856     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1857     tcg_debug_assert(s->insn_start_words > 0);
1858 }
1859 
1860 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1861 {
1862     int n = s->nb_temps++;
1863 
1864     if (n >= TCG_MAX_TEMPS) {
1865         tcg_raise_tb_overflow(s);
1866     }
1867     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1868 }
1869 
1870 static TCGTemp *tcg_global_alloc(TCGContext *s)
1871 {
1872     TCGTemp *ts;
1873 
1874     tcg_debug_assert(s->nb_globals == s->nb_temps);
1875     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1876     s->nb_globals++;
1877     ts = tcg_temp_alloc(s);
1878     ts->kind = TEMP_GLOBAL;
1879 
1880     return ts;
1881 }
1882 
1883 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1884                                             TCGReg reg, const char *name)
1885 {
1886     TCGTemp *ts;
1887 
1888     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1889 
1890     ts = tcg_global_alloc(s);
1891     ts->base_type = type;
1892     ts->type = type;
1893     ts->kind = TEMP_FIXED;
1894     ts->reg = reg;
1895     ts->name = name;
1896     tcg_regset_set_reg(s->reserved_regs, reg);
1897 
1898     return ts;
1899 }
1900 
1901 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1902 {
1903     s->frame_start = start;
1904     s->frame_end = start + size;
1905     s->frame_temp
1906         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1907 }
1908 
1909 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1910                                             const char *name, TCGType type)
1911 {
1912     TCGContext *s = tcg_ctx;
1913     TCGTemp *base_ts = tcgv_ptr_temp(base);
1914     TCGTemp *ts = tcg_global_alloc(s);
1915     int indirect_reg = 0;
1916 
1917     switch (base_ts->kind) {
1918     case TEMP_FIXED:
1919         break;
1920     case TEMP_GLOBAL:
1921         /* We do not support double-indirect registers.  */
1922         tcg_debug_assert(!base_ts->indirect_reg);
1923         base_ts->indirect_base = 1;
1924         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1925                             ? 2 : 1);
1926         indirect_reg = 1;
1927         break;
1928     default:
1929         g_assert_not_reached();
1930     }
1931 
1932     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1933         TCGTemp *ts2 = tcg_global_alloc(s);
1934         char buf[64];
1935 
1936         ts->base_type = TCG_TYPE_I64;
1937         ts->type = TCG_TYPE_I32;
1938         ts->indirect_reg = indirect_reg;
1939         ts->mem_allocated = 1;
1940         ts->mem_base = base_ts;
1941         ts->mem_offset = offset;
1942         pstrcpy(buf, sizeof(buf), name);
1943         pstrcat(buf, sizeof(buf), "_0");
1944         ts->name = strdup(buf);
1945 
1946         tcg_debug_assert(ts2 == ts + 1);
1947         ts2->base_type = TCG_TYPE_I64;
1948         ts2->type = TCG_TYPE_I32;
1949         ts2->indirect_reg = indirect_reg;
1950         ts2->mem_allocated = 1;
1951         ts2->mem_base = base_ts;
1952         ts2->mem_offset = offset + 4;
1953         ts2->temp_subindex = 1;
1954         pstrcpy(buf, sizeof(buf), name);
1955         pstrcat(buf, sizeof(buf), "_1");
1956         ts2->name = strdup(buf);
1957     } else {
1958         ts->base_type = type;
1959         ts->type = type;
1960         ts->indirect_reg = indirect_reg;
1961         ts->mem_allocated = 1;
1962         ts->mem_base = base_ts;
1963         ts->mem_offset = offset;
1964         ts->name = name;
1965     }
1966     return ts;
1967 }
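
/*
 * Illustrative use, with a hypothetical CPUFooState: on a 32-bit
 * host a 64-bit guest register yields two adjacent temps, "reg_0"
 * covering the field's first four bytes and "reg_1" the second;
 * on a 64-bit host a single temp named "reg" covers the field.
 *
 *   cpu_reg = tcg_global_mem_new_i64(tcg_env,
 *                                    offsetof(CPUFooState, reg),
 *                                    "reg");
 */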
1968 
1969 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1970 {
1971     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1972     return temp_tcgv_i32(ts);
1973 }
1974 
1975 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1976 {
1977     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1978     return temp_tcgv_i64(ts);
1979 }
1980 
1981 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1982 {
1983     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1984     return temp_tcgv_ptr(ts);
1985 }
1986 
1987 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1988 {
1989     TCGContext *s = tcg_ctx;
1990     TCGTemp *ts;
1991     int n;
1992 
1993     if (kind == TEMP_EBB) {
1994         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1995 
1996         if (idx < TCG_MAX_TEMPS) {
1997             /* There is already an available temp with the right type.  */
1998             clear_bit(idx, s->free_temps[type].l);
1999 
2000             ts = &s->temps[idx];
2001             ts->temp_allocated = 1;
2002             tcg_debug_assert(ts->base_type == type);
2003             tcg_debug_assert(ts->kind == kind);
2004             return ts;
2005         }
2006     } else {
2007         tcg_debug_assert(kind == TEMP_TB);
2008     }
2009 
2010     switch (type) {
2011     case TCG_TYPE_I32:
2012     case TCG_TYPE_V64:
2013     case TCG_TYPE_V128:
2014     case TCG_TYPE_V256:
2015         n = 1;
2016         break;
2017     case TCG_TYPE_I64:
2018         n = 64 / TCG_TARGET_REG_BITS;
2019         break;
2020     case TCG_TYPE_I128:
2021         n = 128 / TCG_TARGET_REG_BITS;
2022         break;
2023     default:
2024         g_assert_not_reached();
2025     }
2026 
2027     ts = tcg_temp_alloc(s);
2028     ts->base_type = type;
2029     ts->temp_allocated = 1;
2030     ts->kind = kind;
2031 
2032     if (n == 1) {
2033         ts->type = type;
2034     } else {
2035         ts->type = TCG_TYPE_REG;
2036 
2037         for (int i = 1; i < n; ++i) {
2038             TCGTemp *ts2 = tcg_temp_alloc(s);
2039 
2040             tcg_debug_assert(ts2 == ts + i);
2041             ts2->base_type = type;
2042             ts2->type = TCG_TYPE_REG;
2043             ts2->temp_allocated = 1;
2044             ts2->temp_subindex = i;
2045             ts2->kind = kind;
2046         }
2047     }
2048     return ts;
2049 }
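
/*
 * Note the asymmetry above: TEMP_EBB temps are recycled via the
 * free_temps bitmaps (see tcg_temp_free_internal), so allocation
 * first looks for a previously freed temp of the same base type,
 * while TEMP_TB temps always extend the temps[] array.  Multi-word
 * types take one TCGTemp per host word, e.g. an i128 occupies
 * 128 / TCG_TARGET_REG_BITS consecutive entries with temp_subindex
 * running from 0 upward.
 */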
2050 
2051 TCGv_i32 tcg_temp_new_i32(void)
2052 {
2053     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2054 }
2055 
2056 TCGv_i32 tcg_temp_ebb_new_i32(void)
2057 {
2058     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2059 }
2060 
2061 TCGv_i64 tcg_temp_new_i64(void)
2062 {
2063     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2064 }
2065 
2066 TCGv_i64 tcg_temp_ebb_new_i64(void)
2067 {
2068     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2069 }
2070 
2071 TCGv_ptr tcg_temp_new_ptr(void)
2072 {
2073     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2074 }
2075 
2076 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2077 {
2078     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2079 }
2080 
2081 TCGv_i128 tcg_temp_new_i128(void)
2082 {
2083     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2084 }
2085 
2086 TCGv_i128 tcg_temp_ebb_new_i128(void)
2087 {
2088     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2089 }
2090 
2091 TCGv_vec tcg_temp_new_vec(TCGType type)
2092 {
2093     TCGTemp *t;
2094 
2095 #ifdef CONFIG_DEBUG_TCG
2096     switch (type) {
2097     case TCG_TYPE_V64:
2098         assert(TCG_TARGET_HAS_v64);
2099         break;
2100     case TCG_TYPE_V128:
2101         assert(TCG_TARGET_HAS_v128);
2102         break;
2103     case TCG_TYPE_V256:
2104         assert(TCG_TARGET_HAS_v256);
2105         break;
2106     default:
2107         g_assert_not_reached();
2108     }
2109 #endif
2110 
2111     t = tcg_temp_new_internal(type, TEMP_EBB);
2112     return temp_tcgv_vec(t);
2113 }
2114 
2115 /* Create a new temp of the same type as an existing temp.  */
2116 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2117 {
2118     TCGTemp *t = tcgv_vec_temp(match);
2119 
2120     tcg_debug_assert(t->temp_allocated != 0);
2121 
2122     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2123     return temp_tcgv_vec(t);
2124 }
2125 
2126 void tcg_temp_free_internal(TCGTemp *ts)
2127 {
2128     TCGContext *s = tcg_ctx;
2129 
2130     switch (ts->kind) {
2131     case TEMP_CONST:
2132     case TEMP_TB:
2133         /* Silently ignore free. */
2134         break;
2135     case TEMP_EBB:
2136         tcg_debug_assert(ts->temp_allocated != 0);
2137         ts->temp_allocated = 0;
2138         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2139         break;
2140     default:
2141         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2142         g_assert_not_reached();
2143     }
2144 }
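
/*
 * Only TEMP_EBB participates in explicit free/reuse: its index goes
 * back into free_temps for tcg_temp_new_internal() to find.  Frees
 * of TEMP_TB and TEMP_CONST temps are ignored above; both simply
 * persist until tcg_func_start() resets the context for the next
 * translation.
 */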
2145 
2146 void tcg_temp_free_i32(TCGv_i32 arg)
2147 {
2148     tcg_temp_free_internal(tcgv_i32_temp(arg));
2149 }
2150 
2151 void tcg_temp_free_i64(TCGv_i64 arg)
2152 {
2153     tcg_temp_free_internal(tcgv_i64_temp(arg));
2154 }
2155 
2156 void tcg_temp_free_i128(TCGv_i128 arg)
2157 {
2158     tcg_temp_free_internal(tcgv_i128_temp(arg));
2159 }
2160 
2161 void tcg_temp_free_ptr(TCGv_ptr arg)
2162 {
2163     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2164 }
2165 
2166 void tcg_temp_free_vec(TCGv_vec arg)
2167 {
2168     tcg_temp_free_internal(tcgv_vec_temp(arg));
2169 }
2170 
2171 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2172 {
2173     TCGContext *s = tcg_ctx;
2174     GHashTable *h = s->const_table[type];
2175     TCGTemp *ts;
2176 
2177     if (h == NULL) {
2178         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2179         s->const_table[type] = h;
2180     }
2181 
2182     ts = g_hash_table_lookup(h, &val);
2183     if (ts == NULL) {
2184         int64_t *val_ptr;
2185 
2186         ts = tcg_temp_alloc(s);
2187 
2188         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2189             TCGTemp *ts2 = tcg_temp_alloc(s);
2190 
2191             tcg_debug_assert(ts2 == ts + 1);
2192 
2193             ts->base_type = TCG_TYPE_I64;
2194             ts->type = TCG_TYPE_I32;
2195             ts->kind = TEMP_CONST;
2196             ts->temp_allocated = 1;
2197 
2198             ts2->base_type = TCG_TYPE_I64;
2199             ts2->type = TCG_TYPE_I32;
2200             ts2->kind = TEMP_CONST;
2201             ts2->temp_allocated = 1;
2202             ts2->temp_subindex = 1;
2203 
2204             /*
2205              * Retain the full value of the 64-bit constant in the low
2206              * part, so that the hash table works.  Actual uses will
2207              * truncate the value to the low part.
2208              */
2209             ts[HOST_BIG_ENDIAN].val = val;
2210             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2211             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2212         } else {
2213             ts->base_type = type;
2214             ts->type = type;
2215             ts->kind = TEMP_CONST;
2216             ts->temp_allocated = 1;
2217             ts->val = val;
2218             val_ptr = &ts->val;
2219         }
2220         g_hash_table_insert(h, val_ptr, ts);
2221     }
2222 
2223     return ts;
2224 }
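
/*
 * Constants are interned: repeated requests for the same (type, val)
 * pair return the same TCGTemp, so e.g. two calls of
 * tcg_constant_i32(42) yield the same handle.  This is also why a
 * constant temp must never be used as the destination of an op.
 */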
2225 
2226 TCGv_i32 tcg_constant_i32(int32_t val)
2227 {
2228     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2229 }
2230 
2231 TCGv_i64 tcg_constant_i64(int64_t val)
2232 {
2233     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2234 }
2235 
2236 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2237 {
2238     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2239 }
2240 
2241 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2242 {
2243     val = dup_const(vece, val);
2244     return temp_tcgv_vec(tcg_constant_internal(type, val));
2245 }
2246 
2247 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2248 {
2249     TCGTemp *t = tcgv_vec_temp(match);
2250 
2251     tcg_debug_assert(t->temp_allocated != 0);
2252     return tcg_constant_vec(t->base_type, vece, val);
2253 }
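
/*
 * dup_const() replicates the value across the element size, e.g.
 * for vece == MO_8 and val == 0xab it produces the 64-bit pattern
 * 0xabababababababab, which the backend then broadcasts into the
 * full vector register.
 */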
2254 
2255 #ifdef CONFIG_DEBUG_TCG
2256 size_t temp_idx(TCGTemp *ts)
2257 {
2258     ptrdiff_t n = ts - tcg_ctx->temps;
2259     assert(n >= 0 && n < tcg_ctx->nb_temps);
2260     return n;
2261 }
2262 
2263 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2264 {
2265     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2266 
2267     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2268     assert(o % sizeof(TCGTemp) == 0);
2269 
2270     return (void *)tcg_ctx + (uintptr_t)v;
2271 }
2272 #endif /* CONFIG_DEBUG_TCG */
2273 
2274 /*
2275  * Return true if OP may appear in the opcode stream with TYPE.
2276  * Test the runtime variable that controls each opcode.
2277  */
2278 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2279 {
2280     bool has_type;
2281 
2282     switch (type) {
2283     case TCG_TYPE_I32:
2284         has_type = true;
2285         break;
2286     case TCG_TYPE_I64:
2287         has_type = TCG_TARGET_REG_BITS == 64;
2288         break;
2289     case TCG_TYPE_V64:
2290         has_type = TCG_TARGET_HAS_v64;
2291         break;
2292     case TCG_TYPE_V128:
2293         has_type = TCG_TARGET_HAS_v128;
2294         break;
2295     case TCG_TYPE_V256:
2296         has_type = TCG_TARGET_HAS_v256;
2297         break;
2298     default:
2299         has_type = false;
2300         break;
2301     }
2302 
2303     switch (op) {
2304     case INDEX_op_discard:
2305     case INDEX_op_set_label:
2306     case INDEX_op_call:
2307     case INDEX_op_br:
2308     case INDEX_op_mb:
2309     case INDEX_op_insn_start:
2310     case INDEX_op_exit_tb:
2311     case INDEX_op_goto_tb:
2312     case INDEX_op_goto_ptr:
2313     case INDEX_op_qemu_ld_i32:
2314     case INDEX_op_qemu_st_i32:
2315     case INDEX_op_qemu_ld_i64:
2316     case INDEX_op_qemu_st_i64:
2317         return true;
2318 
2319     case INDEX_op_qemu_st8_i32:
2320         return TCG_TARGET_HAS_qemu_st8_i32;
2321 
2322     case INDEX_op_qemu_ld_i128:
2323     case INDEX_op_qemu_st_i128:
2324         return TCG_TARGET_HAS_qemu_ldst_i128;
2325 
2326     case INDEX_op_add:
2327     case INDEX_op_and:
2328     case INDEX_op_brcond:
2329     case INDEX_op_extract:
2330     case INDEX_op_mov:
2331     case INDEX_op_movcond:
2332     case INDEX_op_negsetcond:
2333     case INDEX_op_or:
2334     case INDEX_op_setcond:
2335     case INDEX_op_sextract:
2336     case INDEX_op_xor:
2337         return has_type;
2338 
2339     case INDEX_op_ld8u_i32:
2340     case INDEX_op_ld8s_i32:
2341     case INDEX_op_ld16u_i32:
2342     case INDEX_op_ld16s_i32:
2343     case INDEX_op_ld_i32:
2344     case INDEX_op_st8_i32:
2345     case INDEX_op_st16_i32:
2346     case INDEX_op_st_i32:
2347     case INDEX_op_deposit_i32:
2348         return true;
2349 
2350     case INDEX_op_extract2_i32:
2351         return TCG_TARGET_HAS_extract2_i32;
2352     case INDEX_op_add2_i32:
2353         return TCG_TARGET_HAS_add2_i32;
2354     case INDEX_op_sub2_i32:
2355         return TCG_TARGET_HAS_sub2_i32;
2356 
2357     case INDEX_op_brcond2_i32:
2358     case INDEX_op_setcond2_i32:
2359         return TCG_TARGET_REG_BITS == 32;
2360 
2361     case INDEX_op_ld8u_i64:
2362     case INDEX_op_ld8s_i64:
2363     case INDEX_op_ld16u_i64:
2364     case INDEX_op_ld16s_i64:
2365     case INDEX_op_ld32u_i64:
2366     case INDEX_op_ld32s_i64:
2367     case INDEX_op_ld_i64:
2368     case INDEX_op_st8_i64:
2369     case INDEX_op_st16_i64:
2370     case INDEX_op_st32_i64:
2371     case INDEX_op_st_i64:
2372     case INDEX_op_ext_i32_i64:
2373     case INDEX_op_extu_i32_i64:
2374     case INDEX_op_deposit_i64:
2375         return TCG_TARGET_REG_BITS == 64;
2376 
2377     case INDEX_op_extract2_i64:
2378         return TCG_TARGET_HAS_extract2_i64;
2379     case INDEX_op_extrl_i64_i32:
2380     case INDEX_op_extrh_i64_i32:
2381         return TCG_TARGET_HAS_extr_i64_i32;
2382     case INDEX_op_add2_i64:
2383         return TCG_TARGET_HAS_add2_i64;
2384     case INDEX_op_sub2_i64:
2385         return TCG_TARGET_HAS_sub2_i64;
2386 
2387     case INDEX_op_mov_vec:
2388     case INDEX_op_dup_vec:
2389     case INDEX_op_dupm_vec:
2390     case INDEX_op_ld_vec:
2391     case INDEX_op_st_vec:
2392     case INDEX_op_add_vec:
2393     case INDEX_op_sub_vec:
2394     case INDEX_op_and_vec:
2395     case INDEX_op_or_vec:
2396     case INDEX_op_xor_vec:
2397     case INDEX_op_cmp_vec:
2398         return has_type;
2399     case INDEX_op_dup2_vec:
2400         return has_type && TCG_TARGET_REG_BITS == 32;
2401     case INDEX_op_not_vec:
2402         return has_type && TCG_TARGET_HAS_not_vec;
2403     case INDEX_op_neg_vec:
2404         return has_type && TCG_TARGET_HAS_neg_vec;
2405     case INDEX_op_abs_vec:
2406         return has_type && TCG_TARGET_HAS_abs_vec;
2407     case INDEX_op_andc_vec:
2408         return has_type && TCG_TARGET_HAS_andc_vec;
2409     case INDEX_op_orc_vec:
2410         return has_type && TCG_TARGET_HAS_orc_vec;
2411     case INDEX_op_nand_vec:
2412         return has_type && TCG_TARGET_HAS_nand_vec;
2413     case INDEX_op_nor_vec:
2414         return has_type && TCG_TARGET_HAS_nor_vec;
2415     case INDEX_op_eqv_vec:
2416         return has_type && TCG_TARGET_HAS_eqv_vec;
2417     case INDEX_op_mul_vec:
2418         return has_type && TCG_TARGET_HAS_mul_vec;
2419     case INDEX_op_shli_vec:
2420     case INDEX_op_shri_vec:
2421     case INDEX_op_sari_vec:
2422         return has_type && TCG_TARGET_HAS_shi_vec;
2423     case INDEX_op_shls_vec:
2424     case INDEX_op_shrs_vec:
2425     case INDEX_op_sars_vec:
2426         return has_type && TCG_TARGET_HAS_shs_vec;
2427     case INDEX_op_shlv_vec:
2428     case INDEX_op_shrv_vec:
2429     case INDEX_op_sarv_vec:
2430         return has_type && TCG_TARGET_HAS_shv_vec;
2431     case INDEX_op_rotli_vec:
2432         return has_type && TCG_TARGET_HAS_roti_vec;
2433     case INDEX_op_rotls_vec:
2434         return has_type && TCG_TARGET_HAS_rots_vec;
2435     case INDEX_op_rotlv_vec:
2436     case INDEX_op_rotrv_vec:
2437         return has_type && TCG_TARGET_HAS_rotv_vec;
2438     case INDEX_op_ssadd_vec:
2439     case INDEX_op_usadd_vec:
2440     case INDEX_op_sssub_vec:
2441     case INDEX_op_ussub_vec:
2442         return has_type && TCG_TARGET_HAS_sat_vec;
2443     case INDEX_op_smin_vec:
2444     case INDEX_op_umin_vec:
2445     case INDEX_op_smax_vec:
2446     case INDEX_op_umax_vec:
2447         return has_type && TCG_TARGET_HAS_minmax_vec;
2448     case INDEX_op_bitsel_vec:
2449         return has_type && TCG_TARGET_HAS_bitsel_vec;
2450     case INDEX_op_cmpsel_vec:
2451         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2452 
2453     default:
2454         if (op < INDEX_op_last_generic) {
2455             const TCGOutOp *outop;
2456             TCGConstraintSetIndex con_set;
2457 
2458             if (!has_type) {
2459                 return false;
2460             }
2461 
2462             outop = all_outop[op];
2463             tcg_debug_assert(outop != NULL);
2464 
2465             con_set = outop->static_constraint;
2466             if (con_set == C_Dynamic) {
2467                 con_set = outop->dynamic_constraint(type, flags);
2468             }
2469             if (con_set >= 0) {
2470                 return true;
2471             }
2472             tcg_debug_assert(con_set == C_NotImplemented);
2473             return false;
2474         }
2475         tcg_debug_assert(op < NB_OPS);
2476         return true;
2477 
2478     case INDEX_op_last_generic:
2479         g_assert_not_reached();
2480     }
2481 }
2482 
2483 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2484 {
2485     unsigned width;
2486 
2487     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2488     width = (type == TCG_TYPE_I32 ? 32 : 64);
2489 
2490     tcg_debug_assert(ofs < width);
2491     tcg_debug_assert(len > 0);
2492     tcg_debug_assert(len <= width - ofs);
2493 
2494     return TCG_TARGET_deposit_valid(type, ofs, len);
2495 }
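
/*
 * E.g. tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8) asks whether the
 * backend can deposit an 8-bit field at bit offset 8 of a 32-bit
 * value.  The asserts only check that the field lies within the
 * type; the final say belongs to TCG_TARGET_deposit_valid.
 */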
2496 
2497 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2498 
2499 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2500                           TCGTemp *ret, TCGTemp **args)
2501 {
2502     TCGv_i64 extend_free[MAX_CALL_IARGS];
2503     int n_extend = 0;
2504     TCGOp *op;
2505     int i, n, pi = 0, total_args;
2506 
2507     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2508         init_call_layout(info);
2509         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2510     }
2511 
2512     total_args = info->nr_out + info->nr_in + 2;
2513     op = tcg_op_alloc(INDEX_op_call, total_args);
2514 
2515 #ifdef CONFIG_PLUGIN
2516     /* Flag helpers that may affect guest state */
2517     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2518         tcg_ctx->plugin_insn->calls_helpers = true;
2519     }
2520 #endif
2521 
2522     TCGOP_CALLO(op) = n = info->nr_out;
2523     switch (n) {
2524     case 0:
2525         tcg_debug_assert(ret == NULL);
2526         break;
2527     case 1:
2528         tcg_debug_assert(ret != NULL);
2529         op->args[pi++] = temp_arg(ret);
2530         break;
2531     case 2:
2532     case 4:
2533         tcg_debug_assert(ret != NULL);
2534         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2535         tcg_debug_assert(ret->temp_subindex == 0);
2536         for (i = 0; i < n; ++i) {
2537             op->args[pi++] = temp_arg(ret + i);
2538         }
2539         break;
2540     default:
2541         g_assert_not_reached();
2542     }
2543 
2544     TCGOP_CALLI(op) = n = info->nr_in;
2545     for (i = 0; i < n; i++) {
2546         const TCGCallArgumentLoc *loc = &info->in[i];
2547         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2548 
2549         switch (loc->kind) {
2550         case TCG_CALL_ARG_NORMAL:
2551         case TCG_CALL_ARG_BY_REF:
2552         case TCG_CALL_ARG_BY_REF_N:
2553             op->args[pi++] = temp_arg(ts);
2554             break;
2555 
2556         case TCG_CALL_ARG_EXTEND_U:
2557         case TCG_CALL_ARG_EXTEND_S:
2558             {
2559                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2560                 TCGv_i32 orig = temp_tcgv_i32(ts);
2561 
2562                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2563                     tcg_gen_ext_i32_i64(temp, orig);
2564                 } else {
2565                     tcg_gen_extu_i32_i64(temp, orig);
2566                 }
2567                 op->args[pi++] = tcgv_i64_arg(temp);
2568                 extend_free[n_extend++] = temp;
2569             }
2570             break;
2571 
2572         default:
2573             g_assert_not_reached();
2574         }
2575     }
2576     op->args[pi++] = (uintptr_t)func;
2577     op->args[pi++] = (uintptr_t)info;
2578     tcg_debug_assert(pi == total_args);
2579 
2580     if (tcg_ctx->emit_before_op) {
2581         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2582     } else {
2583         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2584     }
2585 
2586     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2587     for (i = 0; i < n_extend; ++i) {
2588         tcg_temp_free_i64(extend_free[i]);
2589     }
2590 }
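
/*
 * The resulting call op packs its arguments as:
 *
 *   args[0 .. nr_out-1]                output temp(s)
 *   args[nr_out .. nr_out+nr_in-1]     input temps, in layout order
 *   args[nr_out+nr_in]                 function pointer
 *   args[nr_out+nr_in+1]               TCGHelperInfo pointer
 *
 * matching the decoding done by tcg_call_func() / tcg_call_info()
 * and by tcg_dump_ops() below.
 */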
2591 
2592 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2593 {
2594     tcg_gen_callN(func, info, ret, NULL);
2595 }
2596 
2597 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2598 {
2599     tcg_gen_callN(func, info, ret, &t1);
2600 }
2601 
2602 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2603                    TCGTemp *t1, TCGTemp *t2)
2604 {
2605     TCGTemp *args[2] = { t1, t2 };
2606     tcg_gen_callN(func, info, ret, args);
2607 }
2608 
2609 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2610                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2611 {
2612     TCGTemp *args[3] = { t1, t2, t3 };
2613     tcg_gen_callN(func, info, ret, args);
2614 }
2615 
2616 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2617                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2618 {
2619     TCGTemp *args[4] = { t1, t2, t3, t4 };
2620     tcg_gen_callN(func, info, ret, args);
2621 }
2622 
2623 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2624                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2625 {
2626     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2627     tcg_gen_callN(func, info, ret, args);
2628 }
2629 
2630 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2631                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2632                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2633 {
2634     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2635     tcg_gen_callN(func, info, ret, args);
2636 }
2637 
2638 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2639                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2640                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2641 {
2642     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2643     tcg_gen_callN(func, info, ret, args);
2644 }
2645 
2646 static void tcg_reg_alloc_start(TCGContext *s)
2647 {
2648     int i, n;
2649 
2650     for (i = 0, n = s->nb_temps; i < n; i++) {
2651         TCGTemp *ts = &s->temps[i];
2652         TCGTempVal val = TEMP_VAL_MEM;
2653 
2654         switch (ts->kind) {
2655         case TEMP_CONST:
2656             val = TEMP_VAL_CONST;
2657             break;
2658         case TEMP_FIXED:
2659             val = TEMP_VAL_REG;
2660             break;
2661         case TEMP_GLOBAL:
2662             break;
2663         case TEMP_EBB:
2664             val = TEMP_VAL_DEAD;
2665             /* fall through */
2666         case TEMP_TB:
2667             ts->mem_allocated = 0;
2668             break;
2669         default:
2670             g_assert_not_reached();
2671         }
2672         ts->val_type = val;
2673     }
2674 
2675     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2676 }
2677 
2678 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2679                                  TCGTemp *ts)
2680 {
2681     int idx = temp_idx(ts);
2682 
2683     switch (ts->kind) {
2684     case TEMP_FIXED:
2685     case TEMP_GLOBAL:
2686         pstrcpy(buf, buf_size, ts->name);
2687         break;
2688     case TEMP_TB:
2689         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2690         break;
2691     case TEMP_EBB:
2692         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2693         break;
2694     case TEMP_CONST:
2695         switch (ts->type) {
2696         case TCG_TYPE_I32:
2697             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2698             break;
2699 #if TCG_TARGET_REG_BITS > 32
2700         case TCG_TYPE_I64:
2701             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2702             break;
2703 #endif
2704         case TCG_TYPE_V64:
2705         case TCG_TYPE_V128:
2706         case TCG_TYPE_V256:
2707             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2708                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2709             break;
2710         default:
2711             g_assert_not_reached();
2712         }
2713         break;
2714     }
2715     return buf;
2716 }
2717 
2718 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2719                              int buf_size, TCGArg arg)
2720 {
2721     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2722 }
2723 
2724 static const char * const cond_name[] =
2725 {
2726     [TCG_COND_NEVER] = "never",
2727     [TCG_COND_ALWAYS] = "always",
2728     [TCG_COND_EQ] = "eq",
2729     [TCG_COND_NE] = "ne",
2730     [TCG_COND_LT] = "lt",
2731     [TCG_COND_GE] = "ge",
2732     [TCG_COND_LE] = "le",
2733     [TCG_COND_GT] = "gt",
2734     [TCG_COND_LTU] = "ltu",
2735     [TCG_COND_GEU] = "geu",
2736     [TCG_COND_LEU] = "leu",
2737     [TCG_COND_GTU] = "gtu",
2738     [TCG_COND_TSTEQ] = "tsteq",
2739     [TCG_COND_TSTNE] = "tstne",
2740 };
2741 
2742 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2743 {
2744     [MO_UB]   = "ub",
2745     [MO_SB]   = "sb",
2746     [MO_LEUW] = "leuw",
2747     [MO_LESW] = "lesw",
2748     [MO_LEUL] = "leul",
2749     [MO_LESL] = "lesl",
2750     [MO_LEUQ] = "leq",
2751     [MO_BEUW] = "beuw",
2752     [MO_BESW] = "besw",
2753     [MO_BEUL] = "beul",
2754     [MO_BESL] = "besl",
2755     [MO_BEUQ] = "beq",
2756     [MO_128 + MO_BE] = "beo",
2757     [MO_128 + MO_LE] = "leo",
2758 };
2759 
2760 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2761     [MO_UNALN >> MO_ASHIFT]    = "un+",
2762     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2763     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2764     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2765     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2766     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2767     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2768     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2769 };
2770 
2771 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2772     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2773     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2774     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2775     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2776     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2777     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2778 };
2779 
2780 static const char bswap_flag_name[][6] = {
2781     [TCG_BSWAP_IZ] = "iz",
2782     [TCG_BSWAP_OZ] = "oz",
2783     [TCG_BSWAP_OS] = "os",
2784     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2785     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2786 };
2787 
2788 #ifdef CONFIG_PLUGIN
2789 static const char * const plugin_from_name[] = {
2790     "from-tb",
2791     "from-insn",
2792     "after-insn",
2793     "after-tb",
2794 };
2795 #endif
2796 
2797 static inline bool tcg_regset_single(TCGRegSet d)
2798 {
2799     return (d & (d - 1)) == 0;
2800 }
2801 
2802 static inline TCGReg tcg_regset_first(TCGRegSet d)
2803 {
2804     if (TCG_TARGET_NB_REGS <= 32) {
2805         return ctz32(d);
2806     } else {
2807         return ctz64(d);
2808     }
2809 }
2810 
2811 /* Return only the number of characters output -- no error return. */
2812 #define ne_fprintf(...) \
2813     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2814 
2815 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2816 {
2817     char buf[128];
2818     TCGOp *op;
2819 
2820     QTAILQ_FOREACH(op, &s->ops, link) {
2821         int i, k, nb_oargs, nb_iargs, nb_cargs;
2822         const TCGOpDef *def;
2823         TCGOpcode c;
2824         int col = 0;
2825 
2826         c = op->opc;
2827         def = &tcg_op_defs[c];
2828 
2829         if (c == INDEX_op_insn_start) {
2830             nb_oargs = 0;
2831             col += ne_fprintf(f, "\n ----");
2832 
2833             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2834                 col += ne_fprintf(f, " %016" PRIx64,
2835                                   tcg_get_insn_start_param(op, i));
2836             }
2837         } else if (c == INDEX_op_call) {
2838             const TCGHelperInfo *info = tcg_call_info(op);
2839             void *func = tcg_call_func(op);
2840 
2841             /* variable number of arguments */
2842             nb_oargs = TCGOP_CALLO(op);
2843             nb_iargs = TCGOP_CALLI(op);
2844             nb_cargs = def->nb_cargs;
2845 
2846             col += ne_fprintf(f, " %s ", def->name);
2847 
2848             /*
2849              * Print the function name from TCGHelperInfo, if available.
2850              * Note that plugins have a template function for the info,
2851              * but the actual function pointer comes from the plugin.
2852              */
2853             if (func == info->func) {
2854                 col += ne_fprintf(f, "%s", info->name);
2855             } else {
2856                 col += ne_fprintf(f, "plugin(%p)", func);
2857             }
2858 
2859             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2860             for (i = 0; i < nb_oargs; i++) {
2861                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2862                                                             op->args[i]));
2863             }
2864             for (i = 0; i < nb_iargs; i++) {
2865                 TCGArg arg = op->args[nb_oargs + i];
2866                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2867                 col += ne_fprintf(f, ",%s", t);
2868             }
2869         } else {
2870             if (def->flags & TCG_OPF_INT) {
2871                 col += ne_fprintf(f, " %s_i%d ",
2872                                   def->name,
2873                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2874             } else if (def->flags & TCG_OPF_VECTOR) {
2875                 col += ne_fprintf(f, "%s v%d,e%d,",
2876                                   def->name,
2877                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2878                                   8 << TCGOP_VECE(op));
2879             } else {
2880                 col += ne_fprintf(f, " %s ", def->name);
2881             }
2882 
2883             nb_oargs = def->nb_oargs;
2884             nb_iargs = def->nb_iargs;
2885             nb_cargs = def->nb_cargs;
2886 
2887             k = 0;
2888             for (i = 0; i < nb_oargs; i++) {
2889                 const char *sep =  k ? "," : "";
2890                 col += ne_fprintf(f, "%s%s", sep,
2891                                   tcg_get_arg_str(s, buf, sizeof(buf),
2892                                                   op->args[k++]));
2893             }
2894             for (i = 0; i < nb_iargs; i++) {
2895                 const char *sep =  k ? "," : "";
2896                 col += ne_fprintf(f, "%s%s", sep,
2897                                   tcg_get_arg_str(s, buf, sizeof(buf),
2898                                                   op->args[k++]));
2899             }
2900             switch (c) {
2901             case INDEX_op_brcond:
2902             case INDEX_op_setcond:
2903             case INDEX_op_negsetcond:
2904             case INDEX_op_movcond:
2905             case INDEX_op_brcond2_i32:
2906             case INDEX_op_setcond2_i32:
2907             case INDEX_op_cmp_vec:
2908             case INDEX_op_cmpsel_vec:
2909                 if (op->args[k] < ARRAY_SIZE(cond_name)
2910                     && cond_name[op->args[k]]) {
2911                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2912                 } else {
2913                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2914                 }
2915                 i = 1;
2916                 break;
2917             case INDEX_op_qemu_ld_i32:
2918             case INDEX_op_qemu_st_i32:
2919             case INDEX_op_qemu_st8_i32:
2920             case INDEX_op_qemu_ld_i64:
2921             case INDEX_op_qemu_st_i64:
2922             case INDEX_op_qemu_ld_i128:
2923             case INDEX_op_qemu_st_i128:
2924                 {
2925                     const char *s_al, *s_op, *s_at;
2926                     MemOpIdx oi = op->args[k++];
2927                     MemOp mop = get_memop(oi);
2928                     unsigned ix = get_mmuidx(oi);
2929 
2930                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2931                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2932                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2933                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2934 
2935                     /* If all fields are accounted for, print symbolically. */
2936                     if (!mop && s_al && s_op && s_at) {
2937                         col += ne_fprintf(f, ",%s%s%s,%u",
2938                                           s_at, s_al, s_op, ix);
2939                     } else {
2940                         mop = get_memop(oi);
2941                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2942                     }
2943                     i = 1;
2944                 }
2945                 break;
2946             case INDEX_op_bswap16:
2947             case INDEX_op_bswap32:
2948             case INDEX_op_bswap64:
2949                 {
2950                     TCGArg flags = op->args[k];
2951                     const char *name = NULL;
2952 
2953                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2954                         name = bswap_flag_name[flags];
2955                     }
2956                     if (name) {
2957                         col += ne_fprintf(f, ",%s", name);
2958                     } else {
2959                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2960                     }
2961                     i = k = 1;
2962                 }
2963                 break;
2964 #ifdef CONFIG_PLUGIN
2965             case INDEX_op_plugin_cb:
2966                 {
2967                     TCGArg from = op->args[k++];
2968                     const char *name = NULL;
2969 
2970                     if (from < ARRAY_SIZE(plugin_from_name)) {
2971                         name = plugin_from_name[from];
2972                     }
2973                     if (name) {
2974                         col += ne_fprintf(f, "%s", name);
2975                     } else {
2976                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2977                     }
2978                     i = 1;
2979                 }
2980                 break;
2981 #endif
2982             default:
2983                 i = 0;
2984                 break;
2985             }
2986             switch (c) {
2987             case INDEX_op_set_label:
2988             case INDEX_op_br:
2989             case INDEX_op_brcond:
2990             case INDEX_op_brcond2_i32:
2991                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2992                                   arg_label(op->args[k])->id);
2993                 i++, k++;
2994                 break;
2995             case INDEX_op_mb:
2996                 {
2997                     TCGBar membar = op->args[k];
2998                     const char *b_op, *m_op;
2999 
3000                     switch (membar & TCG_BAR_SC) {
3001                     case 0:
3002                         b_op = "none";
3003                         break;
3004                     case TCG_BAR_LDAQ:
3005                         b_op = "acq";
3006                         break;
3007                     case TCG_BAR_STRL:
3008                         b_op = "rel";
3009                         break;
3010                     case TCG_BAR_SC:
3011                         b_op = "seq";
3012                         break;
3013                     default:
3014                         g_assert_not_reached();
3015                     }
3016 
3017                     switch (membar & TCG_MO_ALL) {
3018                     case 0:
3019                         m_op = "none";
3020                         break;
3021                     case TCG_MO_LD_LD:
3022                         m_op = "rr";
3023                         break;
3024                     case TCG_MO_LD_ST:
3025                         m_op = "rw";
3026                         break;
3027                     case TCG_MO_ST_LD:
3028                         m_op = "wr";
3029                         break;
3030                     case TCG_MO_ST_ST:
3031                         m_op = "ww";
3032                         break;
3033                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3034                         m_op = "rr+rw";
3035                         break;
3036                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3037                         m_op = "rr+wr";
3038                         break;
3039                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3040                         m_op = "rr+ww";
3041                         break;
3042                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3043                         m_op = "rw+wr";
3044                         break;
3045                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3046                         m_op = "rw+ww";
3047                         break;
3048                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3049                         m_op = "wr+ww";
3050                         break;
3051                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3052                         m_op = "rr+rw+wr";
3053                         break;
3054                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3055                         m_op = "rr+rw+ww";
3056                         break;
3057                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3058                         m_op = "rr+wr+ww";
3059                         break;
3060                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3061                         m_op = "rw+wr+ww";
3062                         break;
3063                     case TCG_MO_ALL:
3064                         m_op = "all";
3065                         break;
3066                     default:
3067                         g_assert_not_reached();
3068                     }
3069 
3070                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3071                     i++, k++;
3072                 }
3073                 break;
3074             default:
3075                 break;
3076             }
3077             for (; i < nb_cargs; i++, k++) {
3078                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3079                                   op->args[k]);
3080             }
3081         }
3082 
3083         if (have_prefs || op->life) {
3084             for (; col < 40; ++col) {
3085                 putc(' ', f);
3086             }
3087         }
3088 
3089         if (op->life) {
3090             unsigned life = op->life;
3091 
3092             if (life & (SYNC_ARG * 3)) {
3093                 ne_fprintf(f, "  sync:");
3094                 for (i = 0; i < 2; ++i) {
3095                     if (life & (SYNC_ARG << i)) {
3096                         ne_fprintf(f, " %d", i);
3097                     }
3098                 }
3099             }
3100             life /= DEAD_ARG;
3101             if (life) {
3102                 ne_fprintf(f, "  dead:");
3103                 for (i = 0; life; ++i, life >>= 1) {
3104                     if (life & 1) {
3105                         ne_fprintf(f, " %d", i);
3106                     }
3107                 }
3108             }
3109         }
3110 
3111         if (have_prefs) {
3112             for (i = 0; i < nb_oargs; ++i) {
3113                 TCGRegSet set = output_pref(op, i);
3114 
3115                 if (i == 0) {
3116                     ne_fprintf(f, "  pref=");
3117                 } else {
3118                     ne_fprintf(f, ",");
3119                 }
3120                 if (set == 0) {
3121                     ne_fprintf(f, "none");
3122                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3123                     ne_fprintf(f, "all");
3124 #ifdef CONFIG_DEBUG_TCG
3125                 } else if (tcg_regset_single(set)) {
3126                     TCGReg reg = tcg_regset_first(set);
3127                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3128 #endif
3129                 } else if (TCG_TARGET_NB_REGS <= 32) {
3130                     ne_fprintf(f, "0x%x", (uint32_t)set);
3131                 } else {
3132                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3133                 }
3134             }
3135         }
3136 
3137         putc('\n', f);
3138     }
3139 }
3140 
3141 /* We give higher priority to constraints with fewer registers. */
3142 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3143 {
3144     int n;
3145 
3146     arg_ct += k;
3147     n = ctpop64(arg_ct->regs);
3148 
3149     /*
3150      * Sort constraints of a single register first, which includes output
3151      * aliases (which must exactly match the input already allocated).
3152      */
3153     if (n == 1 || arg_ct->oalias) {
3154         return INT_MAX;
3155     }
3156 
3157     /*
3158      * Sort register pairs next: first member, then second immediately after.
3159      * Arbitrarily sort multiple pairs by the index of the first reg;
3160      * there shouldn't be many pairs.
3161      */
3162     switch (arg_ct->pair) {
3163     case 1:
3164     case 3:
3165         return (k + 1) * 2;
3166     case 2:
3167         return (arg_ct->pair_index + 1) * 2 - 1;
3168     }
3169 
3170     /* Finally, sort by decreasing register count. */
3171     assert(n > 1);
3172     return -n;
3173 }
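
/*
 * For a hypothetical op constrained by { single register, register
 * pair, 16-reg set, 32-reg set }, the priorities order the single
 * register first (INT_MAX), then the pair with its second member
 * immediately after its first, then the 16-register set (-16)
 * ahead of the 32-register set (-32).
 */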
3174 
3175 /* sort from highest priority to lowest */
3176 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3177 {
3178     int i, j;
3179 
3180     for (i = 0; i < n; i++) {
3181         a[start + i].sort_index = start + i;
3182     }
3183     if (n <= 1) {
3184         return;
3185     }
3186     for (i = 0; i < n - 1; i++) {
3187         for (j = i + 1; j < n; j++) {
3188             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3189             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3190             if (p1 < p2) {
3191                 int tmp = a[start + i].sort_index;
3192                 a[start + i].sort_index = a[start + j].sort_index;
3193                 a[start + j].sort_index = tmp;
3194             }
3195         }
3196     }
3197 }
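/*
 * Note that only sort_index is permuted; each TCGArgConstraint stays
 * at its argument position, and the register allocator visits the
 * operands in sort_index order.
 */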
3198 
3199 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3200 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3201 
3202 static void process_constraint_sets(void)
3203 {
3204     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3205         const TCGConstraintSet *tdefs = &constraint_sets[c];
3206         TCGArgConstraint *args_ct = all_cts[c];
3207         int nb_oargs = tdefs->nb_oargs;
3208         int nb_iargs = tdefs->nb_iargs;
3209         int nb_args = nb_oargs + nb_iargs;
3210         bool saw_alias_pair = false;
3211 
3212         for (int i = 0; i < nb_args; i++) {
3213             const char *ct_str = tdefs->args_ct_str[i];
3214             bool input_p = i >= nb_oargs;
3215             int o;
3216 
3217             switch (*ct_str) {
3218             case '0' ... '9':
3219                 o = *ct_str - '0';
3220                 tcg_debug_assert(input_p);
3221                 tcg_debug_assert(o < nb_oargs);
3222                 tcg_debug_assert(args_ct[o].regs != 0);
3223                 tcg_debug_assert(!args_ct[o].oalias);
3224                 args_ct[i] = args_ct[o];
3225                 /* The output sets oalias.  */
3226                 args_ct[o].oalias = 1;
3227                 args_ct[o].alias_index = i;
3228                 /* The input sets ialias. */
3229                 args_ct[i].ialias = 1;
3230                 args_ct[i].alias_index = o;
3231                 if (args_ct[i].pair) {
3232                     saw_alias_pair = true;
3233                 }
3234                 tcg_debug_assert(ct_str[1] == '\0');
3235                 continue;
3236 
3237             case '&':
3238                 tcg_debug_assert(!input_p);
3239                 args_ct[i].newreg = true;
3240                 ct_str++;
3241                 break;
3242 
3243             case 'p': /* plus */
3244                 /* Allocate to the register after the previous. */
3245                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3246                 o = i - 1;
3247                 tcg_debug_assert(!args_ct[o].pair);
3248                 tcg_debug_assert(!args_ct[o].ct);
3249                 args_ct[i] = (TCGArgConstraint){
3250                     .pair = 2,
3251                     .pair_index = o,
3252                     .regs = args_ct[o].regs << 1,
3253                     .newreg = args_ct[o].newreg,
3254                 };
3255                 args_ct[o].pair = 1;
3256                 args_ct[o].pair_index = i;
3257                 tcg_debug_assert(ct_str[1] == '\0');
3258                 continue;
3259 
3260             case 'm': /* minus */
3261                 /* Allocate to the register before the previous. */
3262                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3263                 o = i - 1;
3264                 tcg_debug_assert(!args_ct[o].pair);
3265                 tcg_debug_assert(!args_ct[o].ct);
3266                 args_ct[i] = (TCGArgConstraint){
3267                     .pair = 1,
3268                     .pair_index = o,
3269                     .regs = args_ct[o].regs >> 1,
3270                     .newreg = args_ct[o].newreg,
3271                 };
3272                 args_ct[o].pair = 2;
3273                 args_ct[o].pair_index = i;
3274                 tcg_debug_assert(ct_str[1] == '\0');
3275                 continue;
3276             }
3277 
3278             do {
3279                 switch (*ct_str) {
3280                 case 'i':
3281                     args_ct[i].ct |= TCG_CT_CONST;
3282                     break;
3283 #ifdef TCG_REG_ZERO
3284                 case 'z':
3285                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3286                     break;
3287 #endif
3288 
3289                 /* Include all of the target-specific constraints. */
3290 
3291 #undef CONST
3292 #define CONST(CASE, MASK) \
3293     case CASE: args_ct[i].ct |= MASK; break;
3294 #define REGS(CASE, MASK) \
3295     case CASE: args_ct[i].regs |= MASK; break;
3296 
3297 #include "tcg-target-con-str.h"
3298 
3299 #undef REGS
3300 #undef CONST
3301                 default:
3302                 case '0' ... '9':
3303                 case '&':
3304                 case 'p':
3305                 case 'm':
3306                     /* Typo in TCGConstraintSet constraint. */
3307                     g_assert_not_reached();
3308                 }
3309             } while (*++ct_str != '\0');
3310         }
3311 
3312         /*
3313          * Fix up output pairs that are aliased with inputs.
3314          * When we created the alias, we copied pair from the output.
3315          * There are three cases:
3316          *    (1a) Pairs of inputs alias pairs of outputs.
3317          *    (1b) One input aliases the first of a pair of outputs.
3318          *    (2)  One input aliases the second of a pair of outputs.
3319          *
3320          * Case 1a is handled by making sure that the pair_index'es are
3321          * properly updated so that they appear the same as a pair of inputs.
3322          *
3323          * Case 1b is handled by setting the pair_index of the input to
3324          * itself, simply so it doesn't point to an unrelated argument.
3325          * Since we don't encounter the "second" during the input allocation
3326          * phase, nothing happens with the second half of the input pair.
3327          *
3328          * Case 2 is handled by setting the second input to pair=3, the
3329          * first output to pair=3, and the pair_index'es to match.
3330          */
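        /*
         * For illustration, a hypothetical set { "r", "p", "1" } hits
         * case 2: input "1" aliases the second output of the pair, so
         * below both that input and the first output become pair = 3,
         * with their pair_index fields pointing at each other.
         */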
3331         if (saw_alias_pair) {
3332             for (int i = nb_oargs; i < nb_args; i++) {
3333                 int o, o2, i2;
3334 
3335                 /*
3336                  * Since [0-9pm] must be alone in the constraint string,
3337                  * the only way they can both be set is if the pair comes
3338                  * from the output alias.
3339                  */
3340                 if (!args_ct[i].ialias) {
3341                     continue;
3342                 }
3343                 switch (args_ct[i].pair) {
3344                 case 0:
3345                     break;
3346                 case 1:
3347                     o = args_ct[i].alias_index;
3348                     o2 = args_ct[o].pair_index;
3349                     tcg_debug_assert(args_ct[o].pair == 1);
3350                     tcg_debug_assert(args_ct[o2].pair == 2);
3351                     if (args_ct[o2].oalias) {
3352                         /* Case 1a */
3353                         i2 = args_ct[o2].alias_index;
3354                         tcg_debug_assert(args_ct[i2].pair == 2);
3355                         args_ct[i2].pair_index = i;
3356                         args_ct[i].pair_index = i2;
3357                     } else {
3358                         /* Case 1b */
3359                         args_ct[i].pair_index = i;
3360                     }
3361                     break;
3362                 case 2:
3363                     o = args_ct[i].alias_index;
3364                     o2 = args_ct[o].pair_index;
3365                     tcg_debug_assert(args_ct[o].pair == 2);
3366                     tcg_debug_assert(args_ct[o2].pair == 1);
3367                     if (args_ct[o2].oalias) {
3368                         /* Case 1a */
3369                         i2 = args_ct[o2].alias_index;
3370                         tcg_debug_assert(args_ct[i2].pair == 1);
3371                         args_ct[i2].pair_index = i;
3372                         args_ct[i].pair_index = i2;
3373                     } else {
3374                         /* Case 2 */
3375                         args_ct[i].pair = 3;
3376                         args_ct[o2].pair = 3;
3377                         args_ct[i].pair_index = o2;
3378                         args_ct[o2].pair_index = i;
3379                     }
3380                     break;
3381                 default:
3382                     g_assert_not_reached();
3383                 }
3384             }
3385         }
3386 
3387         /* sort the constraints (XXX: this is just a heuristic) */
3388         sort_constraints(args_ct, 0, nb_oargs);
3389         sort_constraints(args_ct, nb_oargs, nb_iargs);
3390     }
3391 }
3392 
3393 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3394 {
3395     TCGOpcode opc = op->opc;
3396     TCGType type = TCGOP_TYPE(op);
3397     unsigned flags = TCGOP_FLAGS(op);
3398     const TCGOpDef *def = &tcg_op_defs[opc];
3399     const TCGOutOp *outop = all_outop[opc];
3400     TCGConstraintSetIndex con_set;
3401 
3402     if (def->flags & TCG_OPF_NOT_PRESENT) {
3403         return empty_cts;
3404     }
3405 
3406     if (outop) {
3407         con_set = outop->static_constraint;
3408         if (con_set == C_Dynamic) {
3409             con_set = outop->dynamic_constraint(type, flags);
3410         }
3411     } else {
3412         con_set = tcg_target_op_def(opc, type, flags);
3413     }
3414     tcg_debug_assert(con_set >= 0);
3415     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3416 
3417     /* The constraint arguments must match TCGOpcode arguments. */
3418     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3419     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3420 
3421     return all_cts[con_set];
3422 }
3423 
3424 static void remove_label_use(TCGOp *op, int idx)
3425 {
3426     TCGLabel *label = arg_label(op->args[idx]);
3427     TCGLabelUse *use;
3428 
3429     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3430         if (use->op == op) {
3431             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3432             return;
3433         }
3434     }
3435     g_assert_not_reached();
3436 }
3437 
3438 void tcg_op_remove(TCGContext *s, TCGOp *op)
3439 {
3440     switch (op->opc) {
3441     case INDEX_op_br:
3442         remove_label_use(op, 0);
3443         break;
3444     case INDEX_op_brcond:
3445         remove_label_use(op, 3);
3446         break;
3447     case INDEX_op_brcond2_i32:
3448         remove_label_use(op, 5);
3449         break;
3450     default:
3451         break;
3452     }
3453 
3454     QTAILQ_REMOVE(&s->ops, op, link);
3455     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3456     s->nb_ops--;
3457 }
3458 
3459 void tcg_remove_ops_after(TCGOp *op)
3460 {
3461     TCGContext *s = tcg_ctx;
3462 
3463     while (true) {
3464         TCGOp *last = tcg_last_op();
3465         if (last == op) {
3466             return;
3467         }
3468         tcg_op_remove(s, last);
3469     }
3470 }
3471 
3472 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3473 {
3474     TCGContext *s = tcg_ctx;
3475     TCGOp *op = NULL;
3476 
3477     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3478         QTAILQ_FOREACH(op, &s->free_ops, link) {
3479             if (nargs <= op->nargs) {
3480                 QTAILQ_REMOVE(&s->free_ops, op, link);
3481                 nargs = op->nargs;
3482                 goto found;
3483             }
3484         }
3485     }
3486 
3487     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3488     nargs = MAX(4, nargs);
3489     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3490 
3491  found:
3492     memset(op, 0, offsetof(TCGOp, link));
3493     op->opc = opc;
3494     op->nargs = nargs;
3495 
3496     /* Check for bitfield overflow. */
3497     tcg_debug_assert(op->nargs == nargs);
3498 
3499     s->nb_ops++;
3500     return op;
3501 }
3502 
3503 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3504 {
3505     TCGOp *op = tcg_op_alloc(opc, nargs);
3506 
3507     if (tcg_ctx->emit_before_op) {
3508         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3509     } else {
3510         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3511     }
3512     return op;
3513 }
3514 
3515 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3516                             TCGOpcode opc, TCGType type, unsigned nargs)
3517 {
3518     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3519 
3520     TCGOP_TYPE(new_op) = type;
3521     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3522     return new_op;
3523 }
3524 
3525 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3526                            TCGOpcode opc, TCGType type, unsigned nargs)
3527 {
3528     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3529 
3530     TCGOP_TYPE(new_op) = type;
3531     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3532     return new_op;
3533 }
3534 
3535 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3536 {
3537     TCGLabelUse *u;
3538 
3539     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3540         TCGOp *op = u->op;
3541         switch (op->opc) {
3542         case INDEX_op_br:
3543             op->args[0] = label_arg(to);
3544             break;
3545         case INDEX_op_brcond:
3546             op->args[3] = label_arg(to);
3547             break;
3548         case INDEX_op_brcond2_i32:
3549             op->args[5] = label_arg(to);
3550             break;
3551         default:
3552             g_assert_not_reached();
3553         }
3554     }
3555 
3556     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3557 }
3558 
3559 /* Reachability analysis: remove unreachable code.  */
3560 static void __attribute__((noinline))
3561 reachable_code_pass(TCGContext *s)
3562 {
3563     TCGOp *op, *op_next, *op_prev;
3564     bool dead = false;
3565 
3566     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3567         bool remove = dead;
3568         TCGLabel *label;
3569 
3570         switch (op->opc) {
3571         case INDEX_op_set_label:
3572             label = arg_label(op->args[0]);
3573 
3574             /*
3575              * Note that the first op in the TB is always a load,
3576              * so there is always something before a label.
3577              */
3578             op_prev = QTAILQ_PREV(op, link);
3579 
3580             /*
3581              * If we find two sequential labels, move all branches to
3582              * reference the second label and remove the first label.
3583              * Do this before branch to next optimization, so that the
3584              * middle label is out of the way.
3585              */
3586             if (op_prev->opc == INDEX_op_set_label) {
3587                 move_label_uses(label, arg_label(op_prev->args[0]));
3588                 tcg_op_remove(s, op_prev);
3589                 op_prev = QTAILQ_PREV(op, link);
3590             }
3591 
3592             /*
3593              * Optimization can fold conditional branches to unconditional.
3594              * If we find a label which is preceded by an unconditional
3595              * branch to next, remove the branch.  We couldn't do this when
3596              * processing the branch because any dead code between the branch
3597              * and label had not yet been removed.
3598              */
3599             if (op_prev->opc == INDEX_op_br &&
3600                 label == arg_label(op_prev->args[0])) {
3601                 tcg_op_remove(s, op_prev);
3602                 /* Fall through means insns become live again.  */
3603                 dead = false;
3604             }
3605 
3606             if (QSIMPLEQ_EMPTY(&label->branches)) {
3607                 /*
3608                  * While there is an occasional backward branch, virtually
3609                  * all branches generated by the translators are forward.
3610                  * Which means that generally we will have already removed
3611                  * all references to the label that will ever exist, and
3612                  * there is little to be gained by iterating.
3613                  */
3614                 remove = true;
3615             } else {
3616                 /* Once we see a label, insns become live again.  */
3617                 dead = false;
3618                 remove = false;
3619             }
3620             break;
3621 
3622         case INDEX_op_br:
3623         case INDEX_op_exit_tb:
3624         case INDEX_op_goto_ptr:
3625             /* Unconditional branches; everything following is dead.  */
3626             dead = true;
3627             break;
3628 
3629         case INDEX_op_call:
3630             /* Notice noreturn helper calls, raising exceptions.  */
3631             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3632                 dead = true;
3633             }
3634             break;
3635 
3636         case INDEX_op_insn_start:
3637             /* Never remove -- we need to keep these for unwind.  */
3638             remove = false;
3639             break;
3640 
3641         default:
3642             break;
3643         }
3644 
3645         if (remove) {
3646             tcg_op_remove(s, op);
3647         }
3648     }
3649 }
3650 
3651 #define TS_DEAD  1
3652 #define TS_MEM   2
3653 
3654 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3655 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
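/*
 * TS_DEAD means the temp's value is no longer live; TS_MEM means the
 * value is (also) present in its canonical memory slot.  Within
 * op->life, the SYNC_ARG bits flag outputs that must be synced back
 * to memory and the DEAD_ARG bits flag arguments that die at this op,
 * matching the decoding in tcg_dump_ops() above.
 */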
3656 
3657 /* For liveness_pass_1, the register preferences for a given temp.  */
3658 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3659 {
3660     return ts->state_ptr;
3661 }
3662 
3663 /* For liveness_pass_1, reset the preferences for a given temp to the
3664  * maximal regset for its type.
3665  */
3666 static inline void la_reset_pref(TCGTemp *ts)
3667 {
3668     *la_temp_pref(ts)
3669         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3670 }
3671 
3672 /* liveness analysis: end of function: all temps are dead, and globals
3673    should be in memory. */
3674 static void la_func_end(TCGContext *s, int ng, int nt)
3675 {
3676     int i;
3677 
3678     for (i = 0; i < ng; ++i) {
3679         s->temps[i].state = TS_DEAD | TS_MEM;
3680         la_reset_pref(&s->temps[i]);
3681     }
3682     for (i = ng; i < nt; ++i) {
3683         s->temps[i].state = TS_DEAD;
3684         la_reset_pref(&s->temps[i]);
3685     }
3686 }
3687 
3688 /* liveness analysis: end of basic block: all temps are dead, globals
3689    and local temps should be in memory. */
3690 static void la_bb_end(TCGContext *s, int ng, int nt)
3691 {
3692     int i;
3693 
3694     for (i = 0; i < nt; ++i) {
3695         TCGTemp *ts = &s->temps[i];
3696         int state;
3697 
3698         switch (ts->kind) {
3699         case TEMP_FIXED:
3700         case TEMP_GLOBAL:
3701         case TEMP_TB:
3702             state = TS_DEAD | TS_MEM;
3703             break;
3704         case TEMP_EBB:
3705         case TEMP_CONST:
3706             state = TS_DEAD;
3707             break;
3708         default:
3709             g_assert_not_reached();
3710         }
3711         ts->state = state;
3712         la_reset_pref(ts);
3713     }
3714 }
3715 
3716 /* liveness analysis: sync globals back to memory.  */
3717 static void la_global_sync(TCGContext *s, int ng)
3718 {
3719     int i;
3720 
3721     for (i = 0; i < ng; ++i) {
3722         int state = s->temps[i].state;
3723         s->temps[i].state = state | TS_MEM;
3724         if (state == TS_DEAD) {
3725             /* If the global was previously dead, reset prefs.  */
3726             la_reset_pref(&s->temps[i]);
3727         }
3728     }
3729 }
3730 
3731 /*
3732  * liveness analysis: conditional branch: all temps are dead unless
3733  * explicitly live-across-conditional-branch, globals and local temps
3734  * should be synced.
3735  */
3736 static void la_bb_sync(TCGContext *s, int ng, int nt)
3737 {
3738     la_global_sync(s, ng);
3739 
3740     for (int i = ng; i < nt; ++i) {
3741         TCGTemp *ts = &s->temps[i];
3742         int state;
3743 
3744         switch (ts->kind) {
3745         case TEMP_TB:
3746             state = ts->state;
3747             ts->state = state | TS_MEM;
3748             if (state != TS_DEAD) {
3749                 continue;
3750             }
3751             break;
3752         case TEMP_EBB:
3753         case TEMP_CONST:
3754             continue;
3755         default:
3756             g_assert_not_reached();
3757         }
3758         la_reset_pref(&s->temps[i]);
3759     }
3760 }
3761 
3762 /* liveness analysis: sync globals back to memory and kill.  */
3763 static void la_global_kill(TCGContext *s, int ng)
3764 {
3765     int i;
3766 
3767     for (i = 0; i < ng; i++) {
3768         s->temps[i].state = TS_DEAD | TS_MEM;
3769         la_reset_pref(&s->temps[i]);
3770     }
3771 }
3772 
3773 /* liveness analysis: note live globals crossing calls.  */
3774 static void la_cross_call(TCGContext *s, int nt)
3775 {
3776     TCGRegSet mask = ~tcg_target_call_clobber_regs;
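    /* Prefer call-saved registers for temps that stay live across the call. */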
3777     int i;
3778 
3779     for (i = 0; i < nt; i++) {
3780         TCGTemp *ts = &s->temps[i];
3781         if (!(ts->state & TS_DEAD)) {
3782             TCGRegSet *pset = la_temp_pref(ts);
3783             TCGRegSet set = *pset;
3784 
3785             set &= mask;
3786             /* If the combination is not possible, restart.  */
3787             if (set == 0) {
3788                 set = tcg_target_available_regs[ts->type] & mask;
3789             }
3790             *pset = set;
3791         }
3792     }
3793 }
3794 
3795 /*
3796  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3797  * to TEMP_EBB, if possible.
3798  */
3799 static void __attribute__((noinline))
3800 liveness_pass_0(TCGContext *s)
3801 {
3802     void * const multiple_ebb = (void *)(uintptr_t)-1;
3803     int nb_temps = s->nb_temps;
3804     TCGOp *op, *ebb;
3805 
3806     for (int i = s->nb_globals; i < nb_temps; ++i) {
3807         s->temps[i].state_ptr = NULL;
3808     }
3809 
3810     /*
3811      * Represent each EBB by the op at which it begins.  In the case of
3812      * the first EBB, this is the first op, otherwise it is a label.
3813      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3814      * within a single EBB, else MULTIPLE_EBB.
3815      */
3816     ebb = QTAILQ_FIRST(&s->ops);
3817     QTAILQ_FOREACH(op, &s->ops, link) {
3818         const TCGOpDef *def;
3819         int nb_oargs, nb_iargs;
3820 
3821         switch (op->opc) {
3822         case INDEX_op_set_label:
3823             ebb = op;
3824             continue;
3825         case INDEX_op_discard:
3826             continue;
3827         case INDEX_op_call:
3828             nb_oargs = TCGOP_CALLO(op);
3829             nb_iargs = TCGOP_CALLI(op);
3830             break;
3831         default:
3832             def = &tcg_op_defs[op->opc];
3833             nb_oargs = def->nb_oargs;
3834             nb_iargs = def->nb_iargs;
3835             break;
3836         }
3837 
3838         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3839             TCGTemp *ts = arg_temp(op->args[i]);
3840 
3841             if (ts->kind != TEMP_TB) {
3842                 continue;
3843             }
3844             if (ts->state_ptr == NULL) {
3845                 ts->state_ptr = ebb;
3846             } else if (ts->state_ptr != ebb) {
3847                 ts->state_ptr = multiple_ebb;
3848             }
3849         }
3850     }
3851 
3852     /*
3853      * For TEMP_TB that turned out not to be used beyond one EBB,
3854      * reduce the liveness to TEMP_EBB.
3855      */
3856     for (int i = s->nb_globals; i < nb_temps; ++i) {
3857         TCGTemp *ts = &s->temps[i];
3858         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3859             ts->kind = TEMP_EBB;
3860         }
3861     }
3862 }
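/*
 * For example, a TEMP_TB referenced within only a single EBB is
 * demoted to TEMP_EBB above, letting liveness_pass_1 treat it as dead
 * at the EBB boundary instead of syncing it back to memory.
 */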
3863 
3864 /* Liveness analysis: update the opc_arg_life array to tell if a
3865    given input argument is dead. Instructions updating dead
3866    temporaries are removed. */
3867 static void __attribute__((noinline))
3868 liveness_pass_1(TCGContext *s)
3869 {
3870     int nb_globals = s->nb_globals;
3871     int nb_temps = s->nb_temps;
3872     TCGOp *op, *op_prev;
3873     TCGRegSet *prefs;
3874     int i;
3875 
3876     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3877     for (i = 0; i < nb_temps; ++i) {
3878         s->temps[i].state_ptr = prefs + i;
3879     }
3880 
3881     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3882     la_func_end(s, nb_globals, nb_temps);
3883 
3884     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3885         int nb_iargs, nb_oargs;
3886         TCGOpcode opc_new, opc_new2;
3887         TCGLifeData arg_life = 0;
3888         TCGTemp *ts;
3889         TCGOpcode opc = op->opc;
3890         const TCGOpDef *def = &tcg_op_defs[opc];
3891         const TCGArgConstraint *args_ct;
3892 
3893         switch (opc) {
3894         case INDEX_op_call:
3895             {
3896                 const TCGHelperInfo *info = tcg_call_info(op);
3897                 int call_flags = tcg_call_flags(op);
3898 
3899                 nb_oargs = TCGOP_CALLO(op);
3900                 nb_iargs = TCGOP_CALLI(op);
3901 
3902                 /* pure functions can be removed if their result is unused */
3903                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3904                     for (i = 0; i < nb_oargs; i++) {
3905                         ts = arg_temp(op->args[i]);
3906                         if (ts->state != TS_DEAD) {
3907                             goto do_not_remove_call;
3908                         }
3909                     }
3910                     goto do_remove;
3911                 }
3912             do_not_remove_call:
3913 
3914                 /* Output args are dead.  */
3915                 for (i = 0; i < nb_oargs; i++) {
3916                     ts = arg_temp(op->args[i]);
3917                     if (ts->state & TS_DEAD) {
3918                         arg_life |= DEAD_ARG << i;
3919                     }
3920                     if (ts->state & TS_MEM) {
3921                         arg_life |= SYNC_ARG << i;
3922                     }
3923                     ts->state = TS_DEAD;
3924                     la_reset_pref(ts);
3925                 }
3926 
3927                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3928                 memset(op->output_pref, 0, sizeof(op->output_pref));
3929 
3930                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3931                                     TCG_CALL_NO_READ_GLOBALS))) {
3932                     la_global_kill(s, nb_globals);
3933                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3934                     la_global_sync(s, nb_globals);
3935                 }
3936 
3937                 /* Record arguments that die in this helper.  */
3938                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3939                     ts = arg_temp(op->args[i]);
3940                     if (ts->state & TS_DEAD) {
3941                         arg_life |= DEAD_ARG << i;
3942                     }
3943                 }
3944 
3945                 /* For all live registers, remove call-clobbered prefs.  */
3946                 la_cross_call(s, nb_temps);
3947 
3948                 /*
3949                  * Input arguments are live for preceding opcodes.
3950                  *
3951                  * For those arguments that die, and will be allocated in
3952                  * registers, clear the register set for that arg, to be
3953                  * filled in below.  For args that will be on the stack,
3954                  * reset to any available reg.  Process arguments in reverse
3955                  * order so that if a temp is used more than once, the stack
3956                  * reset to max happens before the register reset to 0.
3957                  */
3958                 for (i = nb_iargs - 1; i >= 0; i--) {
3959                     const TCGCallArgumentLoc *loc = &info->in[i];
3960                     ts = arg_temp(op->args[nb_oargs + i]);
3961 
3962                     if (ts->state & TS_DEAD) {
3963                         switch (loc->kind) {
3964                         case TCG_CALL_ARG_NORMAL:
3965                         case TCG_CALL_ARG_EXTEND_U:
3966                         case TCG_CALL_ARG_EXTEND_S:
3967                             if (arg_slot_reg_p(loc->arg_slot)) {
3968                                 *la_temp_pref(ts) = 0;
3969                                 break;
3970                             }
3971                             /* fall through */
3972                         default:
3973                             *la_temp_pref(ts) =
3974                                 tcg_target_available_regs[ts->type];
3975                             break;
3976                         }
3977                         ts->state &= ~TS_DEAD;
3978                     }
3979                 }
3980 
3981                 /*
3982                  * For each input argument, add its input register to prefs.
3983                  * If a temp is used once, this produces a single set bit;
3984                  * if a temp is used multiple times, this produces a set.
3985                  */
3986                 for (i = 0; i < nb_iargs; i++) {
3987                     const TCGCallArgumentLoc *loc = &info->in[i];
3988                     ts = arg_temp(op->args[nb_oargs + i]);
3989 
3990                     switch (loc->kind) {
3991                     case TCG_CALL_ARG_NORMAL:
3992                     case TCG_CALL_ARG_EXTEND_U:
3993                     case TCG_CALL_ARG_EXTEND_S:
3994                         if (arg_slot_reg_p(loc->arg_slot)) {
3995                             tcg_regset_set_reg(*la_temp_pref(ts),
3996                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3997                         }
3998                         break;
3999                     default:
4000                         break;
4001                     }
4002                 }
4003             }
4004             break;
4005         case INDEX_op_insn_start:
4006             break;
4007         case INDEX_op_discard:
4008             /* mark the temporary as dead */
4009             ts = arg_temp(op->args[0]);
4010             ts->state = TS_DEAD;
4011             la_reset_pref(ts);
4012             break;
4013 
4014         case INDEX_op_add2_i32:
4015         case INDEX_op_add2_i64:
4016             opc_new = INDEX_op_add;
4017             goto do_addsub2;
4018         case INDEX_op_sub2_i32:
4019         case INDEX_op_sub2_i64:
4020             opc_new = INDEX_op_sub;
4021         do_addsub2:
4022             nb_iargs = 4;
4023             nb_oargs = 2;
4024             /* Test if the high part of the operation is dead, but not
4025                the low part.  The result can be optimized to a simple
4026                add or sub.  This happens often for an x86_64 guest when
4027                the CPU mode is set to 32 bit.  */
4028             if (arg_temp(op->args[1])->state == TS_DEAD) {
4029                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4030                     goto do_remove;
4031                 }
4032                 /* Replace the opcode and adjust the args in place,
4033                    leaving 3 unused args at the end.  */
4034                 op->opc = opc = opc_new;
4035                 op->args[1] = op->args[2];
4036                 op->args[2] = op->args[4];
4037                 /* Fall through and mark the single-word operation live.  */
4038                 nb_iargs = 2;
4039                 nb_oargs = 1;
4040             }
4041             goto do_not_remove;
4042 
4043         case INDEX_op_muls2:
4044             opc_new = INDEX_op_mul;
4045             opc_new2 = INDEX_op_mulsh;
4046             goto do_mul2;
4047         case INDEX_op_mulu2:
4048             opc_new = INDEX_op_mul;
4049             opc_new2 = INDEX_op_muluh;
4050         do_mul2:
4051             nb_iargs = 2;
4052             nb_oargs = 2;
4053             if (arg_temp(op->args[1])->state == TS_DEAD) {
4054                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4055                     /* Both parts of the operation are dead.  */
4056                     goto do_remove;
4057                 }
4058                 /* The high part of the operation is dead; generate the low. */
4059                 op->opc = opc = opc_new;
4060                 op->args[1] = op->args[2];
4061                 op->args[2] = op->args[3];
4062             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4063                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4064                 /* The low part of the operation is dead; generate the high. */
4065                 op->opc = opc = opc_new2;
4066                 op->args[0] = op->args[1];
4067                 op->args[1] = op->args[2];
4068                 op->args[2] = op->args[3];
4069             } else {
4070                 goto do_not_remove;
4071             }
4072             /* Mark the single-word operation live.  */
4073             nb_oargs = 1;
4074             goto do_not_remove;
4075 
4076         default:
4077             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4078             nb_iargs = def->nb_iargs;
4079             nb_oargs = def->nb_oargs;
4080 
4081             /* Test if the operation can be removed because all
4082                its outputs are dead. We assume that nb_oargs == 0
4083                implies side effects.  */
4084             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4085                 for (i = 0; i < nb_oargs; i++) {
4086                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4087                         goto do_not_remove;
4088                     }
4089                 }
4090                 goto do_remove;
4091             }
4092             goto do_not_remove;
4093 
4094         do_remove:
4095             tcg_op_remove(s, op);
4096             break;
4097 
4098         do_not_remove:
4099             for (i = 0; i < nb_oargs; i++) {
4100                 ts = arg_temp(op->args[i]);
4101 
4102                 /* Remember the preference of the uses that followed.  */
4103                 if (i < ARRAY_SIZE(op->output_pref)) {
4104                     op->output_pref[i] = *la_temp_pref(ts);
4105                 }
4106 
4107                 /* Output args are dead.  */
4108                 if (ts->state & TS_DEAD) {
4109                     arg_life |= DEAD_ARG << i;
4110                 }
4111                 if (ts->state & TS_MEM) {
4112                     arg_life |= SYNC_ARG << i;
4113                 }
4114                 ts->state = TS_DEAD;
4115                 la_reset_pref(ts);
4116             }
4117 
4118             /* If end of basic block, update.  */
4119             if (def->flags & TCG_OPF_BB_EXIT) {
4120                 la_func_end(s, nb_globals, nb_temps);
4121             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4122                 la_bb_sync(s, nb_globals, nb_temps);
4123             } else if (def->flags & TCG_OPF_BB_END) {
4124                 la_bb_end(s, nb_globals, nb_temps);
4125             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4126                 la_global_sync(s, nb_globals);
4127                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4128                     la_cross_call(s, nb_temps);
4129                 }
4130             }
4131 
4132             /* Record arguments that die in this opcode.  */
4133             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4134                 ts = arg_temp(op->args[i]);
4135                 if (ts->state & TS_DEAD) {
4136                     arg_life |= DEAD_ARG << i;
4137                 }
4138             }
4139 
4140             /* Input arguments are live for preceding opcodes.  */
4141             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4142                 ts = arg_temp(op->args[i]);
4143                 if (ts->state & TS_DEAD) {
4144                     /* For operands that were dead, initially allow
4145                        all regs for the type.  */
4146                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4147                     ts->state &= ~TS_DEAD;
4148                 }
4149             }
4150 
4151             /* Incorporate constraints for this operand.  */
4152             switch (opc) {
4153             case INDEX_op_mov:
4154                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4155                    have proper constraints.  That said, special case
4156                    moves to propagate preferences backward.  */
4157                 if (IS_DEAD_ARG(1)) {
4158                     *la_temp_pref(arg_temp(op->args[0]))
4159                         = *la_temp_pref(arg_temp(op->args[1]));
4160                 }
4161                 break;
4162 
4163             default:
4164                 args_ct = opcode_args_ct(op);
4165                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4166                     const TCGArgConstraint *ct = &args_ct[i];
4167                     TCGRegSet set, *pset;
4168 
4169                     ts = arg_temp(op->args[i]);
4170                     pset = la_temp_pref(ts);
4171                     set = *pset;
4172 
4173                     set &= ct->regs;
4174                     if (ct->ialias) {
4175                         set &= output_pref(op, ct->alias_index);
4176                     }
4177                     /* If the combination is not possible, restart.  */
4178                     if (set == 0) {
4179                         set = ct->regs;
4180                     }
4181                     *pset = set;
4182                 }
4183                 break;
4184             }
4185             break;
4186         }
4187         op->life = arg_life;
4188     }
4189 }
4190 
4191 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
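/*
 * Each indirect global is shadowed by a direct TEMP_EBB temporary;
 * explicit loads are inserted before uses and stores after writes,
 * with arg_ts->state tracking whether the shadow copy is dead, loaded,
 * or already synced with memory.
 */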
4192 static bool __attribute__((noinline))
4193 liveness_pass_2(TCGContext *s)
4194 {
4195     int nb_globals = s->nb_globals;
4196     int nb_temps, i;
4197     bool changes = false;
4198     TCGOp *op, *op_next;
4199 
4200     /* Create a temporary for each indirect global.  */
4201     for (i = 0; i < nb_globals; ++i) {
4202         TCGTemp *its = &s->temps[i];
4203         if (its->indirect_reg) {
4204             TCGTemp *dts = tcg_temp_alloc(s);
4205             dts->type = its->type;
4206             dts->base_type = its->base_type;
4207             dts->temp_subindex = its->temp_subindex;
4208             dts->kind = TEMP_EBB;
4209             its->state_ptr = dts;
4210         } else {
4211             its->state_ptr = NULL;
4212         }
4213         /* All globals begin dead.  */
4214         its->state = TS_DEAD;
4215     }
4216     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4217         TCGTemp *its = &s->temps[i];
4218         its->state_ptr = NULL;
4219         its->state = TS_DEAD;
4220     }
4221 
4222     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4223         TCGOpcode opc = op->opc;
4224         const TCGOpDef *def = &tcg_op_defs[opc];
4225         TCGLifeData arg_life = op->life;
4226         int nb_iargs, nb_oargs, call_flags;
4227         TCGTemp *arg_ts, *dir_ts;
4228 
4229         if (opc == INDEX_op_call) {
4230             nb_oargs = TCGOP_CALLO(op);
4231             nb_iargs = TCGOP_CALLI(op);
4232             call_flags = tcg_call_flags(op);
4233         } else {
4234             nb_iargs = def->nb_iargs;
4235             nb_oargs = def->nb_oargs;
4236 
4237             /* Set flags similar to how calls require.  */
4238             if (def->flags & TCG_OPF_COND_BRANCH) {
4239                 /* Like reading globals: sync_globals */
4240                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4241             } else if (def->flags & TCG_OPF_BB_END) {
4242                 /* Like writing globals: save_globals */
4243                 call_flags = 0;
4244             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4245                 /* Like reading globals: sync_globals */
4246                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4247             } else {
4248                 /* No effect on globals.  */
4249                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4250                               TCG_CALL_NO_WRITE_GLOBALS);
4251             }
4252         }
4253 
4254         /* Make sure that input arguments are available.  */
4255         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4256             arg_ts = arg_temp(op->args[i]);
4257             dir_ts = arg_ts->state_ptr;
4258             if (dir_ts && arg_ts->state == TS_DEAD) {
4259                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4260                                   ? INDEX_op_ld_i32
4261                                   : INDEX_op_ld_i64);
4262                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4263                                                   arg_ts->type, 3);
4264 
4265                 lop->args[0] = temp_arg(dir_ts);
4266                 lop->args[1] = temp_arg(arg_ts->mem_base);
4267                 lop->args[2] = arg_ts->mem_offset;
4268 
4269                 /* Loaded, but synced with memory.  */
4270                 arg_ts->state = TS_MEM;
4271             }
4272         }
4273 
4274         /* Perform input replacement, and mark inputs that became dead.
4275            No action is required except keeping temp_state up to date
4276            so that we reload when needed.  */
4277         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4278             arg_ts = arg_temp(op->args[i]);
4279             dir_ts = arg_ts->state_ptr;
4280             if (dir_ts) {
4281                 op->args[i] = temp_arg(dir_ts);
4282                 changes = true;
4283                 if (IS_DEAD_ARG(i)) {
4284                     arg_ts->state = TS_DEAD;
4285                 }
4286             }
4287         }
4288 
4289         /* Liveness analysis should ensure that the following are
4290            all correct, for call sites and basic block end points.  */
4291         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4292             /* Nothing to do */
4293         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4294             for (i = 0; i < nb_globals; ++i) {
4295                 /* Liveness should see that globals are synced back,
4296                    that is, either TS_DEAD or TS_MEM.  */
4297                 arg_ts = &s->temps[i];
4298                 tcg_debug_assert(arg_ts->state_ptr == 0
4299                                  || arg_ts->state != 0);
4300             }
4301         } else {
4302             for (i = 0; i < nb_globals; ++i) {
4303                 /* Liveness should see that globals are saved back,
4304                    that is, TS_DEAD, waiting to be reloaded.  */
4305                 arg_ts = &s->temps[i];
4306                 tcg_debug_assert(arg_ts->state_ptr == 0
4307                                  || arg_ts->state == TS_DEAD);
4308             }
4309         }
4310 
4311         /* Outputs become available.  */
4312         if (opc == INDEX_op_mov) {
4313             arg_ts = arg_temp(op->args[0]);
4314             dir_ts = arg_ts->state_ptr;
4315             if (dir_ts) {
4316                 op->args[0] = temp_arg(dir_ts);
4317                 changes = true;
4318 
4319                 /* The output is now live and modified.  */
4320                 arg_ts->state = 0;
4321 
4322                 if (NEED_SYNC_ARG(0)) {
4323                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4324                                       ? INDEX_op_st_i32
4325                                       : INDEX_op_st_i64);
4326                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4327                                                      arg_ts->type, 3);
4328                     TCGTemp *out_ts = dir_ts;
4329 
4330                     if (IS_DEAD_ARG(0)) {
4331                         out_ts = arg_temp(op->args[1]);
4332                         arg_ts->state = TS_DEAD;
4333                         tcg_op_remove(s, op);
4334                     } else {
4335                         arg_ts->state = TS_MEM;
4336                     }
4337 
4338                     sop->args[0] = temp_arg(out_ts);
4339                     sop->args[1] = temp_arg(arg_ts->mem_base);
4340                     sop->args[2] = arg_ts->mem_offset;
4341                 } else {
4342                     tcg_debug_assert(!IS_DEAD_ARG(0));
4343                 }
4344             }
4345         } else {
4346             for (i = 0; i < nb_oargs; i++) {
4347                 arg_ts = arg_temp(op->args[i]);
4348                 dir_ts = arg_ts->state_ptr;
4349                 if (!dir_ts) {
4350                     continue;
4351                 }
4352                 op->args[i] = temp_arg(dir_ts);
4353                 changes = true;
4354 
4355                 /* The output is now live and modified.  */
4356                 arg_ts->state = 0;
4357 
4358                 /* Sync outputs upon their last write.  */
4359                 if (NEED_SYNC_ARG(i)) {
4360                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4361                                       ? INDEX_op_st_i32
4362                                       : INDEX_op_st_i64);
4363                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4364                                                      arg_ts->type, 3);
4365 
4366                     sop->args[0] = temp_arg(dir_ts);
4367                     sop->args[1] = temp_arg(arg_ts->mem_base);
4368                     sop->args[2] = arg_ts->mem_offset;
4369 
4370                     arg_ts->state = TS_MEM;
4371                 }
4372                 /* Drop outputs that are dead.  */
4373                 if (IS_DEAD_ARG(i)) {
4374                     arg_ts->state = TS_DEAD;
4375                 }
4376             }
4377         }
4378     }
4379 
4380     return changes;
4381 }
4382 
4383 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4384 {
4385     intptr_t off;
4386     int size, align;
4387 
4388     /* When allocating an object, look at the full type. */
4389     size = tcg_type_size(ts->base_type);
4390     switch (ts->base_type) {
4391     case TCG_TYPE_I32:
4392         align = 4;
4393         break;
4394     case TCG_TYPE_I64:
4395     case TCG_TYPE_V64:
4396         align = 8;
4397         break;
4398     case TCG_TYPE_I128:
4399     case TCG_TYPE_V128:
4400     case TCG_TYPE_V256:
4401         /*
4402          * Note that we do not require aligned storage for V256,
4403          * and that we provide alignment for I128 to match V128,
4404          * even if that's above what the host ABI requires.
4405          */
4406         align = 16;
4407         break;
4408     default:
4409         g_assert_not_reached();
4410     }
4411 
4412     /*
4413      * Assume the stack is sufficiently aligned.
4414      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4415      * and do not require 16 byte vector alignment.  This seems slightly
4416      * easier than fully parameterizing the above switch statement.
4417      */
4418     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4419     off = ROUND_UP(s->current_frame_offset, align);
4420 
4421     /* If we've exhausted the stack frame, restart with a smaller TB. */
4422     if (off + size > s->frame_end) {
4423         tcg_raise_tb_overflow(s);
4424     }
4425     s->current_frame_offset = off + size;
4426 #if defined(__sparc__)
4427     off += TCG_TARGET_STACK_BIAS;
4428 #endif
4429 
4430     /* If the object was subdivided, assign memory to all the parts. */
4431     if (ts->base_type != ts->type) {
4432         int part_size = tcg_type_size(ts->type);
4433         int part_count = size / part_size;
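        /* e.g. an I128 base type split into I64 parts gives part_count 2 */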
4434 
4435         /*
4436          * Each part is allocated sequentially in tcg_temp_new_internal.
4437          * Jump back to the first part by subtracting the current index.
4438          */
4439         ts -= ts->temp_subindex;
4440         for (int i = 0; i < part_count; ++i) {
4441             ts[i].mem_offset = off + i * part_size;
4442             ts[i].mem_base = s->frame_temp;
4443             ts[i].mem_allocated = 1;
4444         }
4445     } else {
4446         ts->mem_offset = off;
4447         ts->mem_base = s->frame_temp;
4448         ts->mem_allocated = 1;
4449     }
4450 }
4451 
4452 /* Assign @reg to @ts, and update reg_to_temp[]. */
4453 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4454 {
4455     if (ts->val_type == TEMP_VAL_REG) {
4456         TCGReg old = ts->reg;
4457         tcg_debug_assert(s->reg_to_temp[old] == ts);
4458         if (old == reg) {
4459             return;
4460         }
4461         s->reg_to_temp[old] = NULL;
4462     }
4463     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4464     s->reg_to_temp[reg] = ts;
4465     ts->val_type = TEMP_VAL_REG;
4466     ts->reg = reg;
4467 }
4468 
4469 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4470 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4471 {
4472     tcg_debug_assert(type != TEMP_VAL_REG);
4473     if (ts->val_type == TEMP_VAL_REG) {
4474         TCGReg reg = ts->reg;
4475         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4476         s->reg_to_temp[reg] = NULL;
4477     }
4478     ts->val_type = type;
4479 }
4480 
4481 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4482 
4483 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4484    mark it free; otherwise mark it dead.  */
4485 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4486 {
4487     TCGTempVal new_type;
4488 
4489     switch (ts->kind) {
4490     case TEMP_FIXED:
4491         return;
4492     case TEMP_GLOBAL:
4493     case TEMP_TB:
4494         new_type = TEMP_VAL_MEM;
4495         break;
4496     case TEMP_EBB:
4497         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4498         break;
4499     case TEMP_CONST:
4500         new_type = TEMP_VAL_CONST;
4501         break;
4502     default:
4503         g_assert_not_reached();
4504     }
4505     set_temp_val_nonreg(s, ts, new_type);
4506 }
4507 
4508 /* Mark a temporary as dead.  */
4509 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4510 {
4511     temp_free_or_dead(s, ts, 1);
4512 }
4513 
4514 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4515    register needs to be allocated to store a constant.  If 'free_or_dead'
4516    is non-zero, subsequently release the temporary; if it is positive, the
4517    temp is dead; if it is negative, the temp is free.  */
4518 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4519                       TCGRegSet preferred_regs, int free_or_dead)
4520 {
4521     if (!temp_readonly(ts) && !ts->mem_coherent) {
4522         if (!ts->mem_allocated) {
4523             temp_allocate_frame(s, ts);
4524         }
4525         switch (ts->val_type) {
4526         case TEMP_VAL_CONST:
4527             /* If we're going to free the temp immediately, then we won't
4528                require it later in a register, so attempt to store the
4529                constant to memory directly.  */
4530             if (free_or_dead
4531                 && tcg_out_sti(s, ts->type, ts->val,
4532                                ts->mem_base->reg, ts->mem_offset)) {
4533                 break;
4534             }
4535             temp_load(s, ts, tcg_target_available_regs[ts->type],
4536                       allocated_regs, preferred_regs);
4537             /* fallthrough */
4538 
4539         case TEMP_VAL_REG:
4540             tcg_out_st(s, ts->type, ts->reg,
4541                        ts->mem_base->reg, ts->mem_offset);
4542             break;
4543 
4544         case TEMP_VAL_MEM:
4545             break;
4546 
4547         case TEMP_VAL_DEAD:
4548         default:
4549             g_assert_not_reached();
4550         }
4551         ts->mem_coherent = 1;
4552     }
4553     if (free_or_dead) {
4554         temp_free_or_dead(s, ts, free_or_dead);
4555     }
4556 }
4557 
4558 /* free register 'reg' by spilling the corresponding temporary if necessary */
4559 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4560 {
4561     TCGTemp *ts = s->reg_to_temp[reg];
4562     if (ts != NULL) {
4563         temp_sync(s, ts, allocated_regs, 0, -1);
4564     }
4565 }
4566 
4567 /**
4568  * tcg_reg_alloc:
4569  * @required_regs: Set of registers in which we must allocate.
4570  * @allocated_regs: Set of registers which must be avoided.
4571  * @preferred_regs: Set of registers we should prefer.
4572  * @rev: True if we search the registers in "indirect" order.
4573  *
4574  * The allocated register must be in @required_regs & ~@allocated_regs,
4575  * but if we can put it in @preferred_regs we may save a move later.
4576  */
4577 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4578                             TCGRegSet allocated_regs,
4579                             TCGRegSet preferred_regs, bool rev)
4580 {
4581     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4582     TCGRegSet reg_ct[2];
4583     const int *order;
4584 
4585     reg_ct[1] = required_regs & ~allocated_regs;
4586     tcg_debug_assert(reg_ct[1] != 0);
4587     reg_ct[0] = reg_ct[1] & preferred_regs;
4588 
4589     /* Skip the preferred_regs option if it cannot be satisfied,
4590        or if the preference made no difference.  */
4591     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4592 
4593     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4594 
4595     /* Try free registers, preferences first.  */
4596     for (j = f; j < 2; j++) {
4597         TCGRegSet set = reg_ct[j];
4598 
4599         if (tcg_regset_single(set)) {
4600             /* One register in the set.  */
4601             TCGReg reg = tcg_regset_first(set);
4602             if (s->reg_to_temp[reg] == NULL) {
4603                 return reg;
4604             }
4605         } else {
4606             for (i = 0; i < n; i++) {
4607                 TCGReg reg = order[i];
4608                 if (s->reg_to_temp[reg] == NULL &&
4609                     tcg_regset_test_reg(set, reg)) {
4610                     return reg;
4611                 }
4612             }
4613         }
4614     }
4615 
4616     /* We must spill something.  */
4617     for (j = f; j < 2; j++) {
4618         TCGRegSet set = reg_ct[j];
4619 
4620         if (tcg_regset_single(set)) {
4621             /* One register in the set.  */
4622             TCGReg reg = tcg_regset_first(set);
4623             tcg_reg_free(s, reg, allocated_regs);
4624             return reg;
4625         } else {
4626             for (i = 0; i < n; i++) {
4627                 TCGReg reg = order[i];
4628                 if (tcg_regset_test_reg(set, reg)) {
4629                     tcg_reg_free(s, reg, allocated_regs);
4630                     return reg;
4631                 }
4632             }
4633         }
4634     }
4635 
4636     g_assert_not_reached();
4637 }
4638 
4639 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4640                                  TCGRegSet allocated_regs,
4641                                  TCGRegSet preferred_regs, bool rev)
4642 {
4643     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4644     TCGRegSet reg_ct[2];
4645     const int *order;
4646 
4647     /* Accept reg I only if neither I nor I+1 is in allocated_regs. */
4648     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
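    /*
     * e.g. if register 5 is in allocated_regs, bits 4 and 5 are both
     * cleared here, excluding the 4/5 and 5/6 pairs alike.
     */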
4649     tcg_debug_assert(reg_ct[1] != 0);
4650     reg_ct[0] = reg_ct[1] & preferred_regs;
4651 
4652     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4653 
4654     /*
4655      * Skip the preferred_regs option if it cannot be satisfied,
4656      * or if the preference made no difference.
4657      */
4658     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4659 
4660     /*
4661      * Minimize the number of flushes by looking for 2 free registers first,
4662      * then a single flush, then two flushes.
4663      */
4664     for (fmin = 2; fmin >= 0; fmin--) {
4665         for (j = k; j < 2; j++) {
4666             TCGRegSet set = reg_ct[j];
4667 
4668             for (i = 0; i < n; i++) {
4669                 TCGReg reg = order[i];
4670 
4671                 if (tcg_regset_test_reg(set, reg)) {
4672                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4673                     if (f >= fmin) {
4674                         tcg_reg_free(s, reg, allocated_regs);
4675                         tcg_reg_free(s, reg + 1, allocated_regs);
4676                         return reg;
4677                     }
4678                 }
4679             }
4680         }
4681     }
4682     g_assert_not_reached();
4683 }
4684 
4685 /* Make sure the temporary is in a register.  If needed, allocate the register
4686    from DESIRED while avoiding ALLOCATED.  */
4687 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4688                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4689 {
4690     TCGReg reg;
4691 
4692     switch (ts->val_type) {
4693     case TEMP_VAL_REG:
4694         return;
4695     case TEMP_VAL_CONST:
4696         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4697                             preferred_regs, ts->indirect_base);
4698         if (ts->type <= TCG_TYPE_I64) {
4699             tcg_out_movi(s, ts->type, reg, ts->val);
4700         } else {
4701             uint64_t val = ts->val;
4702             MemOp vece = MO_64;
4703 
4704             /*
4705              * Find the minimal vector element that matches the constant.
4706              * The targets will, in general, have to do this search anyway;
4707              * do it generically here.
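             * E.g. 0x6363636363636363 matches MO_8, 0x1234123412341234
             * matches MO_16, and 0xdeadbeefdeadbeef matches MO_32.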
4708              */
4709             if (val == dup_const(MO_8, val)) {
4710                 vece = MO_8;
4711             } else if (val == dup_const(MO_16, val)) {
4712                 vece = MO_16;
4713             } else if (val == dup_const(MO_32, val)) {
4714                 vece = MO_32;
4715             }
4716 
4717             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4718         }
4719         ts->mem_coherent = 0;
4720         break;
4721     case TEMP_VAL_MEM:
4722         if (!ts->mem_allocated) {
4723             temp_allocate_frame(s, ts);
4724         }
4725         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4726                             preferred_regs, ts->indirect_base);
4727         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4728         ts->mem_coherent = 1;
4729         break;
4730     case TEMP_VAL_DEAD:
4731     default:
4732         g_assert_not_reached();
4733     }
4734     set_temp_val_reg(s, ts, reg);
4735 }
4736 
4737 /* Save a temporary to memory. 'allocated_regs' is used in case a
4738    temporary register needs to be allocated to store a constant.  */
4739 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4740 {
4741     /* The liveness analysis already ensures that globals are back
4742        in memory. Keep a tcg_debug_assert for safety. */
4743     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4744 }
4745 
4746 /* save globals to their canonical location and assume they can be
4747    modified be the following code. 'allocated_regs' is used in case a
4748    temporary registers needs to be allocated to store a constant. */
4749 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4750 {
4751     int i, n;
4752 
4753     for (i = 0, n = s->nb_globals; i < n; i++) {
4754         temp_save(s, &s->temps[i], allocated_regs);
4755     }
4756 }
4757 
4758 /* sync globals to their canonical location and assume they can be
4759    read by the following code. 'allocated_regs' is used in case a
4760    temporary register needs to be allocated to store a constant. */
4761 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4762 {
4763     int i, n;
4764 
4765     for (i = 0, n = s->nb_globals; i < n; i++) {
4766         TCGTemp *ts = &s->temps[i];
4767         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4768                          || ts->kind == TEMP_FIXED
4769                          || ts->mem_coherent);
4770     }
4771 }
4772 
4773 /* at the end of a basic block, we assume all temporaries are dead and
4774    all globals are stored at their canonical location. */
4775 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4776 {
4777     int i;
4778 
4779     for (i = s->nb_globals; i < s->nb_temps; i++) {
4780         TCGTemp *ts = &s->temps[i];
4781 
4782         switch (ts->kind) {
4783         case TEMP_TB:
4784             temp_save(s, ts, allocated_regs);
4785             break;
4786         case TEMP_EBB:
4787             /* The liveness analysis already ensures that temps are dead.
4788                Keep a tcg_debug_assert for safety. */
4789             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4790             break;
4791         case TEMP_CONST:
4792             /* Similarly, we should have freed any allocated register. */
4793             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4794             break;
4795         default:
4796             g_assert_not_reached();
4797         }
4798     }
4799 
4800     save_globals(s, allocated_regs);
4801 }
4802 
4803 /*
4804  * At a conditional branch, we assume all temporaries are dead unless
4805  * explicitly live-across-conditional-branch; all globals and local
4806  * temps are synced to their location.
4807  */
4808 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4809 {
4810     sync_globals(s, allocated_regs);
4811 
4812     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4813         TCGTemp *ts = &s->temps[i];
4814         /*
4815          * The liveness analysis already ensures that temps are dead.
4816          * Keep tcg_debug_asserts for safety.
4817          */
4818         switch (ts->kind) {
4819         case TEMP_TB:
4820             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4821             break;
4822         case TEMP_EBB:
4823         case TEMP_CONST:
4824             break;
4825         default:
4826             g_assert_not_reached();
4827         }
4828     }
4829 }
4830 
4831 /*
4832  * Specialized code generation for INDEX_op_mov_* with a constant.
4833  */
4834 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4835                                   tcg_target_ulong val, TCGLifeData arg_life,
4836                                   TCGRegSet preferred_regs)
4837 {
4838     /* ENV should not be modified.  */
4839     tcg_debug_assert(!temp_readonly(ots));
4840 
4841     /* The movi is not explicitly generated here.  */
4842     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4843     ots->val = val;
4844     ots->mem_coherent = 0;
4845     if (NEED_SYNC_ARG(0)) {
4846         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4847     } else if (IS_DEAD_ARG(0)) {
4848         temp_dead(s, ots);
4849     }
4850 }
4851 
4852 /*
4853  * Specialized code generation for INDEX_op_mov_*.
4854  */
4855 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4856 {
4857     const TCGLifeData arg_life = op->life;
4858     TCGRegSet allocated_regs, preferred_regs;
4859     TCGTemp *ts, *ots;
4860     TCGType otype, itype;
4861     TCGReg oreg, ireg;
4862 
4863     allocated_regs = s->reserved_regs;
4864     preferred_regs = output_pref(op, 0);
4865     ots = arg_temp(op->args[0]);
4866     ts = arg_temp(op->args[1]);
4867 
4868     /* ENV should not be modified.  */
4869     tcg_debug_assert(!temp_readonly(ots));
4870 
4871     /* Note that otype != itype for no-op truncation.  */
4872     otype = ots->type;
4873     itype = ts->type;
4874 
4875     if (ts->val_type == TEMP_VAL_CONST) {
4876         /* propagate constant or generate sti */
4877         tcg_target_ulong val = ts->val;
4878         if (IS_DEAD_ARG(1)) {
4879             temp_dead(s, ts);
4880         }
4881         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4882         return;
4883     }
4884 
4885     /* If the source value is in memory we're going to be forced
4886        to have it in a register in order to perform the copy.  Copy
4887        the SOURCE value into its own register first, that way we
4888        don't have to reload SOURCE the next time it is used. */
4889     if (ts->val_type == TEMP_VAL_MEM) {
4890         temp_load(s, ts, tcg_target_available_regs[itype],
4891                   allocated_regs, preferred_regs);
4892     }
4893     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4894     ireg = ts->reg;
4895 
4896     if (IS_DEAD_ARG(0)) {
4897         /* mov to a non-saved dead register makes no sense (even with
4898            liveness analysis disabled). */
4899         tcg_debug_assert(NEED_SYNC_ARG(0));
4900         if (!ots->mem_allocated) {
4901             temp_allocate_frame(s, ots);
4902         }
4903         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4904         if (IS_DEAD_ARG(1)) {
4905             temp_dead(s, ts);
4906         }
4907         temp_dead(s, ots);
4908         return;
4909     }
4910 
4911     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4912         /*
4913          * The mov can be suppressed.  Kill input first, so that it
4914          * is unlinked from reg_to_temp, then set the output to the
4915          * reg that we saved from the input.
4916          */
4917         temp_dead(s, ts);
4918         oreg = ireg;
4919     } else {
4920         if (ots->val_type == TEMP_VAL_REG) {
4921             oreg = ots->reg;
4922         } else {
4923             /* Make sure to not spill the input register during allocation. */
4924             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4925                                  allocated_regs | ((TCGRegSet)1 << ireg),
4926                                  preferred_regs, ots->indirect_base);
4927         }
4928         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4929             /*
4930              * Cross register class move not supported.
4931              * Store the source register into the destination slot
4932              * and leave the destination temp as TEMP_VAL_MEM.
4933              */
4934             assert(!temp_readonly(ots));
4935             if (!ots->mem_allocated) {
4936                 temp_allocate_frame(s, ots);
4937             }
4938             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4939             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4940             ots->mem_coherent = 1;
4941             return;
4942         }
4943     }
4944     set_temp_val_reg(s, ots, oreg);
4945     ots->mem_coherent = 0;
4946 
4947     if (NEED_SYNC_ARG(0)) {
4948         temp_sync(s, ots, allocated_regs, 0, 0);
4949     }
4950 }
4951 
4952 /*
4953  * Specialized code generation for INDEX_op_dup_vec.
4954  */
4955 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4956 {
4957     const TCGLifeData arg_life = op->life;
4958     TCGRegSet dup_out_regs, dup_in_regs;
4959     const TCGArgConstraint *dup_args_ct;
4960     TCGTemp *its, *ots;
4961     TCGType itype, vtype;
4962     unsigned vece;
4963     int lowpart_ofs;
4964     bool ok;
4965 
4966     ots = arg_temp(op->args[0]);
4967     its = arg_temp(op->args[1]);
4968 
4969     /* ENV should not be modified.  */
4970     tcg_debug_assert(!temp_readonly(ots));
4971 
4972     itype = its->type;
4973     vece = TCGOP_VECE(op);
4974     vtype = TCGOP_TYPE(op);
4975 
4976     if (its->val_type == TEMP_VAL_CONST) {
4977         /* Propagate constant via movi -> dupi.  */
4978         tcg_target_ulong val = its->val;
4979         if (IS_DEAD_ARG(1)) {
4980             temp_dead(s, its);
4981         }
4982         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4983         return;
4984     }
4985 
4986     dup_args_ct = opcode_args_ct(op);
4987     dup_out_regs = dup_args_ct[0].regs;
4988     dup_in_regs = dup_args_ct[1].regs;
4989 
4990     /* Allocate the output register now.  */
4991     if (ots->val_type != TEMP_VAL_REG) {
4992         TCGRegSet allocated_regs = s->reserved_regs;
4993         TCGReg oreg;
4994 
4995         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4996             /* Make sure to not spill the input register. */
4997             tcg_regset_set_reg(allocated_regs, its->reg);
4998         }
4999         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5000                              output_pref(op, 0), ots->indirect_base);
5001         set_temp_val_reg(s, ots, oreg);
5002     }
5003 
5004     switch (its->val_type) {
5005     case TEMP_VAL_REG:
5006         /*
5007          * The dup constraints must be broad, covering all possible VECE.
5008          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5009          * to fail, indicating that extra moves are required for that case.
5010          */
5011         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5012             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5013                 goto done;
5014             }
5015             /* Try again from memory or a vector input register.  */
5016         }
5017         if (!its->mem_coherent) {
5018             /*
5019              * The input register is not synced, and so an extra store
5020              * would be required to use memory.  Attempt an integer-vector
5021              * register move first.  We do not have a TCGRegSet for this.
5022              */
5023             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5024                 break;
5025             }
5026             /* Sync the temp back to its slot and load from there.  */
5027             temp_sync(s, its, s->reserved_regs, 0, 0);
5028         }
5029         /* fall through */
5030 
5031     case TEMP_VAL_MEM:
5032         lowpart_ofs = 0;
5033         if (HOST_BIG_ENDIAN) {
5034             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5035         }
5036         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5037                              its->mem_offset + lowpart_ofs)) {
5038             goto done;
5039         }
5040         /* Load the input into the destination vector register. */
5041         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5042         break;
5043 
5044     default:
5045         g_assert_not_reached();
5046     }
5047 
5048     /* We now have a vector input register, so dup must succeed. */
5049     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5050     tcg_debug_assert(ok);
5051 
5052  done:
5053     ots->mem_coherent = 0;
5054     if (IS_DEAD_ARG(1)) {
5055         temp_dead(s, its);
5056     }
5057     if (NEED_SYNC_ARG(0)) {
5058         temp_sync(s, ots, s->reserved_regs, 0, 0);
5059     }
5060     if (IS_DEAD_ARG(0)) {
5061         temp_dead(s, ots);
5062     }
5063 }
5064 
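/*
 * Generic register allocation: satisfy the input constraints (including
 * pairs and aliases), handle call-clobber and side-effect flags, satisfy
 * the output constraints, emit the opcode, then sync or discard outputs.
 */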
5065 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5066 {
5067     const TCGLifeData arg_life = op->life;
5068     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5069     TCGRegSet i_allocated_regs;
5070     TCGRegSet o_allocated_regs;
5071     int i, k, nb_iargs, nb_oargs;
5072     TCGReg reg;
5073     TCGArg arg;
5074     const TCGArgConstraint *args_ct;
5075     const TCGArgConstraint *arg_ct;
5076     TCGTemp *ts;
5077     TCGArg new_args[TCG_MAX_OP_ARGS];
5078     int const_args[TCG_MAX_OP_ARGS];
5079     TCGCond op_cond;
5080 
5081     nb_oargs = def->nb_oargs;
5082     nb_iargs = def->nb_iargs;
5083 
5084     /* copy constants */
5085     memcpy(new_args + nb_oargs + nb_iargs,
5086            op->args + nb_oargs + nb_iargs,
5087            sizeof(TCGArg) * def->nb_cargs);
5088 
5089     i_allocated_regs = s->reserved_regs;
5090     o_allocated_regs = s->reserved_regs;
5091 
5092     switch (op->opc) {
5093     case INDEX_op_brcond:
5094         op_cond = op->args[2];
5095         break;
5096     case INDEX_op_setcond:
5097     case INDEX_op_negsetcond:
5098     case INDEX_op_cmp_vec:
5099         op_cond = op->args[3];
5100         break;
5101     case INDEX_op_brcond2_i32:
5102         op_cond = op->args[4];
5103         break;
5104     case INDEX_op_movcond:
5105     case INDEX_op_setcond2_i32:
5106     case INDEX_op_cmpsel_vec:
5107         op_cond = op->args[5];
5108         break;
5109     default:
5110         /* No condition within opcode. */
5111         op_cond = TCG_COND_ALWAYS;
5112         break;
5113     }
5114 
5115     args_ct = opcode_args_ct(op);
5116 
5117     /* satisfy input constraints */
5118     for (k = 0; k < nb_iargs; k++) {
5119         TCGRegSet i_preferred_regs, i_required_regs;
5120         bool allocate_new_reg, copyto_new_reg;
5121         TCGTemp *ts2;
5122         int i1, i2;
5123 
5124         i = args_ct[nb_oargs + k].sort_index;
5125         arg = op->args[i];
5126         arg_ct = &args_ct[i];
5127         ts = arg_temp(arg);
5128 
5129         if (ts->val_type == TEMP_VAL_CONST) {
5130 #ifdef TCG_REG_ZERO
5131             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5132                 /* Hardware zero register: indicate register via non-const. */
5133                 const_args[i] = 0;
5134                 new_args[i] = TCG_REG_ZERO;
5135                 continue;
5136             }
5137 #endif
5138 
5139             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5140                                        op_cond, TCGOP_VECE(op))) {
5141                 /* constant is OK for instruction */
5142                 const_args[i] = 1;
5143                 new_args[i] = ts->val;
5144                 continue;
5145             }
5146         }
5147 
5148         reg = ts->reg;
5149         i_preferred_regs = 0;
5150         i_required_regs = arg_ct->regs;
5151         allocate_new_reg = false;
5152         copyto_new_reg = false;
5153 
5154         switch (arg_ct->pair) {
5155         case 0: /* not paired */
5156             if (arg_ct->ialias) {
5157                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5158 
5159                 /*
5160                  * If the input is readonly, then it cannot also be an
5161                  * output and aliased to itself.  If the input is not
5162                  * dead after the instruction, we must allocate a new
5163                  * register and move it.
5164                  */
5165                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5166                     || args_ct[arg_ct->alias_index].newreg) {
5167                     allocate_new_reg = true;
5168                 } else if (ts->val_type == TEMP_VAL_REG) {
5169                     /*
5170                      * Check if the current register has already been
5171                      * allocated for another input.
5172                      */
5173                     allocate_new_reg =
5174                         tcg_regset_test_reg(i_allocated_regs, reg);
5175                 }
5176             }
5177             if (!allocate_new_reg) {
5178                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5179                           i_preferred_regs);
5180                 reg = ts->reg;
5181                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5182             }
5183             if (allocate_new_reg) {
5184                 /*
5185                  * Allocate a new register matching the constraint
5186                  * and move the temporary register into it.
5187                  */
5188                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5189                           i_allocated_regs, 0);
5190                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5191                                     i_preferred_regs, ts->indirect_base);
5192                 copyto_new_reg = true;
5193             }
5194             break;
5195 
5196         case 1:
5197             /* First of an input pair; if i1 == i2, the second is an output. */
5198             i1 = i;
5199             i2 = arg_ct->pair_index;
5200             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5201 
5202             /*
5203              * It is easier to default to allocating a new pair
5204              * and to identify a few cases where it's not required.
5205              */
5206             if (arg_ct->ialias) {
5207                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5208                 if (IS_DEAD_ARG(i1) &&
5209                     IS_DEAD_ARG(i2) &&
5210                     !temp_readonly(ts) &&
5211                     ts->val_type == TEMP_VAL_REG &&
5212                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5213                     tcg_regset_test_reg(i_required_regs, reg) &&
5214                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5215                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5216                     (ts2
5217                      ? ts2->val_type == TEMP_VAL_REG &&
5218                        ts2->reg == reg + 1 &&
5219                        !temp_readonly(ts2)
5220                      : s->reg_to_temp[reg + 1] == NULL)) {
5221                     break;
5222                 }
5223             } else {
5224                 /* Without aliasing, the pair must also be an input. */
5225                 tcg_debug_assert(ts2);
5226                 if (ts->val_type == TEMP_VAL_REG &&
5227                     ts2->val_type == TEMP_VAL_REG &&
5228                     ts2->reg == reg + 1 &&
5229                     tcg_regset_test_reg(i_required_regs, reg)) {
5230                     break;
5231                 }
5232             }
5233             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5234                                      0, ts->indirect_base);
5235             goto do_pair;
5236 
5237         case 2: /* pair second */
5238             reg = new_args[arg_ct->pair_index] + 1;
5239             goto do_pair;
5240 
5241         case 3: /* ialias with second output, no first input */
5242             tcg_debug_assert(arg_ct->ialias);
5243             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5244 
5245             if (IS_DEAD_ARG(i) &&
5246                 !temp_readonly(ts) &&
5247                 ts->val_type == TEMP_VAL_REG &&
5248                 reg > 0 &&
5249                 s->reg_to_temp[reg - 1] == NULL &&
5250                 tcg_regset_test_reg(i_required_regs, reg) &&
5251                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5252                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5253                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5254                 break;
5255             }
5256             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5257                                      i_allocated_regs, 0,
5258                                      ts->indirect_base);
5259             tcg_regset_set_reg(i_allocated_regs, reg);
5260             reg += 1;
5261             goto do_pair;
5262 
5263         do_pair:
5264             /*
5265              * If an aliased input is not dead after the instruction,
5266              * we must allocate a new register and move it.
5267              */
5268             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5269                 TCGRegSet t_allocated_regs = i_allocated_regs;
5270 
5271                 /*
5272                  * Because of the alias, and the continued life, make sure
5273                  * that the temp is somewhere *other* than the reg pair,
5274                  * and we get a copy in reg.
5275                  */
5276                 tcg_regset_set_reg(t_allocated_regs, reg);
5277                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5278                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5279                     /* If ts was already in reg, copy it somewhere else. */
5280                     TCGReg nr;
5281                     bool ok;
5282 
5283                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5284                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5285                                        t_allocated_regs, 0, ts->indirect_base);
5286                     ok = tcg_out_mov(s, ts->type, nr, reg);
5287                     tcg_debug_assert(ok);
5288 
5289                     set_temp_val_reg(s, ts, nr);
5290                 } else {
5291                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5292                               t_allocated_regs, 0);
5293                     copyto_new_reg = true;
5294                 }
5295             } else {
5296                 /* Preferably allocate to reg, otherwise copy. */
5297                 i_required_regs = (TCGRegSet)1 << reg;
5298                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5299                           i_preferred_regs);
5300                 copyto_new_reg = ts->reg != reg;
5301             }
5302             break;
5303 
5304         default:
5305             g_assert_not_reached();
5306         }
5307 
5308         if (copyto_new_reg) {
5309             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5310                 /*
5311                  * Cross register class move not supported.  Sync the
5312                  * temp back to its slot and load from there.
5313                  */
5314                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5315                 tcg_out_ld(s, ts->type, reg,
5316                            ts->mem_base->reg, ts->mem_offset);
5317             }
5318         }
5319         new_args[i] = reg;
5320         const_args[i] = 0;
5321         tcg_regset_set_reg(i_allocated_regs, reg);
5322     }
5323 
5324     /* mark dead temporaries and free the associated registers */
5325     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5326         if (IS_DEAD_ARG(i)) {
5327             temp_dead(s, arg_temp(op->args[i]));
5328         }
5329     }
5330 
5331     if (def->flags & TCG_OPF_COND_BRANCH) {
5332         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5333     } else if (def->flags & TCG_OPF_BB_END) {
5334         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5335     } else {
5336         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5337             /* XXX: permit generic clobber register list? */
5338             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5339                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5340                     tcg_reg_free(s, i, i_allocated_regs);
5341                 }
5342             }
5343         }
5344         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5345             /* sync globals if the op has side effects and might trigger
5346                an exception. */
5347             sync_globals(s, i_allocated_regs);
5348         }
5349 
5350         /* satisfy the output constraints */
5351         for (k = 0; k < nb_oargs; k++) {
5352             i = args_ct[k].sort_index;
5353             arg = op->args[i];
5354             arg_ct = &args_ct[i];
5355             ts = arg_temp(arg);
5356 
5357             /* ENV should not be modified.  */
5358             tcg_debug_assert(!temp_readonly(ts));
5359 
5360             switch (arg_ct->pair) {
5361             case 0: /* not paired */
5362                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5363                     reg = new_args[arg_ct->alias_index];
5364                 } else if (arg_ct->newreg) {
5365                     reg = tcg_reg_alloc(s, arg_ct->regs,
5366                                         i_allocated_regs | o_allocated_regs,
5367                                         output_pref(op, k), ts->indirect_base);
5368                 } else {
5369                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5370                                         output_pref(op, k), ts->indirect_base);
5371                 }
5372                 break;
5373 
5374             case 1: /* first of pair */
5375                 if (arg_ct->oalias) {
5376                     reg = new_args[arg_ct->alias_index];
5377                 } else if (arg_ct->newreg) {
5378                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5379                                              i_allocated_regs | o_allocated_regs,
5380                                              output_pref(op, k),
5381                                              ts->indirect_base);
5382                 } else {
5383                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5384                                              output_pref(op, k),
5385                                              ts->indirect_base);
5386                 }
5387                 break;
5388 
5389             case 2: /* second of pair */
5390                 if (arg_ct->oalias) {
5391                     reg = new_args[arg_ct->alias_index];
5392                 } else {
5393                     reg = new_args[arg_ct->pair_index] + 1;
5394                 }
5395                 break;
5396 
5397             case 3: /* first of pair, aliasing with a second input */
5398                 tcg_debug_assert(!arg_ct->newreg);
5399                 reg = new_args[arg_ct->pair_index] - 1;
5400                 break;
5401 
5402             default:
5403                 g_assert_not_reached();
5404             }
5405             tcg_regset_set_reg(o_allocated_regs, reg);
5406             set_temp_val_reg(s, ts, reg);
5407             ts->mem_coherent = 0;
5408             new_args[i] = reg;
5409         }
5410     }
5411 
5412     /* emit instruction */
5413     TCGType type = TCGOP_TYPE(op);
5414     switch (op->opc) {
5415     case INDEX_op_ext_i32_i64:
5416         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5417         break;
5418     case INDEX_op_extu_i32_i64:
5419         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5420         break;
5421     case INDEX_op_extrl_i64_i32:
5422         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5423         break;
5424 
5425     case INDEX_op_add:
5426     case INDEX_op_and:
5427     case INDEX_op_andc:
5428     case INDEX_op_clz:
5429     case INDEX_op_ctz:
5430     case INDEX_op_divs:
5431     case INDEX_op_divu:
5432     case INDEX_op_eqv:
5433     case INDEX_op_mul:
5434     case INDEX_op_mulsh:
5435     case INDEX_op_muluh:
5436     case INDEX_op_nand:
5437     case INDEX_op_nor:
5438     case INDEX_op_or:
5439     case INDEX_op_orc:
5440     case INDEX_op_rems:
5441     case INDEX_op_remu:
5442     case INDEX_op_rotl:
5443     case INDEX_op_rotr:
5444     case INDEX_op_sar:
5445     case INDEX_op_shl:
5446     case INDEX_op_shr:
5447     case INDEX_op_xor:
5448         {
5449             const TCGOutOpBinary *out =
5450                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5451 
5452             /* Constants should never appear in the first source operand. */
5453             tcg_debug_assert(!const_args[1]);
5454             if (const_args[2]) {
5455                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5456             } else {
5457                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5458             }
5459         }
5460         break;
5461 
5462     case INDEX_op_sub:
5463         {
5464             const TCGOutOpSubtract *out = &outop_sub;
5465 
5466             /*
5467              * Constants should never appear in the second source operand.
5468              * These are folded to add with negative constant.
5469              */
5470             tcg_debug_assert(!const_args[2]);
5471             if (const_args[1]) {
5472                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5473             } else {
5474                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5475             }
5476         }
5477         break;
5478 
5479     case INDEX_op_bswap64:
5480         assert(TCG_TARGET_REG_BITS == 64);
5481         /* fall through */
5482     case INDEX_op_ctpop:
5483     case INDEX_op_neg:
5484     case INDEX_op_not:
5485         {
5486             const TCGOutOpUnary *out =
5487                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5488 
5489             /* Constants should have been folded. */
5490             tcg_debug_assert(!const_args[1]);
5491             out->out_rr(s, type, new_args[0], new_args[1]);
5492         }
5493         break;
5494 
5495     case INDEX_op_bswap16:
5496     case INDEX_op_bswap32:
5497         {
5498             const TCGOutOpBswap *out =
5499                 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5500 
5501             tcg_debug_assert(!const_args[1]);
5502             out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5503         }
5504         break;
5505 
5506     case INDEX_op_divs2:
5507     case INDEX_op_divu2:
5508         {
5509             const TCGOutOpDivRem *out =
5510                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5511 
5512             /* Only used by x86 and s390x, which use matching constraints. */
5513             tcg_debug_assert(new_args[0] == new_args[2]);
5514             tcg_debug_assert(new_args[1] == new_args[3]);
5515             tcg_debug_assert(!const_args[4]);
5516             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5517         }
5518         break;
5519 
5520     case INDEX_op_extract:
5521     case INDEX_op_sextract:
5522         {
5523             const TCGOutOpExtract *out =
5524                 container_of(all_outop[op->opc], TCGOutOpExtract, base);
5525 
5526             tcg_debug_assert(!const_args[1]);
5527             out->out_rr(s, type, new_args[0], new_args[1],
5528                         new_args[2], new_args[3]);
5529         }
5530         break;
5531 
5532     case INDEX_op_muls2:
5533     case INDEX_op_mulu2:
5534         {
5535             const TCGOutOpMul2 *out =
5536                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5537 
5538             tcg_debug_assert(!const_args[2]);
5539             tcg_debug_assert(!const_args[3]);
5540             out->out_rrrr(s, type, new_args[0], new_args[1],
5541                           new_args[2], new_args[3]);
5542         }
5543         break;
5544 
5545     case INDEX_op_brcond:
5546         {
5547             const TCGOutOpBrcond *out = &outop_brcond;
5548             TCGCond cond = new_args[2];
5549             TCGLabel *label = arg_label(new_args[3]);
5550 
5551             tcg_debug_assert(!const_args[0]);
5552             if (const_args[1]) {
5553                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5554             } else {
5555                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5556             }
5557         }
5558         break;
5559 
5560     case INDEX_op_movcond:
5561         {
5562             const TCGOutOpMovcond *out = &outop_movcond;
5563             TCGCond cond = new_args[5];
5564 
5565             tcg_debug_assert(!const_args[1]);
5566             out->out(s, type, cond, new_args[0],
5567                      new_args[1], new_args[2], const_args[2],
5568                      new_args[3], const_args[3],
5569                      new_args[4], const_args[4]);
5570         }
5571         break;
5572 
5573     case INDEX_op_setcond:
5574     case INDEX_op_negsetcond:
5575         {
5576             const TCGOutOpSetcond *out =
5577                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5578             TCGCond cond = new_args[3];
5579 
5580             tcg_debug_assert(!const_args[1]);
5581             if (const_args[2]) {
5582                 out->out_rri(s, type, cond,
5583                              new_args[0], new_args[1], new_args[2]);
5584             } else {
5585                 out->out_rrr(s, type, cond,
5586                              new_args[0], new_args[1], new_args[2]);
5587             }
5588         }
5589         break;
5590 
5591 #if TCG_TARGET_REG_BITS == 32
5592     case INDEX_op_brcond2_i32:
5593         {
5594             const TCGOutOpBrcond2 *out = &outop_brcond2;
5595             TCGCond cond = new_args[4];
5596             TCGLabel *label = arg_label(new_args[5]);
5597 
5598             tcg_debug_assert(!const_args[0]);
5599             tcg_debug_assert(!const_args[1]);
5600             out->out(s, cond, new_args[0], new_args[1],
5601                      new_args[2], const_args[2],
5602                      new_args[3], const_args[3], label);
5603         }
5604         break;
5605     case INDEX_op_setcond2_i32:
5606         {
5607             const TCGOutOpSetcond2 *out = &outop_setcond2;
5608             TCGCond cond = new_args[5];
5609 
5610             tcg_debug_assert(!const_args[1]);
5611             tcg_debug_assert(!const_args[2]);
5612             out->out(s, cond, new_args[0], new_args[1], new_args[2],
5613                      new_args[3], const_args[3], new_args[4], const_args[4]);
5614         }
5615         break;
5616 #else
5617     case INDEX_op_brcond2_i32:
5618     case INDEX_op_setcond2_i32:
5619         g_assert_not_reached();
5620 #endif
5621 
5622     default:
5623         if (def->flags & TCG_OPF_VECTOR) {
5624             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5625                            TCGOP_VECE(op), new_args, const_args);
5626         } else {
5627             tcg_out_op(s, op->opc, type, new_args, const_args);
5628         }
5629         break;
5630     }
5631 
5632     /* move the outputs in the correct register if needed */
5633     for (i = 0; i < nb_oargs; i++) {
5634         ts = arg_temp(op->args[i]);
5635 
5636         /* ENV should not be modified.  */
5637         tcg_debug_assert(!temp_readonly(ts));
5638 
5639         if (NEED_SYNC_ARG(i)) {
5640             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5641         } else if (IS_DEAD_ARG(i)) {
5642             temp_dead(s, ts);
5643         }
5644     }
5645 }
5646 
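/*
 * Specialized code generation for INDEX_op_dup2_vec.  Returns false if
 * nothing better than the generic expansion is possible.
 */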
5647 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5648 {
5649     const TCGLifeData arg_life = op->life;
5650     TCGTemp *ots, *itsl, *itsh;
5651     TCGType vtype = TCGOP_TYPE(op);
5652 
5653     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5654     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5655     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5656 
5657     ots = arg_temp(op->args[0]);
5658     itsl = arg_temp(op->args[1]);
5659     itsh = arg_temp(op->args[2]);
5660 
5661     /* ENV should not be modified.  */
5662     tcg_debug_assert(!temp_readonly(ots));
5663 
5664     /* Allocate the output register now.  */
5665     if (ots->val_type != TEMP_VAL_REG) {
5666         TCGRegSet allocated_regs = s->reserved_regs;
5667         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5668         TCGReg oreg;
5669 
5670         /* Make sure to not spill the input registers. */
5671         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5672             tcg_regset_set_reg(allocated_regs, itsl->reg);
5673         }
5674         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5675             tcg_regset_set_reg(allocated_regs, itsh->reg);
5676         }
5677 
5678         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5679                              output_pref(op, 0), ots->indirect_base);
5680         set_temp_val_reg(s, ots, oreg);
5681     }
5682 
5683     /* Promote dup2 of immediates to dupi_vec. */
5684     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5685         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5686         MemOp vece = MO_64;
5687 
5688         if (val == dup_const(MO_8, val)) {
5689             vece = MO_8;
5690         } else if (val == dup_const(MO_16, val)) {
5691             vece = MO_16;
5692         } else if (val == dup_const(MO_32, val)) {
5693             vece = MO_32;
5694         }
5695 
5696         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5697         goto done;
5698     }
5699 
5700     /* If the two inputs form one 64-bit value, try dupm_vec. */
5701     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5702         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5703         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5704         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5705 
5706         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5707         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5708 
5709         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5710                              its->mem_base->reg, its->mem_offset)) {
5711             goto done;
5712         }
5713     }
5714 
5715     /* Fall back to generic expansion. */
5716     return false;
5717 
5718  done:
5719     ots->mem_coherent = 0;
5720     if (IS_DEAD_ARG(1)) {
5721         temp_dead(s, itsl);
5722     }
5723     if (IS_DEAD_ARG(2)) {
5724         temp_dead(s, itsh);
5725     }
5726     if (NEED_SYNC_ARG(0)) {
5727         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5728     } else if (IS_DEAD_ARG(0)) {
5729         temp_dead(s, ots);
5730     }
5731     return true;
5732 }
5733 
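/*
 * Load the temporary @ts into the fixed call argument register @reg,
 * first freeing @reg of any current contents.  A cross register class
 * move that fails is routed through the temporary's memory slot.
 */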
5734 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5735                          TCGRegSet allocated_regs)
5736 {
5737     if (ts->val_type == TEMP_VAL_REG) {
5738         if (ts->reg != reg) {
5739             tcg_reg_free(s, reg, allocated_regs);
5740             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5741                 /*
5742                  * Cross register class move not supported.  Sync the
5743                  * temp back to its slot and load from there.
5744                  */
5745                 temp_sync(s, ts, allocated_regs, 0, 0);
5746                 tcg_out_ld(s, ts->type, reg,
5747                            ts->mem_base->reg, ts->mem_offset);
5748             }
5749         }
5750     } else {
5751         TCGRegSet arg_set = 0;
5752 
5753         tcg_reg_free(s, reg, allocated_regs);
5754         tcg_regset_set_reg(arg_set, reg);
5755         temp_load(s, ts, arg_set, allocated_regs, 0);
5756     }
5757 }
5758 
5759 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5760                          TCGRegSet allocated_regs)
5761 {
5762     /*
5763      * When the destination is on the stack, load up the temp and store.
5764      * If there are many call-saved registers, the temp might live to
5765      * see another use; otherwise it'll be discarded.
5766      */
5767     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5768     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5769                arg_slot_stk_ofs(arg_slot));
5770 }
5771 
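/* Load a TCG_CALL_ARG_NORMAL argument into its register or stack slot. */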
5772 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5773                             TCGTemp *ts, TCGRegSet *allocated_regs)
5774 {
5775     if (arg_slot_reg_p(l->arg_slot)) {
5776         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5777         load_arg_reg(s, reg, ts, *allocated_regs);
5778         tcg_regset_set_reg(*allocated_regs, reg);
5779     } else {
5780         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5781     }
5782 }
5783 
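/*
 * Pass the address @ref_base + @ref_off as a by-reference argument,
 * either in the slot's argument register or in its stack slot.
 */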
5784 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5785                          intptr_t ref_off, TCGRegSet *allocated_regs)
5786 {
5787     TCGReg reg;
5788 
5789     if (arg_slot_reg_p(arg_slot)) {
5790         reg = tcg_target_call_iarg_regs[arg_slot];
5791         tcg_reg_free(s, reg, *allocated_regs);
5792         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5793         tcg_regset_set_reg(*allocated_regs, reg);
5794     } else {
5795         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5796                             *allocated_regs, 0, false);
5797         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5798         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5799                    arg_slot_stk_ofs(arg_slot));
5800     }
5801 }
5802 
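/*
 * Emit a helper call: load arguments in reverse order so that stacked
 * arguments are placed first, clobber the call registers, save or sync
 * globals per the call flags, emit the call, then assign the outputs
 * according to the return kind.
 */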
5803 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5804 {
5805     const int nb_oargs = TCGOP_CALLO(op);
5806     const int nb_iargs = TCGOP_CALLI(op);
5807     const TCGLifeData arg_life = op->life;
5808     const TCGHelperInfo *info = tcg_call_info(op);
5809     TCGRegSet allocated_regs = s->reserved_regs;
5810     int i;
5811 
5812     /*
5813      * Move inputs into place in reverse order,
5814      * so that we place stacked arguments first.
5815      */
5816     for (i = nb_iargs - 1; i >= 0; --i) {
5817         const TCGCallArgumentLoc *loc = &info->in[i];
5818         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5819 
5820         switch (loc->kind) {
5821         case TCG_CALL_ARG_NORMAL:
5822         case TCG_CALL_ARG_EXTEND_U:
5823         case TCG_CALL_ARG_EXTEND_S:
5824             load_arg_normal(s, loc, ts, &allocated_regs);
5825             break;
5826         case TCG_CALL_ARG_BY_REF:
5827             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5828             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5829                          arg_slot_stk_ofs(loc->ref_slot),
5830                          &allocated_regs);
5831             break;
5832         case TCG_CALL_ARG_BY_REF_N:
5833             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5834             break;
5835         default:
5836             g_assert_not_reached();
5837         }
5838     }
5839 
5840     /* Mark dead temporaries and free the associated registers.  */
5841     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5842         if (IS_DEAD_ARG(i)) {
5843             temp_dead(s, arg_temp(op->args[i]));
5844         }
5845     }
5846 
5847     /* Clobber call registers.  */
5848     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5849         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5850             tcg_reg_free(s, i, allocated_regs);
5851         }
5852     }
5853 
5854     /*
5855      * Save globals if they might be written by the helper,
5856      * sync them if they might be read.
5857      */
5858     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5859         /* Nothing to do */
5860     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5861         sync_globals(s, allocated_regs);
5862     } else {
5863         save_globals(s, allocated_regs);
5864     }
5865 
5866     /*
5867      * If the ABI passes a pointer to the returned struct as the first
5868      * argument, load that now.  Pass a pointer to the output home slot.
5869      */
5870     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5871         TCGTemp *ts = arg_temp(op->args[0]);
5872 
5873         if (!ts->mem_allocated) {
5874             temp_allocate_frame(s, ts);
5875         }
5876         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5877     }
5878 
5879     tcg_out_call(s, tcg_call_func(op), info);
5880 
5881     /* Assign output registers and emit moves if needed.  */
5882     switch (info->out_kind) {
5883     case TCG_CALL_RET_NORMAL:
5884         for (i = 0; i < nb_oargs; i++) {
5885             TCGTemp *ts = arg_temp(op->args[i]);
5886             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5887 
5888             /* ENV should not be modified.  */
5889             tcg_debug_assert(!temp_readonly(ts));
5890 
5891             set_temp_val_reg(s, ts, reg);
5892             ts->mem_coherent = 0;
5893         }
5894         break;
5895 
5896     case TCG_CALL_RET_BY_VEC:
5897         {
5898             TCGTemp *ts = arg_temp(op->args[0]);
5899 
5900             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5901             tcg_debug_assert(ts->temp_subindex == 0);
5902             if (!ts->mem_allocated) {
5903                 temp_allocate_frame(s, ts);
5904             }
5905             tcg_out_st(s, TCG_TYPE_V128,
5906                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5907                        ts->mem_base->reg, ts->mem_offset);
5908         }
5909         /* fall through to mark all parts in memory */
5910 
5911     case TCG_CALL_RET_BY_REF:
5912         /* The callee has performed a write through the reference. */
5913         for (i = 0; i < nb_oargs; i++) {
5914             TCGTemp *ts = arg_temp(op->args[i]);
5915             ts->val_type = TEMP_VAL_MEM;
5916         }
5917         break;
5918 
5919     default:
5920         g_assert_not_reached();
5921     }
5922 
5923     /* Flush or discard output registers as needed. */
5924     for (i = 0; i < nb_oargs; i++) {
5925         TCGTemp *ts = arg_temp(op->args[i]);
5926         if (NEED_SYNC_ARG(i)) {
5927             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5928         } else if (IS_DEAD_ARG(i)) {
5929             temp_dead(s, ts);
5930         }
5931     }
5932 }
5933 
5934 /**
5935  * atom_and_align_for_opc:
5936  * @s: tcg context
5937  * @opc: memory operation code
5938  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5939  * @allow_two_ops: true if we are prepared to issue two operations
5940  *
5941  * Return the alignment and atomicity to use for the inline fast path
5942  * for the given memory operation.  The alignment may be larger than
5943  * that specified in @opc, and the correct alignment will be diagnosed
5944  * by the slow path helper.
5945  *
5946  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5947  * and issue two loads or stores for subalignment.
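 *
 * For example, given MO_64 with MO_ATOM_WITHIN16 on a host without
 * within16 support, the required alignment is raised to the full 8 bytes;
 * with MO_ATOM_WITHIN16_PAIR and @allow_two_ops, half (4 byte) alignment
 * suffices.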
5948  */
5949 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5950                                            MemOp host_atom, bool allow_two_ops)
5951 {
5952     MemOp align = memop_alignment_bits(opc);
5953     MemOp size = opc & MO_SIZE;
5954     MemOp half = size ? size - 1 : 0;
5955     MemOp atom = opc & MO_ATOM_MASK;
5956     MemOp atmax;
5957 
5958     switch (atom) {
5959     case MO_ATOM_NONE:
5960         /* The operation requires no specific atomicity. */
5961         atmax = MO_8;
5962         break;
5963 
5964     case MO_ATOM_IFALIGN:
5965         atmax = size;
5966         break;
5967 
5968     case MO_ATOM_IFALIGN_PAIR:
5969         atmax = half;
5970         break;
5971 
5972     case MO_ATOM_WITHIN16:
5973         atmax = size;
5974         if (size == MO_128) {
5975             /* Misalignment implies !within16, and therefore no atomicity. */
5976         } else if (host_atom != MO_ATOM_WITHIN16) {
5977             /* The host does not implement within16, so require alignment. */
5978             align = MAX(align, size);
5979         }
5980         break;
5981 
5982     case MO_ATOM_WITHIN16_PAIR:
5983         atmax = size;
5984         /*
5985          * Misalignment implies !within16, and therefore half atomicity.
5986          * Any host prepared for two operations can implement this with
5987          * half alignment.
5988          */
5989         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5990             align = MAX(align, half);
5991         }
5992         break;
5993 
5994     case MO_ATOM_SUBALIGN:
5995         atmax = size;
5996         if (host_atom != MO_ATOM_SUBALIGN) {
5997             /* If unaligned but not odd, there are subobjects up to half. */
5998             if (allow_two_ops) {
5999                 align = MAX(align, half);
6000             } else {
6001                 align = MAX(align, size);
6002             }
6003         }
6004         break;
6005 
6006     default:
6007         g_assert_not_reached();
6008     }
6009 
6010     return (TCGAtomAlign){ .atom = atmax, .align = align };
6011 }
6012 
6013 /*
6014  * Similarly for qemu_ld/st slow path helpers.
6015  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6016  * using only the provided backend tcg_out_* functions.
6017  */
6018 
6019 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6020 {
6021     int ofs = arg_slot_stk_ofs(slot);
6022 
6023     /*
6024      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6025      * require extension to uint64_t, adjust the address for uint32_t.
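     * E.g. on a big-endian 64-bit host, a TCG_TYPE_I32 argument occupies
     * the high-address half of its slot, so the offset is advanced by 4.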
6026      */
6027     if (HOST_BIG_ENDIAN &&
6028         TCG_TARGET_REG_BITS == 64 &&
6029         type == TCG_TYPE_I32) {
6030         ofs += 4;
6031     }
6032     return ofs;
6033 }
6034 
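/*
 * Perform the @nmov argument moves in @mov: stack destinations are
 * stored first, freeing their source registers, then the remaining
 * register-to-register moves are resolved with tcg_out_movext{1,2,3},
 * using @parm->tmp scratch registers to break any overlap.
 */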
6035 static void tcg_out_helper_load_slots(TCGContext *s,
6036                                       unsigned nmov, TCGMovExtend *mov,
6037                                       const TCGLdstHelperParam *parm)
6038 {
6039     unsigned i;
6040     TCGReg dst3;
6041 
6042     /*
6043      * Start from the end, storing to the stack first.
6044      * This frees those registers, so we need not consider overlap.
6045      */
6046     for (i = nmov; i-- > 0; ) {
6047         unsigned slot = mov[i].dst;
6048 
6049         if (arg_slot_reg_p(slot)) {
6050             goto found_reg;
6051         }
6052 
6053         TCGReg src = mov[i].src;
6054         TCGType dst_type = mov[i].dst_type;
6055         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6056 
6057         /* The argument is going onto the stack; extend into scratch. */
6058         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6059             tcg_debug_assert(parm->ntmp != 0);
6060             mov[i].dst = src = parm->tmp[0];
6061             tcg_out_movext1(s, &mov[i]);
6062         }
6063 
6064         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6065                    tcg_out_helper_stk_ofs(dst_type, slot));
6066     }
6067     return;
6068 
6069  found_reg:
6070     /*
6071      * The remaining arguments are in registers.
6072      * Convert slot numbers to argument registers.
6073      */
6074     nmov = i + 1;
6075     for (i = 0; i < nmov; ++i) {
6076         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6077     }
6078 
6079     switch (nmov) {
6080     case 4:
6081         /* The backend must have provided enough temps for the worst case. */
6082         tcg_debug_assert(parm->ntmp >= 2);
6083 
6084         dst3 = mov[3].dst;
6085         for (unsigned j = 0; j < 3; ++j) {
6086             if (dst3 == mov[j].src) {
6087                 /*
6088                  * Conflict. Copy the source to a temporary, perform the
6089                  * remaining moves, then the extension from our scratch
6090                  * on the way out.
6091                  */
6092                 TCGReg scratch = parm->tmp[1];
6093 
6094                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6095                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6096                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6097                 return;
6098             }
6099         }
6100 
6101         /* No conflicts: perform this move and continue. */
6102         tcg_out_movext1(s, &mov[3]);
6103         /* fall through */
6104 
6105     case 3:
6106         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6107                         parm->ntmp ? parm->tmp[0] : -1);
6108         break;
6109     case 2:
6110         tcg_out_movext2(s, mov, mov + 1,
6111                         parm->ntmp ? parm->tmp[0] : -1);
6112         break;
6113     case 1:
6114         tcg_out_movext1(s, mov);
6115         break;
6116     default:
6117         g_assert_not_reached();
6118     }
6119 }
6120 
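/*
 * Load the constant @imm into argument @slot: directly into an argument
 * register, or into the stack slot, using a scratch register if the
 * backend cannot store the constant to memory directly.
 */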
6121 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6122                                     TCGType type, tcg_target_long imm,
6123                                     const TCGLdstHelperParam *parm)
6124 {
6125     if (arg_slot_reg_p(slot)) {
6126         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6127     } else {
6128         int ofs = tcg_out_helper_stk_ofs(type, slot);
6129         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6130             tcg_debug_assert(parm->ntmp != 0);
6131             tcg_out_movi(s, type, parm->tmp[0], imm);
6132             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6133         }
6134     }
6135 }
6136 
6137 static void tcg_out_helper_load_common_args(TCGContext *s,
6138                                             const TCGLabelQemuLdst *ldst,
6139                                             const TCGLdstHelperParam *parm,
6140                                             const TCGHelperInfo *info,
6141                                             unsigned next_arg)
6142 {
6143     TCGMovExtend ptr_mov = {
6144         .dst_type = TCG_TYPE_PTR,
6145         .src_type = TCG_TYPE_PTR,
6146         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6147     };
6148     const TCGCallArgumentLoc *loc = &info->in[0];
6149     TCGType type;
6150     unsigned slot;
6151     tcg_target_ulong imm;
6152 
6153     /*
6154      * Handle env, which is always first.
6155      */
6156     ptr_mov.dst = loc->arg_slot;
6157     ptr_mov.src = TCG_AREG0;
6158     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6159 
6160     /*
6161      * Handle oi.
6162      */
6163     imm = ldst->oi;
6164     loc = &info->in[next_arg];
6165     type = TCG_TYPE_I32;
6166     switch (loc->kind) {
6167     case TCG_CALL_ARG_NORMAL:
6168         break;
6169     case TCG_CALL_ARG_EXTEND_U:
6170     case TCG_CALL_ARG_EXTEND_S:
6171         /* No extension required for MemOpIdx. */
6172         tcg_debug_assert(imm <= INT32_MAX);
6173         type = TCG_TYPE_REG;
6174         break;
6175     default:
6176         g_assert_not_reached();
6177     }
6178     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6179     next_arg++;
6180 
6181     /*
6182      * Handle ra.
6183      */
6184     loc = &info->in[next_arg];
6185     slot = loc->arg_slot;
6186     if (parm->ra_gen) {
6187         int arg_reg = -1;
6188         TCGReg ra_reg;
6189 
6190         if (arg_slot_reg_p(slot)) {
6191             arg_reg = tcg_target_call_iarg_regs[slot];
6192         }
6193         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6194 
6195         ptr_mov.dst = slot;
6196         ptr_mov.src = ra_reg;
6197         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6198     } else {
6199         imm = (uintptr_t)ldst->raddr;
6200         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6201     }
6202 }
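
/*
 * Sketch of the parameter block a backend might pass to these helpers,
 * with placeholder register numbers.  ra_gen may be left NULL, in which
 * case the code above falls back to loading ldst->raddr as an immediate;
 * backends whose slow-path branch leaves the return address in a fixed
 * register instead supply a hook that returns that register.
 */
static const TCGLdstHelperParam example_ldst_param G_GNUC_UNUSED = {
    .ra_gen = NULL,    /* no hook: load ldst->raddr as an immediate */
    .ntmp = 2,         /* worst case for tcg_out_helper_load_slots */
    .tmp = { 16, 17 }  /* placeholder call-clobbered scratch registers */
};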
6203 
6204 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6205                                        const TCGCallArgumentLoc *loc,
6206                                        TCGType dst_type, TCGType src_type,
6207                                        TCGReg lo, TCGReg hi)
6208 {
6209     MemOp reg_mo;
6210 
6211     if (dst_type <= TCG_TYPE_REG) {
6212         MemOp src_ext;
6213 
6214         switch (loc->kind) {
6215         case TCG_CALL_ARG_NORMAL:
6216             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6217             break;
6218         case TCG_CALL_ARG_EXTEND_U:
6219             dst_type = TCG_TYPE_REG;
6220             src_ext = MO_UL;
6221             break;
6222         case TCG_CALL_ARG_EXTEND_S:
6223             dst_type = TCG_TYPE_REG;
6224             src_ext = MO_SL;
6225             break;
6226         default:
6227             g_assert_not_reached();
6228         }
6229 
6230         mov[0].dst = loc->arg_slot;
6231         mov[0].dst_type = dst_type;
6232         mov[0].src = lo;
6233         mov[0].src_type = src_type;
6234         mov[0].src_ext = src_ext;
6235         return 1;
6236     }
6237 
6238     if (TCG_TARGET_REG_BITS == 32) {
6239         assert(dst_type == TCG_TYPE_I64);
6240         reg_mo = MO_32;
6241     } else {
6242         assert(dst_type == TCG_TYPE_I128);
6243         reg_mo = MO_64;
6244     }
6245 
6246     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6247     mov[0].src = lo;
6248     mov[0].dst_type = TCG_TYPE_REG;
6249     mov[0].src_type = TCG_TYPE_REG;
6250     mov[0].src_ext = reg_mo;
6251 
6252     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6253     mov[1].src = hi;
6254     mov[1].dst_type = TCG_TYPE_REG;
6255     mov[1].src_type = TCG_TYPE_REG;
6256     mov[1].src_ext = reg_mo;
6257 
6258     return 2;
6259 }
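
/*
 * Worked example for the two-slot case above, assuming a 64-bit host:
 * a 128-bit value arriving in two 64-bit registers.  The
 * loc[HOST_BIG_ENDIAN] indexing means the low half lands in the first
 * of the two adjacent slots on a little-endian host and in the second
 * on a big-endian host, matching the memory layout the helper expects.
 */
static unsigned G_GNUC_UNUSED example_add_i128_pair(TCGMovExtend *mov,
                                                    const TCGCallArgumentLoc *loc,
                                                    TCGReg lo, TCGReg hi)
{
    /* Returns 2: one TCGMovExtend per 64-bit half. */
    return tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I128, TCG_TYPE_I128,
                                  lo, hi);
}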
6260 
6261 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6262                                    const TCGLdstHelperParam *parm)
6263 {
6264     const TCGHelperInfo *info;
6265     const TCGCallArgumentLoc *loc;
6266     TCGMovExtend mov[2];
6267     unsigned next_arg, nmov;
6268     MemOp mop = get_memop(ldst->oi);
6269 
6270     switch (mop & MO_SIZE) {
6271     case MO_8:
6272     case MO_16:
6273     case MO_32:
6274         info = &info_helper_ld32_mmu;
6275         break;
6276     case MO_64:
6277         info = &info_helper_ld64_mmu;
6278         break;
6279     case MO_128:
6280         info = &info_helper_ld128_mmu;
6281         break;
6282     default:
6283         g_assert_not_reached();
6284     }
6285 
6286     /* Defer env argument. */
6287     next_arg = 1;
6288 
6289     loc = &info->in[next_arg];
6290     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6291         /*
6292          * 32-bit host with 32-bit guest: zero-extend the guest address
6293          * to 64 bits for the helper by storing the low part, then
6294          * load a zero for the high part.
6295          */
6296         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6297                                TCG_TYPE_I32, TCG_TYPE_I32,
6298                                ldst->addr_reg, -1);
6299         tcg_out_helper_load_slots(s, 1, mov, parm);
6300 
6301         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6302                                 TCG_TYPE_I32, 0, parm);
6303         next_arg += 2;
6304     } else {
6305         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6306                                       ldst->addr_reg, -1);
6307         tcg_out_helper_load_slots(s, nmov, mov, parm);
6308         next_arg += nmov;
6309     }
6310 
6311     switch (info->out_kind) {
6312     case TCG_CALL_RET_NORMAL:
6313     case TCG_CALL_RET_BY_VEC:
6314         break;
6315     case TCG_CALL_RET_BY_REF:
6316         /*
6317          * The return reference is in the first argument slot.
6318          * We need memory in which to return: re-use the top of stack.
6319          */
6320         {
6321             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6322 
6323             if (arg_slot_reg_p(0)) {
6324                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6325                                  TCG_REG_CALL_STACK, ofs_slot0);
6326             } else {
6327                 tcg_debug_assert(parm->ntmp != 0);
6328                 tcg_out_addi_ptr(s, parm->tmp[0],
6329                                  TCG_REG_CALL_STACK, ofs_slot0);
6330                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6331                            TCG_REG_CALL_STACK, ofs_slot0);
6332             }
6333         }
6334         break;
6335     default:
6336         g_assert_not_reached();
6337     }
6338 
6339     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6340 }
6341 
6342 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6343                                   bool load_sign,
6344                                   const TCGLdstHelperParam *parm)
6345 {
6346     MemOp mop = get_memop(ldst->oi);
6347     TCGMovExtend mov[2];
6348     int ofs_slot0;
6349 
6350     switch (ldst->type) {
6351     case TCG_TYPE_I64:
6352         if (TCG_TARGET_REG_BITS == 32) {
6353             break;
6354         }
6355         /* fall through */
6356 
6357     case TCG_TYPE_I32:
6358         mov[0].dst = ldst->datalo_reg;
6359         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6360         mov[0].dst_type = ldst->type;
6361         mov[0].src_type = TCG_TYPE_REG;
6362 
6363         /*
6364          * If load_sign, then we allowed the helper to perform the
6365          * appropriate sign extension to tcg_target_ulong, and all
6366          * we need now is a plain move.
6367          *
6368          * If not, then we expect the relevant extension
6369          * instruction to be no more expensive than a move, and
6370          * we thus save the icache etc by only using one of two
6371          * helper functions.
6372          */
6373         if (load_sign || !(mop & MO_SIGN)) {
6374             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6375                 mov[0].src_ext = MO_32;
6376             } else {
6377                 mov[0].src_ext = MO_64;
6378             }
6379         } else {
6380             mov[0].src_ext = mop & MO_SSIZE;
6381         }
6382         tcg_out_movext1(s, mov);
6383         return;
6384 
6385     case TCG_TYPE_I128:
6386         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6387         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6388         switch (TCG_TARGET_CALL_RET_I128) {
6389         case TCG_CALL_RET_NORMAL:
6390             break;
6391         case TCG_CALL_RET_BY_VEC:
6392             tcg_out_st(s, TCG_TYPE_V128,
6393                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6394                        TCG_REG_CALL_STACK, ofs_slot0);
6395             /* fall through */
6396         case TCG_CALL_RET_BY_REF:
6397             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6398                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6399             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6400                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6401             return;
6402         default:
6403             g_assert_not_reached();
6404         }
6405         break;
6406 
6407     default:
6408         g_assert_not_reached();
6409     }
6410 
6411     mov[0].dst = ldst->datalo_reg;
6412     mov[0].src =
6413         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6414     mov[0].dst_type = TCG_TYPE_REG;
6415     mov[0].src_type = TCG_TYPE_REG;
6416     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6417 
6418     mov[1].dst = ldst->datahi_reg;
6419     mov[1].src =
6420         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6421     mov[1].dst_type = TCG_TYPE_REG;
6422     mov[1].src_type = TCG_TYPE_REG;
6423     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6424 
6425     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6426 }
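
/*
 * Sketch of how a backend slow path (cf. the tcg_out_qemu_ld_slow_path
 * declaration near the top of this file) strings the pieces together,
 * using the placeholder example_ldst_param above.  The call itself is
 * backend-specific and therefore only indicated in comments; real
 * backends select the helper by (get_memop(l->oi) & MO_SIZE).
 */
static bool G_GNUC_UNUSED example_qemu_ld_slow_path(TCGContext *s,
                                                    TCGLabelQemuLdst *l)
{
    /* Marshal env, address, oi and return address into argument slots. */
    tcg_out_ld_helper_args(s, l, &example_ldst_param);

    /* tcg_out_call(s, <ld helper for this MO_SIZE>, <its info>); */

    /* Move the helper's result into l->datalo_reg (and datahi_reg). */
    tcg_out_ld_helper_ret(s, l, false, &example_ldst_param);

    /* Branch back to l->raddr -- backend-specific. */
    return true;
}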
6427 
6428 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6429                                    const TCGLdstHelperParam *parm)
6430 {
6431     const TCGHelperInfo *info;
6432     const TCGCallArgumentLoc *loc;
6433     TCGMovExtend mov[4];
6434     TCGType data_type;
6435     unsigned next_arg, nmov, n;
6436     MemOp mop = get_memop(ldst->oi);
6437 
6438     switch (mop & MO_SIZE) {
6439     case MO_8:
6440     case MO_16:
6441     case MO_32:
6442         info = &info_helper_st32_mmu;
6443         data_type = TCG_TYPE_I32;
6444         break;
6445     case MO_64:
6446         info = &info_helper_st64_mmu;
6447         data_type = TCG_TYPE_I64;
6448         break;
6449     case MO_128:
6450         info = &info_helper_st128_mmu;
6451         data_type = TCG_TYPE_I128;
6452         break;
6453     default:
6454         g_assert_not_reached();
6455     }
6456 
6457     /* Defer env argument. */
6458     next_arg = 1;
6459     nmov = 0;
6460 
6461     /* Handle addr argument. */
6462     loc = &info->in[next_arg];
6463     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6464     if (TCG_TARGET_REG_BITS == 32) {
6465         /*
6466          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6467          * to 64 bits for the helper by storing the low part.  Later,
6468          * after we have processed the register inputs, we will load a
6469          * zero for the high part.
6470          */
6471         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6472                                TCG_TYPE_I32, TCG_TYPE_I32,
6473                                ldst->addr_reg, -1);
6474         next_arg += 2;
6475         nmov += 1;
6476     } else {
6477         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6478                                    ldst->addr_reg, -1);
6479         next_arg += n;
6480         nmov += n;
6481     }
6482 
6483     /* Handle data argument. */
6484     loc = &info->in[next_arg];
6485     switch (loc->kind) {
6486     case TCG_CALL_ARG_NORMAL:
6487     case TCG_CALL_ARG_EXTEND_U:
6488     case TCG_CALL_ARG_EXTEND_S:
6489         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6490                                    ldst->datalo_reg, ldst->datahi_reg);
6491         next_arg += n;
6492         nmov += n;
6493         tcg_out_helper_load_slots(s, nmov, mov, parm);
6494         break;
6495 
6496     case TCG_CALL_ARG_BY_REF:
6497         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6498         tcg_debug_assert(data_type == TCG_TYPE_I128);
6499         tcg_out_st(s, TCG_TYPE_I64,
6500                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6501                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6502         tcg_out_st(s, TCG_TYPE_I64,
6503                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6504                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6505 
6506         tcg_out_helper_load_slots(s, nmov, mov, parm);
6507 
6508         if (arg_slot_reg_p(loc->arg_slot)) {
6509             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6510                              TCG_REG_CALL_STACK,
6511                              arg_slot_stk_ofs(loc->ref_slot));
6512         } else {
6513             tcg_debug_assert(parm->ntmp != 0);
6514             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6515                              arg_slot_stk_ofs(loc->ref_slot));
6516             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6517                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6518         }
6519         next_arg += 2;
6520         break;
6521 
6522     default:
6523         g_assert_not_reached();
6524     }
6525 
6526     if (TCG_TARGET_REG_BITS == 32) {
6527         /* Zero extend the address by loading a zero for the high part. */
6528         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6529         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6530     }
6531 
6532     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6533 }
6534 
6535 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6536 {
6537     int i, start_words, num_insns;
6538     TCGOp *op;
6539 
6540     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6541                  && qemu_log_in_addr_range(pc_start))) {
6542         FILE *logfile = qemu_log_trylock();
6543         if (logfile) {
6544             fprintf(logfile, "OP:\n");
6545             tcg_dump_ops(s, logfile, false);
6546             fprintf(logfile, "\n");
6547             qemu_log_unlock(logfile);
6548         }
6549     }
6550 
6551 #ifdef CONFIG_DEBUG_TCG
6552     /* Ensure all labels referenced have been emitted.  */
6553     {
6554         TCGLabel *l;
6555         bool error = false;
6556 
6557         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6558             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6559                 qemu_log_mask(CPU_LOG_TB_OP,
6560                               "$L%d referenced but not present.\n", l->id);
6561                 error = true;
6562             }
6563         }
6564         assert(!error);
6565     }
6566 #endif
6567 
6568     /* Do not reuse any EBB that may be allocated within the TB. */
6569     tcg_temp_ebb_reset_freed(s);
6570 
6571     tcg_optimize(s);
6572 
6573     reachable_code_pass(s);
6574     liveness_pass_0(s);
6575     liveness_pass_1(s);
6576 
6577     if (s->nb_indirects > 0) {
6578         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6579                      && qemu_log_in_addr_range(pc_start))) {
6580             FILE *logfile = qemu_log_trylock();
6581             if (logfile) {
6582                 fprintf(logfile, "OP before indirect lowering:\n");
6583                 tcg_dump_ops(s, logfile, false);
6584                 fprintf(logfile, "\n");
6585                 qemu_log_unlock(logfile);
6586             }
6587         }
6588 
6589         /* Replace indirect temps with direct temps.  */
6590         if (liveness_pass_2(s)) {
6591             /* If changes were made, re-run liveness.  */
6592             liveness_pass_1(s);
6593         }
6594     }
6595 
6596     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6597                  && qemu_log_in_addr_range(pc_start))) {
6598         FILE *logfile = qemu_log_trylock();
6599         if (logfile) {
6600             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6601             tcg_dump_ops(s, logfile, true);
6602             fprintf(logfile, "\n");
6603             qemu_log_unlock(logfile);
6604         }
6605     }
6606 
6607     /* Initialize goto_tb jump offsets. */
6608     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6609     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6610     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6611     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6612 
6613     tcg_reg_alloc_start(s);
6614 
6615     /*
6616      * Reset the buffer pointers when restarting after overflow.
6617      * TODO: Move this into translate-all.c with the rest of the
6618      * buffer management.  Having only this done here is confusing.
6619      */
6620     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6621     s->code_ptr = s->code_buf;
6622     s->data_gen_ptr = NULL;
6623 
6624     QSIMPLEQ_INIT(&s->ldst_labels);
6625     s->pool_labels = NULL;
6626 
6627     start_words = s->insn_start_words;
6628     s->gen_insn_data =
6629         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6630 
6631     tcg_out_tb_start(s);
6632 
6633     num_insns = -1;
6634     QTAILQ_FOREACH(op, &s->ops, link) {
6635         TCGOpcode opc = op->opc;
6636 
6637         switch (opc) {
6638         case INDEX_op_mov:
6639         case INDEX_op_mov_vec:
6640             tcg_reg_alloc_mov(s, op);
6641             break;
6642         case INDEX_op_dup_vec:
6643             tcg_reg_alloc_dup(s, op);
6644             break;
6645         case INDEX_op_insn_start:
6646             if (num_insns >= 0) {
6647                 size_t off = tcg_current_code_size(s);
6648                 s->gen_insn_end_off[num_insns] = off;
6649                 /* Assert that we do not overflow our stored offset.  */
6650                 assert(s->gen_insn_end_off[num_insns] == off);
6651             }
6652             num_insns++;
6653             for (i = 0; i < start_words; ++i) {
6654                 s->gen_insn_data[num_insns * start_words + i] =
6655                     tcg_get_insn_start_param(op, i);
6656             }
6657             break;
6658         case INDEX_op_discard:
6659             temp_dead(s, arg_temp(op->args[0]));
6660             break;
6661         case INDEX_op_set_label:
6662             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6663             tcg_out_label(s, arg_label(op->args[0]));
6664             break;
6665         case INDEX_op_call:
6666             tcg_reg_alloc_call(s, op);
6667             break;
6668         case INDEX_op_exit_tb:
6669             tcg_out_exit_tb(s, op->args[0]);
6670             break;
6671         case INDEX_op_goto_tb:
6672             tcg_out_goto_tb(s, op->args[0]);
6673             break;
6674         case INDEX_op_dup2_vec:
6675             if (tcg_reg_alloc_dup2(s, op)) {
6676                 break;
6677             }
6678             /* fall through */
6679         default:
6680             /* Sanity check that we've not introduced any unhandled opcodes. */
6681             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6682                                               TCGOP_FLAGS(op)));
6683             /* Note: it would be much faster to have specialized
6684                register allocator functions for some common argument
6685                patterns. */
6686             tcg_reg_alloc_op(s, op);
6687             break;
6688         }
6689         /* Test for (pending) buffer overflow.  The assumption is that any
6690            one operation beginning below the high water mark cannot overrun
6691            the buffer completely.  Thus we can test for overflow after
6692            generating code without having to check during generation.  */
6693         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6694             return -1;
6695         }
6696         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6697         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6698             return -2;
6699         }
6700     }
6701     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6702     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6703 
6704     /* Generate TB finalization at the end of the block. */
6705     i = tcg_out_ldst_finalize(s);
6706     if (i < 0) {
6707         return i;
6708     }
6709     i = tcg_out_pool_finalize(s);
6710     if (i < 0) {
6711         return i;
6712     }
6713     if (!tcg_resolve_relocs(s)) {
6714         return -2;
6715     }
6716 
6717 #ifndef CONFIG_TCG_INTERPRETER
6718     /* flush instruction cache */
6719     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6720                         (uintptr_t)s->code_buf,
6721                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6722 #endif
6723 
6724     return tcg_current_code_size(s);
6725 }
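
/*
 * Sketch of the restart contract implied by the negative returns above;
 * the real driver lives in the translator, and this hypothetical caller
 * only illustrates the meaning of each value: -1 means the code buffer
 * hit the high-water mark (flush and retry), -2 means a 16-bit offset
 * or relocation range overflowed (retry with a smaller TB).
 */
static int G_GNUC_UNUSED example_gen_code_once(TCGContext *s,
                                               TranslationBlock *tb,
                                               uint64_t pc_start)
{
    int n = tcg_gen_code(s, tb, pc_start);

    if (n == -1) {
        /* Caller flushes the code buffer and regenerates the TB. */
    } else if (n == -2) {
        /* Caller retries translation with fewer guest instructions. */
    }
    return n;   /* otherwise: the size in bytes of the generated code */
}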
6726 
6727 #ifdef ELF_HOST_MACHINE
6728 /* In order to use this feature, the backend needs to do three things:
6729 
6730    (1) Define ELF_HOST_MACHINE to indicate both what value to
6731        put into the ELF image and to indicate support for the feature.
6732 
6733    (2) Define tcg_register_jit.  This should create a buffer containing
6734        the contents of a .debug_frame section that describes the post-
6735        prologue unwind info for the tcg machine.
6736 
6737    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6738 */
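
/*
 * A minimal sketch of steps (2) and (3), assuming a host whose unwind
 * info needs nothing beyond the common CIE/FDE header defined near the
 * top of this file; real backends append host-specific opcode bytes,
 * and every numeric value below is a placeholder.  Disabled because the
 * real tcg_register_jit comes from tcg-target.c.inc.
 */
#if 0
static const DebugFrameHeader example_debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .cie.id = -1,                         /* CIE_id for .debug_frame */
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,               /* sleb128 -8; placeholder */
    .cie.return_column = 30,              /* placeholder register */
    .fde.len = sizeof(DebugFrameFDEHeader) - 4,
    /* fde.func_start and fde.func_len are patched by
       tcg_register_jit_int below. */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size,
                         &example_debug_frame, sizeof(example_debug_frame));
}
#endif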
6739 
6740 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6741 typedef enum {
6742     JIT_NOACTION = 0,
6743     JIT_REGISTER_FN,
6744     JIT_UNREGISTER_FN
6745 } jit_actions_t;
6746 
6747 struct jit_code_entry {
6748     struct jit_code_entry *next_entry;
6749     struct jit_code_entry *prev_entry;
6750     const void *symfile_addr;
6751     uint64_t symfile_size;
6752 };
6753 
6754 struct jit_descriptor {
6755     uint32_t version;
6756     uint32_t action_flag;
6757     struct jit_code_entry *relevant_entry;
6758     struct jit_code_entry *first_entry;
6759 };
6760 
6761 void __jit_debug_register_code(void) __attribute__((noinline));
6762 void __jit_debug_register_code(void)
6763 {
6764     asm("");
6765 }
6766 
6767 /* Must statically initialize the version, because GDB may check
6768    the version before we can set it.  */
6769 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6770 
6771 /* End GDB interface.  */
6772 
6773 static int find_string(const char *strtab, const char *str)
6774 {
6775     const char *p = strtab + 1;
6776 
6777     while (1) {
6778         if (strcmp(p, str) == 0) {
6779             return p - strtab;
6780         }
6781         p += strlen(p) + 1;
6782     }
6783 }
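
/*
 * Example: with a string table shaped like img_template.str below, the
 * first string lives at offset 1, just past the leading NUL.  Note the
 * loop above has no failure case by design: callers may only ask for
 * strings that are known to be present.
 */
static void G_GNUC_UNUSED example_find_string(void)
{
    static const char strtab[] = "\0.text\0.debug_info";

    assert(find_string(strtab, ".text") == 1);
    assert(find_string(strtab, ".debug_info") == 7);
}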
6784 
6785 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6786                                  const void *debug_frame,
6787                                  size_t debug_frame_size)
6788 {
6789     struct __attribute__((packed)) DebugInfo {
6790         uint32_t  len;
6791         uint16_t  version;
6792         uint32_t  abbrev;
6793         uint8_t   ptr_size;
6794         uint8_t   cu_die;
6795         uint16_t  cu_lang;
6796         uintptr_t cu_low_pc;
6797         uintptr_t cu_high_pc;
6798         uint8_t   fn_die;
6799         char      fn_name[16];
6800         uintptr_t fn_low_pc;
6801         uintptr_t fn_high_pc;
6802         uint8_t   cu_eoc;
6803     };
6804 
6805     struct ElfImage {
6806         ElfW(Ehdr) ehdr;
6807         ElfW(Phdr) phdr;
6808         ElfW(Shdr) shdr[7];
6809         ElfW(Sym)  sym[2];
6810         struct DebugInfo di;
6811         uint8_t    da[24];
6812         char       str[80];
6813     };
6814 
6815     struct ElfImage *img;
6816 
6817     static const struct ElfImage img_template = {
6818         .ehdr = {
6819             .e_ident[EI_MAG0] = ELFMAG0,
6820             .e_ident[EI_MAG1] = ELFMAG1,
6821             .e_ident[EI_MAG2] = ELFMAG2,
6822             .e_ident[EI_MAG3] = ELFMAG3,
6823             .e_ident[EI_CLASS] = ELF_CLASS,
6824             .e_ident[EI_DATA] = ELF_DATA,
6825             .e_ident[EI_VERSION] = EV_CURRENT,
6826             .e_type = ET_EXEC,
6827             .e_machine = ELF_HOST_MACHINE,
6828             .e_version = EV_CURRENT,
6829             .e_phoff = offsetof(struct ElfImage, phdr),
6830             .e_shoff = offsetof(struct ElfImage, shdr),
6831             .e_ehsize = sizeof(ElfW(Ehdr)),
6832             .e_phentsize = sizeof(ElfW(Phdr)),
6833             .e_phnum = 1,
6834             .e_shentsize = sizeof(ElfW(Shdr)),
6835             .e_shnum = ARRAY_SIZE(img->shdr),
6836             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6837 #ifdef ELF_HOST_FLAGS
6838             .e_flags = ELF_HOST_FLAGS,
6839 #endif
6840 #ifdef ELF_OSABI
6841             .e_ident[EI_OSABI] = ELF_OSABI,
6842 #endif
6843         },
6844         .phdr = {
6845             .p_type = PT_LOAD,
6846             .p_flags = PF_X,
6847         },
6848         .shdr = {
6849             [0] = { .sh_type = SHT_NULL },
6850             /* Trick: The contents of code_gen_buffer are not present in
6851                this fake ELF file; that got allocated elsewhere.  Therefore
6852                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6853                will not look for contents.  We can record any address.  */
6854             [1] = { /* .text */
6855                 .sh_type = SHT_NOBITS,
6856                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6857             },
6858             [2] = { /* .debug_info */
6859                 .sh_type = SHT_PROGBITS,
6860                 .sh_offset = offsetof(struct ElfImage, di),
6861                 .sh_size = sizeof(struct DebugInfo),
6862             },
6863             [3] = { /* .debug_abbrev */
6864                 .sh_type = SHT_PROGBITS,
6865                 .sh_offset = offsetof(struct ElfImage, da),
6866                 .sh_size = sizeof(img->da),
6867             },
6868             [4] = { /* .debug_frame */
6869                 .sh_type = SHT_PROGBITS,
6870                 .sh_offset = sizeof(struct ElfImage),
6871             },
6872             [5] = { /* .symtab */
6873                 .sh_type = SHT_SYMTAB,
6874                 .sh_offset = offsetof(struct ElfImage, sym),
6875                 .sh_size = sizeof(img->sym),
6876                 .sh_info = 1,
6877                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6878                 .sh_entsize = sizeof(ElfW(Sym)),
6879             },
6880             [6] = { /* .strtab */
6881                 .sh_type = SHT_STRTAB,
6882                 .sh_offset = offsetof(struct ElfImage, str),
6883                 .sh_size = sizeof(img->str),
6884             }
6885         },
6886         .sym = {
6887             [1] = { /* code_gen_buffer */
6888                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6889                 .st_shndx = 1,
6890             }
6891         },
6892         .di = {
6893             .len = sizeof(struct DebugInfo) - 4,
6894             .version = 2,
6895             .ptr_size = sizeof(void *),
6896             .cu_die = 1,
6897             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6898             .fn_die = 2,
6899             .fn_name = "code_gen_buffer"
6900         },
6901         .da = {
6902             1,          /* abbrev number (the cu) */
6903             0x11, 1,    /* DW_TAG_compile_unit, has children */
6904             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6905             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6906             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6907             0, 0,       /* end of abbrev */
6908             2,          /* abbrev number (the fn) */
6909             0x2e, 0,    /* DW_TAG_subprogram, no children */
6910             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6911             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6912             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6913             0, 0,       /* end of abbrev */
6914             0           /* no more abbrev */
6915         },
6916         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6917                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6918     };
6919 
6920     /* We only need a single jit entry; statically allocate it.  */
6921     static struct jit_code_entry one_entry;
6922 
6923     uintptr_t buf = (uintptr_t)buf_ptr;
6924     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6925     DebugFrameHeader *dfh;
6926 
6927     img = g_malloc(img_size);
6928     *img = img_template;
6929 
6930     img->phdr.p_vaddr = buf;
6931     img->phdr.p_paddr = buf;
6932     img->phdr.p_memsz = buf_size;
6933 
6934     img->shdr[1].sh_name = find_string(img->str, ".text");
6935     img->shdr[1].sh_addr = buf;
6936     img->shdr[1].sh_size = buf_size;
6937 
6938     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6939     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6940 
6941     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6942     img->shdr[4].sh_size = debug_frame_size;
6943 
6944     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6945     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6946 
6947     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6948     img->sym[1].st_value = buf;
6949     img->sym[1].st_size = buf_size;
6950 
6951     img->di.cu_low_pc = buf;
6952     img->di.cu_high_pc = buf + buf_size;
6953     img->di.fn_low_pc = buf;
6954     img->di.fn_high_pc = buf + buf_size;
6955 
6956     dfh = (DebugFrameHeader *)(img + 1);
6957     memcpy(dfh, debug_frame, debug_frame_size);
6958     dfh->fde.func_start = buf;
6959     dfh->fde.func_len = buf_size;
6960 
6961 #ifdef DEBUG_JIT
6962     /* Enable this block to be able to debug the ELF image file creation.
6963        One can use readelf, objdump, or other inspection utilities.  */
6964     {
6965         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6966         FILE *f = fopen(jit, "w+b");
6967         if (f) {
6968             if (fwrite(img, img_size, 1, f) != img_size) {
6969                 /* Avoid stupid unused return value warning for fwrite.  */
6970             }
6971             fclose(f);
6972         }
6973     }
6974 #endif
6975 
6976     one_entry.symfile_addr = img;
6977     one_entry.symfile_size = img_size;
6978 
6979     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6980     __jit_debug_descriptor.relevant_entry = &one_entry;
6981     __jit_debug_descriptor.first_entry = &one_entry;
6982     __jit_debug_register_code();
6983 }
6984 #else
6985 /* No support for the feature.  Provide the entry point expected by exec.c,
6986    and implement the internal function we declared earlier.  */
6987 
6988 static void tcg_register_jit_int(const void *buf, size_t size,
6989                                  const void *debug_frame,
6990                                  size_t debug_frame_size)
6991 {
6992 }
6993 
6994 void tcg_register_jit(const void *buf, size_t buf_size)
6995 {
6996 }
6997 #endif /* ELF_HOST_MACHINE */
6998 
6999 #if !TCG_TARGET_MAYBE_vec
7000 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
7001 {
7002     g_assert_not_reached();
7003 }
7004 #endif
7005