/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
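
/*
 * Illustrative sketch: a backend's fast path for qemu_ld typically
 * allocates one of these via new_ldst_label() below, records its
 * operands, and leaves label_ptr[] pointing at the branch(es) to be
 * patched when tcg_out_ldst_finalize() emits the slow path.  With
 * oi/addr_reg/data_reg standing in for the backend's operands:
 *
 *     TCGLabelQemuLdst *ldst = new_ldst_label(s);
 *     ldst->is_ld = true;
 *     ldst->oi = oi;
 *     ldst->addr_reg = addr_reg;
 *     ldst->datalo_reg = data_reg;
 *     ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
 */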

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
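
/*
 * Example: both fields being lg2 of a byte count, a result of
 * { .atom = MO_16, .align = MO_32 } would require the access to be
 * performed in pieces no smaller than 2 bytes, at an address aligned
 * to 4 bytes.
 */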

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
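
/*
 * Usage sketch: a backend emits in units of its own tcg_insn_unit.
 * With TCG_TARGET_INSN_UNIT_SIZE == 1 (x86-style variable-length
 * encoding) an instruction is built a byte at a time, while a
 * fixed-width backend with 4-byte units emits whole words:
 *
 *     tcg_out8(s, 0x90);        (append one byte at s->code_ptr)
 *     tcg_out32(s, insn);       (append one 32-bit instruction word)
 *
 * The tcg_patchN variants instead rewrite an already-emitted unit in
 * place, e.g. from the backend's patch_reloc().
 */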

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
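
/*
 * Typical usage (sketch): a forward branch records a relocation
 * against a not-yet-bound label, which tcg_out_label() later binds to
 * the current output position; tcg_resolve_relocs() below then patches
 * every recorded site via the backend's patch_reloc().  R_MY_BRANCH
 * stands in for a backend-specific relocation type:
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0);
 *     (emit the branch and the code it skips)
 *     tcg_out_label(s, l);
 */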

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
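
/*
 * For example, sign-extending a 32-bit value into a 64-bit register is
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * which reduces to tcg_out_exts_i32_i64(s, dst, src) via the MO_SL
 * case above.
 */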

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
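
/*
 * Worked example: if @i1 moves R1 into R2 while @i2 moves R2 into R1,
 * each destination overlaps the other source.  When the host provides
 * tcg_out_xchg(), the pair is swapped and then extended in place;
 * otherwise R1 is first parked in @scratch:
 *
 *     mov  scratch, R1
 *     ext  R1, R2          (i2)
 *     ext  R2, scratch     (i1)
 */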

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
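
/*
 * Example (sketch): a backend's tcg_out_movi might emit a pc-relative
 * load whose displacement is filled in later, recording the current
 * output position together with a backend-specific relocation type
 * (R_MY_PCREL stands in for the host's type):
 *
 *     new_pool_label(s, value, R_MY_PCREL, s->code_ptr, 0);
 *     (emit the load with a zero displacement)
 *
 * tcg_out_pool_finalize() below writes the deduplicated pool data
 * after the code and patches each recorded site via patch_reloc().
 */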

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
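
/*
 * For example, a line C_O1_I2(r, r, ri) in the backend's
 * tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri,
 * and below, once the macros are redefined, to the entry
 * { 1, 2, { "r", "r", "ri" } } in constraint_sets[].
 */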

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;
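
/*
 * A backend provides, for example (sketch; emitter names vary by
 * host):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,     (reg = reg op reg)
 *         .out_rri = tgen_addi,    (reg = reg op immediate)
 *     };
 *
 * in tcg-target.c.inc; all_outop[] below then binds it to INDEX_op_add
 * via OUTOP().
 */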

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
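
/*
 * The typemask packs one 3-bit typecode per position: bits [2:0] hold
 * the return type and bits [3n+2:3n] hold argument position n.  For
 * info_helper_ld32_mmu above, (typemask >> 3) & 7 is the typecode of
 * the env argument, (typemask >> 6) & 7 that of the addr argument, and
 * so on; init_ffi_layout() below walks the mask the same way with
 * extract32(typemask, (j + 1) * 3, 3).
 */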

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
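
/*
 * Worked example (sketch): for a helper with four arguments, the last
 * typecode occupies bits [11:9] of the shifted mask, so
 * 32 - clz32(typemask >> 3) is between 10 and 12 and
 * DIV_ROUND_UP(..., 3) yields nargs == 4.
 */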

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
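
/*
 * Example (sketch): with 8 integer argument registers and an 8-byte
 * tcg_target_long, arg_slot == 10 is the third stack slot and resolves
 * to TCG_TARGET_CALL_STACK_OFFSET + 2 * 8.
 */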

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
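
/*
 * E.g. with TCG_CALL_ARG_EVEN, a 64-bit argument on a 32-bit host must
 * begin on an even slot, so an arg_slot of 3 is bumped to 4.
 */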

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * a structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
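
/*
 * Example (sketch): for an I128 argument on a 64-bit host, n == 2: one
 * regular argument slot carries the pointer, while both words of the
 * copy occupy ref_slot and ref_slot + 1.
 */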
1469 
1470 static void init_call_layout(TCGHelperInfo *info)
1471 {
1472     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1473     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1474     unsigned typemask = info->typemask;
1475     unsigned typecode;
1476     TCGCumulativeArgs cum = { };
1477 
1478     /*
1479      * Parse and place any function return value.
1480      */
1481     typecode = typemask & 7;
1482     switch (typecode) {
1483     case dh_typecode_void:
1484         info->nr_out = 0;
1485         break;
1486     case dh_typecode_i32:
1487     case dh_typecode_s32:
1488     case dh_typecode_ptr:
1489         info->nr_out = 1;
1490         info->out_kind = TCG_CALL_RET_NORMAL;
1491         break;
1492     case dh_typecode_i64:
1493     case dh_typecode_s64:
1494         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1495         info->out_kind = TCG_CALL_RET_NORMAL;
1496         /* Query the last register now to trigger any assert early. */
1497         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1498         break;
1499     case dh_typecode_i128:
1500         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1501         info->out_kind = TCG_TARGET_CALL_RET_I128;
1502         switch (TCG_TARGET_CALL_RET_I128) {
1503         case TCG_CALL_RET_NORMAL:
1504             /* Query the last register now to trigger any assert early. */
1505             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1506             break;
1507         case TCG_CALL_RET_BY_VEC:
1508             /* Query the single register now to trigger any assert early. */
1509             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1510             break;
1511         case TCG_CALL_RET_BY_REF:
1512             /*
1513              * Allocate the first argument to the output.
1514              * We don't need to store this anywhere, just make it
1515              * unavailable for use in the input loop below.
1516              */
1517             cum.arg_slot = 1;
1518             break;
1519         default:
1520             qemu_build_not_reached();
1521         }
1522         break;
1523     default:
1524         g_assert_not_reached();
1525     }
1526 
1527     /*
1528      * Parse and place function arguments.
1529      */
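    /*
     * Each typecode occupies 3 bits of typemask; the low 3 bits held the
     * return type and were consumed above, so each shift by 3 advances
     * to the next argument.
     */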
1530     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1531         TCGCallArgumentKind kind;
1532         TCGType type;
1533 
1534         typecode = typemask & 7;
1535         switch (typecode) {
1536         case dh_typecode_i32:
1537         case dh_typecode_s32:
1538             type = TCG_TYPE_I32;
1539             break;
1540         case dh_typecode_i64:
1541         case dh_typecode_s64:
1542             type = TCG_TYPE_I64;
1543             break;
1544         case dh_typecode_ptr:
1545             type = TCG_TYPE_PTR;
1546             break;
1547         case dh_typecode_i128:
1548             type = TCG_TYPE_I128;
1549             break;
1550         default:
1551             g_assert_not_reached();
1552         }
1553 
1554         switch (type) {
1555         case TCG_TYPE_I32:
1556             switch (TCG_TARGET_CALL_ARG_I32) {
1557             case TCG_CALL_ARG_EVEN:
1558                 layout_arg_even(&cum);
1559                 /* fall through */
1560             case TCG_CALL_ARG_NORMAL:
1561                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1562                 break;
1563             case TCG_CALL_ARG_EXTEND:
1564                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1565                 layout_arg_1(&cum, info, kind);
1566                 break;
1567             default:
1568                 qemu_build_not_reached();
1569             }
1570             break;
1571 
1572         case TCG_TYPE_I64:
1573             switch (TCG_TARGET_CALL_ARG_I64) {
1574             case TCG_CALL_ARG_EVEN:
1575                 layout_arg_even(&cum);
1576                 /* fall through */
1577             case TCG_CALL_ARG_NORMAL:
1578                 if (TCG_TARGET_REG_BITS == 32) {
1579                     layout_arg_normal_n(&cum, info, 2);
1580                 } else {
1581                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1582                 }
1583                 break;
1584             default:
1585                 qemu_build_not_reached();
1586             }
1587             break;
1588 
1589         case TCG_TYPE_I128:
1590             switch (TCG_TARGET_CALL_ARG_I128) {
1591             case TCG_CALL_ARG_EVEN:
1592                 layout_arg_even(&cum);
1593                 /* fall through */
1594             case TCG_CALL_ARG_NORMAL:
1595                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1596                 break;
1597             case TCG_CALL_ARG_BY_REF:
1598                 layout_arg_by_ref(&cum, info);
1599                 break;
1600             default:
1601                 qemu_build_not_reached();
1602             }
1603             break;
1604 
1605         default:
1606             g_assert_not_reached();
1607         }
1608     }
1609     info->nr_in = cum.info_in_idx;
1610 
1611     /* Validate that we didn't overrun the input array. */
1612     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1613     /* Validate the backend has enough argument space. */
1614     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1615 
1616     /*
1617      * Relocate the "ref_slot" area to the end of the parameters.
1618      * Minimizing this stack offset helps code size for x86,
1619      * which has a signed 8-bit offset encoding.
1620      */
1621     if (cum.ref_slot != 0) {
1622         int ref_base = 0;
1623 
1624         if (cum.arg_slot > max_reg_slots) {
1625             int align = __alignof(Int128) / sizeof(tcg_target_long);
1626 
1627             ref_base = cum.arg_slot - max_reg_slots;
1628             if (align > 1) {
1629                 ref_base = ROUND_UP(ref_base, align);
1630             }
1631         }
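        /*
         * Illustrative: with 8 register slots and cum.arg_slot == 10, two
         * stack slots are already in use, so ref_base becomes 2 (possibly
         * rounded up for Int128 alignment) before being rebased past the
         * register slots below.
         */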
1632         assert(ref_base + cum.ref_slot <= max_stk_slots);
1633         ref_base += max_reg_slots;
1634 
1635         if (ref_base != 0) {
1636             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1637                 TCGCallArgumentLoc *loc = &info->in[i];
1638                 switch (loc->kind) {
1639                 case TCG_CALL_ARG_BY_REF:
1640                 case TCG_CALL_ARG_BY_REF_N:
1641                     loc->ref_slot += ref_base;
1642                     break;
1643                 default:
1644                     break;
1645                 }
1646             }
1647         }
1648     }
1649 }
1650 
1651 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1652 static void process_constraint_sets(void);
1653 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1654                                             TCGReg reg, const char *name);
1655 
1656 static void tcg_context_init(unsigned max_threads)
1657 {
1658     TCGContext *s = &tcg_init_ctx;
1659     int n, i;
1660     TCGTemp *ts;
1661 
1662     memset(s, 0, sizeof(*s));
1663     s->nb_globals = 0;
1664 
1665     init_call_layout(&info_helper_ld32_mmu);
1666     init_call_layout(&info_helper_ld64_mmu);
1667     init_call_layout(&info_helper_ld128_mmu);
1668     init_call_layout(&info_helper_st32_mmu);
1669     init_call_layout(&info_helper_st64_mmu);
1670     init_call_layout(&info_helper_st128_mmu);
1671 
1672     tcg_target_init(s);
1673     process_constraint_sets();
1674 
1675     /* Reverse the order of the saved registers, assuming they're all at
1676        the start of tcg_target_reg_alloc_order.  */
1677     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1678         int r = tcg_target_reg_alloc_order[n];
1679         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1680             break;
1681         }
1682     }
1683     for (i = 0; i < n; ++i) {
1684         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1685     }
1686     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1687         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1688     }
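    /*
     * Illustrative: if the order were { s0, s1, s2, c0, c1 } with the s*
     * registers call-saved, the indirect order becomes
     * { s2, s1, s0, c0, c1 }.
     */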
1689 
1690     tcg_ctx = s;
1691     /*
1692      * In user-mode we simply share the init context among threads, since we
1693      * use a single region. See the documentation of tcg_region_init() for the
1694      * reasoning behind this.
1695      * In system-mode we will have at most max_threads TCG threads.
1696      */
1697 #ifdef CONFIG_USER_ONLY
1698     tcg_ctxs = &tcg_ctx;
1699     tcg_cur_ctxs = 1;
1700     tcg_max_ctxs = 1;
1701 #else
1702     tcg_max_ctxs = max_threads;
1703     tcg_ctxs = g_new0(TCGContext *, max_threads);
1704 #endif
1705 
1706     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1707     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1708     tcg_env = temp_tcgv_ptr(ts);
1709 }
1710 
1711 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1712 {
1713     tcg_context_init(max_threads);
1714     tcg_region_init(tb_size, splitwx, max_threads);
1715 }
1716 
1717 /*
1718  * Allocate TBs right before their corresponding translated code, making
1719  * sure that TBs and code are on different cache lines.
1720  */
1721 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1722 {
1723     uintptr_t align = qemu_icache_linesize;
1724     TranslationBlock *tb;
1725     void *next;
1726 
1727  retry:
1728     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1729     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
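    /*
     * E.g. with a 64-byte icache line and code_gen_ptr at offset 0x1234,
     * the TB is placed at offset 0x1240 and code generation resumes at
     * the next line boundary past the TB struct (illustrative values).
     */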
1730 
1731     if (unlikely(next > s->code_gen_highwater)) {
1732         if (tcg_region_alloc(s)) {
1733             return NULL;
1734         }
1735         goto retry;
1736     }
1737     qatomic_set(&s->code_gen_ptr, next);
1738     return tb;
1739 }
1740 
1741 void tcg_prologue_init(void)
1742 {
1743     TCGContext *s = tcg_ctx;
1744     size_t prologue_size;
1745 
1746     s->code_ptr = s->code_gen_ptr;
1747     s->code_buf = s->code_gen_ptr;
1748     s->data_gen_ptr = NULL;
1749 
1750 #ifndef CONFIG_TCG_INTERPRETER
1751     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1752 #endif
1753 
1754     s->pool_labels = NULL;
1755 
1756     qemu_thread_jit_write();
1757     /* Generate the prologue.  */
1758     tcg_target_qemu_prologue(s);
1759 
1760     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1761     {
1762         int result = tcg_out_pool_finalize(s);
1763         tcg_debug_assert(result == 0);
1764     }
1765 
1766     prologue_size = tcg_current_code_size(s);
1767     perf_report_prologue(s->code_gen_ptr, prologue_size);
1768 
1769 #ifndef CONFIG_TCG_INTERPRETER
1770     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1771                         (uintptr_t)s->code_buf, prologue_size);
1772 #endif
1773 
1774     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1775         FILE *logfile = qemu_log_trylock();
1776         if (logfile) {
1777             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1778             if (s->data_gen_ptr) {
1779                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1780                 size_t data_size = prologue_size - code_size;
1781                 size_t i;
1782 
1783                 disas(logfile, s->code_gen_ptr, code_size);
1784 
1785                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1786                     if (sizeof(tcg_target_ulong) == 8) {
1787                         fprintf(logfile,
1788                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1789                                 (uintptr_t)s->data_gen_ptr + i,
1790                                 *(uint64_t *)(s->data_gen_ptr + i));
1791                     } else {
1792                         fprintf(logfile,
1793                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1794                                 (uintptr_t)s->data_gen_ptr + i,
1795                                 *(uint32_t *)(s->data_gen_ptr + i));
1796                     }
1797                 }
1798             } else {
1799                 disas(logfile, s->code_gen_ptr, prologue_size);
1800             }
1801             fprintf(logfile, "\n");
1802             qemu_log_unlock(logfile);
1803         }
1804     }
1805 
1806 #ifndef CONFIG_TCG_INTERPRETER
1807     /*
1808      * Assert that goto_ptr is implemented completely, setting an epilogue.
1809      * For tci, we use NULL as the signal to return from the interpreter,
1810      * so skip this check.
1811      */
1812     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1813 #endif
1814 
1815     tcg_region_prologue_set(s);
1816 }
1817 
1818 void tcg_func_start(TCGContext *s)
1819 {
1820     tcg_pool_reset(s);
1821     s->nb_temps = s->nb_globals;
1822 
1823     /* No temps have been previously allocated for size or locality.  */
1824     tcg_temp_ebb_reset_freed(s);
1825 
1826     /* No constant temps have been previously allocated. */
1827     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1828         if (s->const_table[i]) {
1829             g_hash_table_remove_all(s->const_table[i]);
1830         }
1831     }
1832 
1833     s->nb_ops = 0;
1834     s->nb_labels = 0;
1835     s->current_frame_offset = s->frame_start;
1836 
1837 #ifdef CONFIG_DEBUG_TCG
1838     s->goto_tb_issue_mask = 0;
1839 #endif
1840 
1841     QTAILQ_INIT(&s->ops);
1842     QTAILQ_INIT(&s->free_ops);
1843     s->emit_before_op = NULL;
1844     QSIMPLEQ_INIT(&s->labels);
1845 
1846     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1847     tcg_debug_assert(s->insn_start_words > 0);
1848 }
1849 
1850 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1851 {
1852     int n = s->nb_temps++;
1853 
1854     if (n >= TCG_MAX_TEMPS) {
1855         tcg_raise_tb_overflow(s);
1856     }
1857     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1858 }
1859 
1860 static TCGTemp *tcg_global_alloc(TCGContext *s)
1861 {
1862     TCGTemp *ts;
1863 
1864     tcg_debug_assert(s->nb_globals == s->nb_temps);
1865     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1866     s->nb_globals++;
1867     ts = tcg_temp_alloc(s);
1868     ts->kind = TEMP_GLOBAL;
1869 
1870     return ts;
1871 }
1872 
1873 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1874                                             TCGReg reg, const char *name)
1875 {
1876     TCGTemp *ts;
1877 
1878     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1879 
1880     ts = tcg_global_alloc(s);
1881     ts->base_type = type;
1882     ts->type = type;
1883     ts->kind = TEMP_FIXED;
1884     ts->reg = reg;
1885     ts->name = name;
1886     tcg_regset_set_reg(s->reserved_regs, reg);
1887 
1888     return ts;
1889 }
1890 
1891 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1892 {
1893     s->frame_start = start;
1894     s->frame_end = start + size;
1895     s->frame_temp
1896         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1897 }
1898 
1899 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1900                                             const char *name, TCGType type)
1901 {
1902     TCGContext *s = tcg_ctx;
1903     TCGTemp *base_ts = tcgv_ptr_temp(base);
1904     TCGTemp *ts = tcg_global_alloc(s);
1905     int indirect_reg = 0;
1906 
1907     switch (base_ts->kind) {
1908     case TEMP_FIXED:
1909         break;
1910     case TEMP_GLOBAL:
1911         /* We do not support double-indirect registers.  */
1912         tcg_debug_assert(!base_ts->indirect_reg);
1913         base_ts->indirect_base = 1;
1914         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1915                             ? 2 : 1);
1916         indirect_reg = 1;
1917         break;
1918     default:
1919         g_assert_not_reached();
1920     }
1921 
1922     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
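        /*
         * Illustrative: a 64-bit global "foo" on a 32-bit host becomes
         * two I32 halves, "foo_0" at offset and "foo_1" at offset + 4,
         * linked via temp_subindex.
         */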
1923         TCGTemp *ts2 = tcg_global_alloc(s);
1924         char buf[64];
1925 
1926         ts->base_type = TCG_TYPE_I64;
1927         ts->type = TCG_TYPE_I32;
1928         ts->indirect_reg = indirect_reg;
1929         ts->mem_allocated = 1;
1930         ts->mem_base = base_ts;
1931         ts->mem_offset = offset;
1932         pstrcpy(buf, sizeof(buf), name);
1933         pstrcat(buf, sizeof(buf), "_0");
1934         ts->name = strdup(buf);
1935 
1936         tcg_debug_assert(ts2 == ts + 1);
1937         ts2->base_type = TCG_TYPE_I64;
1938         ts2->type = TCG_TYPE_I32;
1939         ts2->indirect_reg = indirect_reg;
1940         ts2->mem_allocated = 1;
1941         ts2->mem_base = base_ts;
1942         ts2->mem_offset = offset + 4;
1943         ts2->temp_subindex = 1;
1944         pstrcpy(buf, sizeof(buf), name);
1945         pstrcat(buf, sizeof(buf), "_1");
1946         ts2->name = strdup(buf);
1947     } else {
1948         ts->base_type = type;
1949         ts->type = type;
1950         ts->indirect_reg = indirect_reg;
1951         ts->mem_allocated = 1;
1952         ts->mem_base = base_ts;
1953         ts->mem_offset = offset;
1954         ts->name = name;
1955     }
1956     return ts;
1957 }
1958 
1959 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1960 {
1961     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1962     return temp_tcgv_i32(ts);
1963 }
1964 
1965 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1966 {
1967     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1968     return temp_tcgv_i64(ts);
1969 }
1970 
1971 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1972 {
1973     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1974     return temp_tcgv_ptr(ts);
1975 }
1976 
1977 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1978 {
1979     TCGContext *s = tcg_ctx;
1980     TCGTemp *ts;
1981     int n;
1982 
1983     if (kind == TEMP_EBB) {
1984         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1985 
1986         if (idx < TCG_MAX_TEMPS) {
1987             /* There is already an available temp with the right type.  */
1988             clear_bit(idx, s->free_temps[type].l);
1989 
1990             ts = &s->temps[idx];
1991             ts->temp_allocated = 1;
1992             tcg_debug_assert(ts->base_type == type);
1993             tcg_debug_assert(ts->kind == kind);
1994             return ts;
1995         }
1996     } else {
1997         tcg_debug_assert(kind == TEMP_TB);
1998     }
1999 
2000     switch (type) {
2001     case TCG_TYPE_I32:
2002     case TCG_TYPE_V64:
2003     case TCG_TYPE_V128:
2004     case TCG_TYPE_V256:
2005         n = 1;
2006         break;
2007     case TCG_TYPE_I64:
2008         n = 64 / TCG_TARGET_REG_BITS;
2009         break;
2010     case TCG_TYPE_I128:
2011         n = 128 / TCG_TARGET_REG_BITS;
2012         break;
2013     default:
2014         g_assert_not_reached();
2015     }
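    /* E.g. an I128 temp needs n == 2 TCGTemps on a 64-bit host, 4 on 32-bit. */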
2016 
2017     ts = tcg_temp_alloc(s);
2018     ts->base_type = type;
2019     ts->temp_allocated = 1;
2020     ts->kind = kind;
2021 
2022     if (n == 1) {
2023         ts->type = type;
2024     } else {
2025         ts->type = TCG_TYPE_REG;
2026 
2027         for (int i = 1; i < n; ++i) {
2028             TCGTemp *ts2 = tcg_temp_alloc(s);
2029 
2030             tcg_debug_assert(ts2 == ts + i);
2031             ts2->base_type = type;
2032             ts2->type = TCG_TYPE_REG;
2033             ts2->temp_allocated = 1;
2034             ts2->temp_subindex = i;
2035             ts2->kind = kind;
2036         }
2037     }
2038     return ts;
2039 }
2040 
2041 TCGv_i32 tcg_temp_new_i32(void)
2042 {
2043     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2044 }
2045 
2046 TCGv_i32 tcg_temp_ebb_new_i32(void)
2047 {
2048     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2049 }
2050 
2051 TCGv_i64 tcg_temp_new_i64(void)
2052 {
2053     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2054 }
2055 
2056 TCGv_i64 tcg_temp_ebb_new_i64(void)
2057 {
2058     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2059 }
2060 
2061 TCGv_ptr tcg_temp_new_ptr(void)
2062 {
2063     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2064 }
2065 
2066 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2067 {
2068     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2069 }
2070 
2071 TCGv_i128 tcg_temp_new_i128(void)
2072 {
2073     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2074 }
2075 
2076 TCGv_i128 tcg_temp_ebb_new_i128(void)
2077 {
2078     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2079 }
2080 
2081 TCGv_vec tcg_temp_new_vec(TCGType type)
2082 {
2083     TCGTemp *t;
2084 
2085 #ifdef CONFIG_DEBUG_TCG
2086     switch (type) {
2087     case TCG_TYPE_V64:
2088         assert(TCG_TARGET_HAS_v64);
2089         break;
2090     case TCG_TYPE_V128:
2091         assert(TCG_TARGET_HAS_v128);
2092         break;
2093     case TCG_TYPE_V256:
2094         assert(TCG_TARGET_HAS_v256);
2095         break;
2096     default:
2097         g_assert_not_reached();
2098     }
2099 #endif
2100 
2101     t = tcg_temp_new_internal(type, TEMP_EBB);
2102     return temp_tcgv_vec(t);
2103 }
2104 
2105 /* Create a new temp of the same type as an existing temp.  */
2106 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2107 {
2108     TCGTemp *t = tcgv_vec_temp(match);
2109 
2110     tcg_debug_assert(t->temp_allocated != 0);
2111 
2112     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2113     return temp_tcgv_vec(t);
2114 }
2115 
2116 void tcg_temp_free_internal(TCGTemp *ts)
2117 {
2118     TCGContext *s = tcg_ctx;
2119 
2120     switch (ts->kind) {
2121     case TEMP_CONST:
2122     case TEMP_TB:
2123         /* Silently ignore free. */
2124         break;
2125     case TEMP_EBB:
2126         tcg_debug_assert(ts->temp_allocated != 0);
2127         ts->temp_allocated = 0;
2128         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2129         break;
2130     default:
2131         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2132         g_assert_not_reached();
2133     }
2134 }
2135 
2136 void tcg_temp_free_i32(TCGv_i32 arg)
2137 {
2138     tcg_temp_free_internal(tcgv_i32_temp(arg));
2139 }
2140 
2141 void tcg_temp_free_i64(TCGv_i64 arg)
2142 {
2143     tcg_temp_free_internal(tcgv_i64_temp(arg));
2144 }
2145 
2146 void tcg_temp_free_i128(TCGv_i128 arg)
2147 {
2148     tcg_temp_free_internal(tcgv_i128_temp(arg));
2149 }
2150 
2151 void tcg_temp_free_ptr(TCGv_ptr arg)
2152 {
2153     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2154 }
2155 
2156 void tcg_temp_free_vec(TCGv_vec arg)
2157 {
2158     tcg_temp_free_internal(tcgv_vec_temp(arg));
2159 }
2160 
2161 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2162 {
2163     TCGContext *s = tcg_ctx;
2164     GHashTable *h = s->const_table[type];
2165     TCGTemp *ts;
2166 
2167     if (h == NULL) {
2168         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2169         s->const_table[type] = h;
2170     }
2171 
2172     ts = g_hash_table_lookup(h, &val);
2173     if (ts == NULL) {
2174         int64_t *val_ptr;
2175 
2176         ts = tcg_temp_alloc(s);
2177 
2178         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2179             TCGTemp *ts2 = tcg_temp_alloc(s);
2180 
2181             tcg_debug_assert(ts2 == ts + 1);
2182 
2183             ts->base_type = TCG_TYPE_I64;
2184             ts->type = TCG_TYPE_I32;
2185             ts->kind = TEMP_CONST;
2186             ts->temp_allocated = 1;
2187 
2188             ts2->base_type = TCG_TYPE_I64;
2189             ts2->type = TCG_TYPE_I32;
2190             ts2->kind = TEMP_CONST;
2191             ts2->temp_allocated = 1;
2192             ts2->temp_subindex = 1;
2193 
2194             /*
2195              * Retain the full value of the 64-bit constant in the low
2196              * part, so that the hash table works.  Actual uses will
2197              * truncate the value to the low part.
2198              */
2199             ts[HOST_BIG_ENDIAN].val = val;
2200             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2201             val_ptr = &ts[HOST_BIG_ENDIAN].val;
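            /*
             * Illustrative, little-endian host, val 0x1122334455667788:
             * ts[0].val keeps the full value (uses truncate to the low
             * 32 bits) and ts[1].val holds the high part, 0x11223344.
             */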
2202         } else {
2203             ts->base_type = type;
2204             ts->type = type;
2205             ts->kind = TEMP_CONST;
2206             ts->temp_allocated = 1;
2207             ts->val = val;
2208             val_ptr = &ts->val;
2209         }
2210         g_hash_table_insert(h, val_ptr, ts);
2211     }
2212 
2213     return ts;
2214 }
2215 
2216 TCGv_i32 tcg_constant_i32(int32_t val)
2217 {
2218     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2219 }
2220 
2221 TCGv_i64 tcg_constant_i64(int64_t val)
2222 {
2223     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2224 }
2225 
2226 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2227 {
2228     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2229 }
2230 
2231 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2232 {
2233     val = dup_const(vece, val);
2234     return temp_tcgv_vec(tcg_constant_internal(type, val));
2235 }
2236 
2237 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2238 {
2239     TCGTemp *t = tcgv_vec_temp(match);
2240 
2241     tcg_debug_assert(t->temp_allocated != 0);
2242     return tcg_constant_vec(t->base_type, vece, val);
2243 }
2244 
2245 #ifdef CONFIG_DEBUG_TCG
2246 size_t temp_idx(TCGTemp *ts)
2247 {
2248     ptrdiff_t n = ts - tcg_ctx->temps;
2249     assert(n >= 0 && n < tcg_ctx->nb_temps);
2250     return n;
2251 }
2252 
2253 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2254 {
2255     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2256 
2257     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2258     assert(o % sizeof(TCGTemp) == 0);
2259 
2260     return (void *)tcg_ctx + (uintptr_t)v;
2261 }
2262 #endif /* CONFIG_DEBUG_TCG */
2263 
2264 /*
2265  * Return true if OP may appear in the opcode stream with TYPE.
2266  * Test the runtime variable that controls each opcode.
2267  */
2268 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2269 {
2270     bool has_type;
2271 
2272     switch (type) {
2273     case TCG_TYPE_I32:
2274         has_type = true;
2275         break;
2276     case TCG_TYPE_I64:
2277         has_type = TCG_TARGET_REG_BITS == 64;
2278         break;
2279     case TCG_TYPE_V64:
2280         has_type = TCG_TARGET_HAS_v64;
2281         break;
2282     case TCG_TYPE_V128:
2283         has_type = TCG_TARGET_HAS_v128;
2284         break;
2285     case TCG_TYPE_V256:
2286         has_type = TCG_TARGET_HAS_v256;
2287         break;
2288     default:
2289         has_type = false;
2290         break;
2291     }
2292 
2293     switch (op) {
2294     case INDEX_op_discard:
2295     case INDEX_op_set_label:
2296     case INDEX_op_call:
2297     case INDEX_op_br:
2298     case INDEX_op_mb:
2299     case INDEX_op_insn_start:
2300     case INDEX_op_exit_tb:
2301     case INDEX_op_goto_tb:
2302     case INDEX_op_goto_ptr:
2303     case INDEX_op_qemu_ld_i32:
2304     case INDEX_op_qemu_st_i32:
2305     case INDEX_op_qemu_ld_i64:
2306     case INDEX_op_qemu_st_i64:
2307         return true;
2308 
2309     case INDEX_op_qemu_st8_i32:
2310         return TCG_TARGET_HAS_qemu_st8_i32;
2311 
2312     case INDEX_op_qemu_ld_i128:
2313     case INDEX_op_qemu_st_i128:
2314         return TCG_TARGET_HAS_qemu_ldst_i128;
2315 
2316     case INDEX_op_add:
2317     case INDEX_op_and:
2318     case INDEX_op_brcond:
2319     case INDEX_op_mov:
2320     case INDEX_op_movcond:
2321     case INDEX_op_negsetcond:
2322     case INDEX_op_or:
2323     case INDEX_op_setcond:
2324     case INDEX_op_xor:
2325         return has_type;
2326 
2327     case INDEX_op_ld8u_i32:
2328     case INDEX_op_ld8s_i32:
2329     case INDEX_op_ld16u_i32:
2330     case INDEX_op_ld16s_i32:
2331     case INDEX_op_ld_i32:
2332     case INDEX_op_st8_i32:
2333     case INDEX_op_st16_i32:
2334     case INDEX_op_st_i32:
2335     case INDEX_op_extract_i32:
2336     case INDEX_op_sextract_i32:
2337     case INDEX_op_deposit_i32:
2338         return true;
2339 
2340     case INDEX_op_extract2_i32:
2341         return TCG_TARGET_HAS_extract2_i32;
2342     case INDEX_op_add2_i32:
2343         return TCG_TARGET_HAS_add2_i32;
2344     case INDEX_op_sub2_i32:
2345         return TCG_TARGET_HAS_sub2_i32;
2346 
2347     case INDEX_op_brcond2_i32:
2348     case INDEX_op_setcond2_i32:
2349         return TCG_TARGET_REG_BITS == 32;
2350 
2351     case INDEX_op_ld8u_i64:
2352     case INDEX_op_ld8s_i64:
2353     case INDEX_op_ld16u_i64:
2354     case INDEX_op_ld16s_i64:
2355     case INDEX_op_ld32u_i64:
2356     case INDEX_op_ld32s_i64:
2357     case INDEX_op_ld_i64:
2358     case INDEX_op_st8_i64:
2359     case INDEX_op_st16_i64:
2360     case INDEX_op_st32_i64:
2361     case INDEX_op_st_i64:
2362     case INDEX_op_ext_i32_i64:
2363     case INDEX_op_extu_i32_i64:
2364     case INDEX_op_extract_i64:
2365     case INDEX_op_sextract_i64:
2366     case INDEX_op_deposit_i64:
2367         return TCG_TARGET_REG_BITS == 64;
2368 
2369     case INDEX_op_extract2_i64:
2370         return TCG_TARGET_HAS_extract2_i64;
2371     case INDEX_op_extrl_i64_i32:
2372     case INDEX_op_extrh_i64_i32:
2373         return TCG_TARGET_HAS_extr_i64_i32;
2374     case INDEX_op_bswap64_i64:
2375         return TCG_TARGET_HAS_bswap64_i64;
2376     case INDEX_op_add2_i64:
2377         return TCG_TARGET_HAS_add2_i64;
2378     case INDEX_op_sub2_i64:
2379         return TCG_TARGET_HAS_sub2_i64;
2380 
2381     case INDEX_op_mov_vec:
2382     case INDEX_op_dup_vec:
2383     case INDEX_op_dupm_vec:
2384     case INDEX_op_ld_vec:
2385     case INDEX_op_st_vec:
2386     case INDEX_op_add_vec:
2387     case INDEX_op_sub_vec:
2388     case INDEX_op_and_vec:
2389     case INDEX_op_or_vec:
2390     case INDEX_op_xor_vec:
2391     case INDEX_op_cmp_vec:
2392         return has_type;
2393     case INDEX_op_dup2_vec:
2394         return has_type && TCG_TARGET_REG_BITS == 32;
2395     case INDEX_op_not_vec:
2396         return has_type && TCG_TARGET_HAS_not_vec;
2397     case INDEX_op_neg_vec:
2398         return has_type && TCG_TARGET_HAS_neg_vec;
2399     case INDEX_op_abs_vec:
2400         return has_type && TCG_TARGET_HAS_abs_vec;
2401     case INDEX_op_andc_vec:
2402         return has_type && TCG_TARGET_HAS_andc_vec;
2403     case INDEX_op_orc_vec:
2404         return has_type && TCG_TARGET_HAS_orc_vec;
2405     case INDEX_op_nand_vec:
2406         return has_type && TCG_TARGET_HAS_nand_vec;
2407     case INDEX_op_nor_vec:
2408         return has_type && TCG_TARGET_HAS_nor_vec;
2409     case INDEX_op_eqv_vec:
2410         return has_type && TCG_TARGET_HAS_eqv_vec;
2411     case INDEX_op_mul_vec:
2412         return has_type && TCG_TARGET_HAS_mul_vec;
2413     case INDEX_op_shli_vec:
2414     case INDEX_op_shri_vec:
2415     case INDEX_op_sari_vec:
2416         return has_type && TCG_TARGET_HAS_shi_vec;
2417     case INDEX_op_shls_vec:
2418     case INDEX_op_shrs_vec:
2419     case INDEX_op_sars_vec:
2420         return has_type && TCG_TARGET_HAS_shs_vec;
2421     case INDEX_op_shlv_vec:
2422     case INDEX_op_shrv_vec:
2423     case INDEX_op_sarv_vec:
2424         return has_type && TCG_TARGET_HAS_shv_vec;
2425     case INDEX_op_rotli_vec:
2426         return has_type && TCG_TARGET_HAS_roti_vec;
2427     case INDEX_op_rotls_vec:
2428         return has_type && TCG_TARGET_HAS_rots_vec;
2429     case INDEX_op_rotlv_vec:
2430     case INDEX_op_rotrv_vec:
2431         return has_type && TCG_TARGET_HAS_rotv_vec;
2432     case INDEX_op_ssadd_vec:
2433     case INDEX_op_usadd_vec:
2434     case INDEX_op_sssub_vec:
2435     case INDEX_op_ussub_vec:
2436         return has_type && TCG_TARGET_HAS_sat_vec;
2437     case INDEX_op_smin_vec:
2438     case INDEX_op_umin_vec:
2439     case INDEX_op_smax_vec:
2440     case INDEX_op_umax_vec:
2441         return has_type && TCG_TARGET_HAS_minmax_vec;
2442     case INDEX_op_bitsel_vec:
2443         return has_type && TCG_TARGET_HAS_bitsel_vec;
2444     case INDEX_op_cmpsel_vec:
2445         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2446 
2447     default:
2448         if (op < INDEX_op_last_generic) {
2449             const TCGOutOp *outop;
2450             TCGConstraintSetIndex con_set;
2451 
2452             if (!has_type) {
2453                 return false;
2454             }
2455 
2456             outop = all_outop[op];
2457             tcg_debug_assert(outop != NULL);
2458 
2459             con_set = outop->static_constraint;
2460             if (con_set == C_Dynamic) {
2461                 con_set = outop->dynamic_constraint(type, flags);
2462             }
2463             if (con_set >= 0) {
2464                 return true;
2465             }
2466             tcg_debug_assert(con_set == C_NotImplemented);
2467             return false;
2468         }
2469         tcg_debug_assert(op < NB_OPS);
2470         return true;
2471 
2472     case INDEX_op_last_generic:
2473         g_assert_not_reached();
2474     }
2475 }
2476 
2477 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2478 {
2479     unsigned width;
2480 
2481     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2482     width = (type == TCG_TYPE_I32 ? 32 : 64);
2483 
2484     tcg_debug_assert(ofs < width);
2485     tcg_debug_assert(len > 0);
2486     tcg_debug_assert(len <= width - ofs);
2487 
2488     return TCG_TARGET_deposit_valid(type, ofs, len);
2489 }
2490 
2491 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2492 
2493 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2494                           TCGTemp *ret, TCGTemp **args)
2495 {
2496     TCGv_i64 extend_free[MAX_CALL_IARGS];
2497     int n_extend = 0;
2498     TCGOp *op;
2499     int i, n, pi = 0, total_args;
2500 
2501     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2502         init_call_layout(info);
2503         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2504     }
2505 
2506     total_args = info->nr_out + info->nr_in + 2;
2507     op = tcg_op_alloc(INDEX_op_call, total_args);
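    /*
     * The call op's arguments are laid out as: nr_out output temps, then
     * nr_in input temps, then the function pointer and the TCGHelperInfo
     * pointer (hence the +2 above).
     */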
2508 
2509 #ifdef CONFIG_PLUGIN
2510     /* Flag helpers that may affect guest state */
2511     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2512         tcg_ctx->plugin_insn->calls_helpers = true;
2513     }
2514 #endif
2515 
2516     TCGOP_CALLO(op) = n = info->nr_out;
2517     switch (n) {
2518     case 0:
2519         tcg_debug_assert(ret == NULL);
2520         break;
2521     case 1:
2522         tcg_debug_assert(ret != NULL);
2523         op->args[pi++] = temp_arg(ret);
2524         break;
2525     case 2:
2526     case 4:
2527         tcg_debug_assert(ret != NULL);
2528         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2529         tcg_debug_assert(ret->temp_subindex == 0);
2530         for (i = 0; i < n; ++i) {
2531             op->args[pi++] = temp_arg(ret + i);
2532         }
2533         break;
2534     default:
2535         g_assert_not_reached();
2536     }
2537 
2538     TCGOP_CALLI(op) = n = info->nr_in;
2539     for (i = 0; i < n; i++) {
2540         const TCGCallArgumentLoc *loc = &info->in[i];
2541         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2542 
2543         switch (loc->kind) {
2544         case TCG_CALL_ARG_NORMAL:
2545         case TCG_CALL_ARG_BY_REF:
2546         case TCG_CALL_ARG_BY_REF_N:
2547             op->args[pi++] = temp_arg(ts);
2548             break;
2549 
2550         case TCG_CALL_ARG_EXTEND_U:
2551         case TCG_CALL_ARG_EXTEND_S:
2552             {
2553                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2554                 TCGv_i32 orig = temp_tcgv_i32(ts);
2555 
2556                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2557                     tcg_gen_ext_i32_i64(temp, orig);
2558                 } else {
2559                     tcg_gen_extu_i32_i64(temp, orig);
2560                 }
2561                 op->args[pi++] = tcgv_i64_arg(temp);
2562                 extend_free[n_extend++] = temp;
2563             }
2564             break;
2565 
2566         default:
2567             g_assert_not_reached();
2568         }
2569     }
2570     op->args[pi++] = (uintptr_t)func;
2571     op->args[pi++] = (uintptr_t)info;
2572     tcg_debug_assert(pi == total_args);
2573 
2574     if (tcg_ctx->emit_before_op) {
2575         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2576     } else {
2577         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2578     }
2579 
2580     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2581     for (i = 0; i < n_extend; ++i) {
2582         tcg_temp_free_i64(extend_free[i]);
2583     }
2584 }
2585 
2586 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2587 {
2588     tcg_gen_callN(func, info, ret, NULL);
2589 }
2590 
2591 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2592 {
2593     tcg_gen_callN(func, info, ret, &t1);
2594 }
2595 
2596 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2597                    TCGTemp *t1, TCGTemp *t2)
2598 {
2599     TCGTemp *args[2] = { t1, t2 };
2600     tcg_gen_callN(func, info, ret, args);
2601 }
2602 
2603 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2604                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2605 {
2606     TCGTemp *args[3] = { t1, t2, t3 };
2607     tcg_gen_callN(func, info, ret, args);
2608 }
2609 
2610 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2611                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2612 {
2613     TCGTemp *args[4] = { t1, t2, t3, t4 };
2614     tcg_gen_callN(func, info, ret, args);
2615 }
2616 
2617 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2618                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2619 {
2620     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2621     tcg_gen_callN(func, info, ret, args);
2622 }
2623 
2624 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2625                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2626                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2627 {
2628     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2629     tcg_gen_callN(func, info, ret, args);
2630 }
2631 
2632 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2633                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2634                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2635 {
2636     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2637     tcg_gen_callN(func, info, ret, args);
2638 }
2639 
2640 static void tcg_reg_alloc_start(TCGContext *s)
2641 {
2642     int i, n;
2643 
2644     for (i = 0, n = s->nb_temps; i < n; i++) {
2645         TCGTemp *ts = &s->temps[i];
2646         TCGTempVal val = TEMP_VAL_MEM;
2647 
2648         switch (ts->kind) {
2649         case TEMP_CONST:
2650             val = TEMP_VAL_CONST;
2651             break;
2652         case TEMP_FIXED:
2653             val = TEMP_VAL_REG;
2654             break;
2655         case TEMP_GLOBAL:
2656             break;
2657         case TEMP_EBB:
2658             val = TEMP_VAL_DEAD;
2659             /* fall through */
2660         case TEMP_TB:
2661             ts->mem_allocated = 0;
2662             break;
2663         default:
2664             g_assert_not_reached();
2665         }
2666         ts->val_type = val;
2667     }
2668 
2669     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2670 }
2671 
2672 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2673                                  TCGTemp *ts)
2674 {
2675     int idx = temp_idx(ts);
2676 
2677     switch (ts->kind) {
2678     case TEMP_FIXED:
2679     case TEMP_GLOBAL:
2680         pstrcpy(buf, buf_size, ts->name);
2681         break;
2682     case TEMP_TB:
2683         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2684         break;
2685     case TEMP_EBB:
2686         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2687         break;
2688     case TEMP_CONST:
2689         switch (ts->type) {
2690         case TCG_TYPE_I32:
2691             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2692             break;
2693 #if TCG_TARGET_REG_BITS > 32
2694         case TCG_TYPE_I64:
2695             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2696             break;
2697 #endif
2698         case TCG_TYPE_V64:
2699         case TCG_TYPE_V128:
2700         case TCG_TYPE_V256:
2701             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2702                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2703             break;
2704         default:
2705             g_assert_not_reached();
2706         }
2707         break;
2708     }
2709     return buf;
2710 }
2711 
2712 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2713                              int buf_size, TCGArg arg)
2714 {
2715     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2716 }
2717 
2718 static const char * const cond_name[] =
2719 {
2720     [TCG_COND_NEVER] = "never",
2721     [TCG_COND_ALWAYS] = "always",
2722     [TCG_COND_EQ] = "eq",
2723     [TCG_COND_NE] = "ne",
2724     [TCG_COND_LT] = "lt",
2725     [TCG_COND_GE] = "ge",
2726     [TCG_COND_LE] = "le",
2727     [TCG_COND_GT] = "gt",
2728     [TCG_COND_LTU] = "ltu",
2729     [TCG_COND_GEU] = "geu",
2730     [TCG_COND_LEU] = "leu",
2731     [TCG_COND_GTU] = "gtu",
2732     [TCG_COND_TSTEQ] = "tsteq",
2733     [TCG_COND_TSTNE] = "tstne",
2734 };
2735 
2736 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2737 {
2738     [MO_UB]   = "ub",
2739     [MO_SB]   = "sb",
2740     [MO_LEUW] = "leuw",
2741     [MO_LESW] = "lesw",
2742     [MO_LEUL] = "leul",
2743     [MO_LESL] = "lesl",
2744     [MO_LEUQ] = "leq",
2745     [MO_BEUW] = "beuw",
2746     [MO_BESW] = "besw",
2747     [MO_BEUL] = "beul",
2748     [MO_BESL] = "besl",
2749     [MO_BEUQ] = "beq",
2750     [MO_128 + MO_BE] = "beo",
2751     [MO_128 + MO_LE] = "leo",
2752 };
2753 
2754 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2755     [MO_UNALN >> MO_ASHIFT]    = "un+",
2756     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2757     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2758     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2759     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2760     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2761     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2762     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2763 };
2764 
2765 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2766     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2767     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2768     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2769     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2770     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2771     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2772 };
2773 
2774 static const char bswap_flag_name[][6] = {
2775     [TCG_BSWAP_IZ] = "iz",
2776     [TCG_BSWAP_OZ] = "oz",
2777     [TCG_BSWAP_OS] = "os",
2778     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2779     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2780 };
2781 
2782 #ifdef CONFIG_PLUGIN
2783 static const char * const plugin_from_name[] = {
2784     "from-tb",
2785     "from-insn",
2786     "after-insn",
2787     "after-tb",
2788 };
2789 #endif
2790 
2791 static inline bool tcg_regset_single(TCGRegSet d)
2792 {
2793     return (d & (d - 1)) == 0;
2794 }
2795 
2796 static inline TCGReg tcg_regset_first(TCGRegSet d)
2797 {
2798     if (TCG_TARGET_NB_REGS <= 32) {
2799         return ctz32(d);
2800     } else {
2801         return ctz64(d);
2802     }
2803 }
2804 
2805 /* Return only the number of characters output -- no error return. */
2806 #define ne_fprintf(...) \
2807     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2808 
2809 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2810 {
2811     char buf[128];
2812     TCGOp *op;
2813 
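    /*
     * Illustrative output line (temp names, indexes and preferences are
     * examples only):
     *   add_i32 tmp2,tmp0,tmp1                  dead: 1 2  pref=all
     */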
2814     QTAILQ_FOREACH(op, &s->ops, link) {
2815         int i, k, nb_oargs, nb_iargs, nb_cargs;
2816         const TCGOpDef *def;
2817         TCGOpcode c;
2818         int col = 0;
2819 
2820         c = op->opc;
2821         def = &tcg_op_defs[c];
2822 
2823         if (c == INDEX_op_insn_start) {
2824             nb_oargs = 0;
2825             col += ne_fprintf(f, "\n ----");
2826 
2827             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2828                 col += ne_fprintf(f, " %016" PRIx64,
2829                                   tcg_get_insn_start_param(op, i));
2830             }
2831         } else if (c == INDEX_op_call) {
2832             const TCGHelperInfo *info = tcg_call_info(op);
2833             void *func = tcg_call_func(op);
2834 
2835             /* variable number of arguments */
2836             nb_oargs = TCGOP_CALLO(op);
2837             nb_iargs = TCGOP_CALLI(op);
2838             nb_cargs = def->nb_cargs;
2839 
2840             col += ne_fprintf(f, " %s ", def->name);
2841 
2842             /*
2843              * Print the function name from TCGHelperInfo, if available.
2844              * Note that plugins have a template function for the info,
2845              * but the actual function pointer comes from the plugin.
2846              */
2847             if (func == info->func) {
2848                 col += ne_fprintf(f, "%s", info->name);
2849             } else {
2850                 col += ne_fprintf(f, "plugin(%p)", func);
2851             }
2852 
2853             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2854             for (i = 0; i < nb_oargs; i++) {
2855                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2856                                                             op->args[i]));
2857             }
2858             for (i = 0; i < nb_iargs; i++) {
2859                 TCGArg arg = op->args[nb_oargs + i];
2860                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2861                 col += ne_fprintf(f, ",%s", t);
2862             }
2863         } else {
2864             if (def->flags & TCG_OPF_INT) {
2865                 col += ne_fprintf(f, " %s_i%d ",
2866                                   def->name,
2867                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2868             } else if (def->flags & TCG_OPF_VECTOR) {
2869                 col += ne_fprintf(f, "%s v%d,e%d,",
2870                                   def->name,
2871                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2872                                   8 << TCGOP_VECE(op));
2873             } else {
2874                 col += ne_fprintf(f, " %s ", def->name);
2875             }
2876 
2877             nb_oargs = def->nb_oargs;
2878             nb_iargs = def->nb_iargs;
2879             nb_cargs = def->nb_cargs;
2880 
2881             k = 0;
2882             for (i = 0; i < nb_oargs; i++) {
2883                 const char *sep = k ? "," : "";
2884                 col += ne_fprintf(f, "%s%s", sep,
2885                                   tcg_get_arg_str(s, buf, sizeof(buf),
2886                                                   op->args[k++]));
2887             }
2888             for (i = 0; i < nb_iargs; i++) {
2889                 const char *sep = k ? "," : "";
2890                 col += ne_fprintf(f, "%s%s", sep,
2891                                   tcg_get_arg_str(s, buf, sizeof(buf),
2892                                                   op->args[k++]));
2893             }
2894             switch (c) {
2895             case INDEX_op_brcond:
2896             case INDEX_op_setcond:
2897             case INDEX_op_negsetcond:
2898             case INDEX_op_movcond:
2899             case INDEX_op_brcond2_i32:
2900             case INDEX_op_setcond2_i32:
2901             case INDEX_op_cmp_vec:
2902             case INDEX_op_cmpsel_vec:
2903                 if (op->args[k] < ARRAY_SIZE(cond_name)
2904                     && cond_name[op->args[k]]) {
2905                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2906                 } else {
2907                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2908                 }
2909                 i = 1;
2910                 break;
2911             case INDEX_op_qemu_ld_i32:
2912             case INDEX_op_qemu_st_i32:
2913             case INDEX_op_qemu_st8_i32:
2914             case INDEX_op_qemu_ld_i64:
2915             case INDEX_op_qemu_st_i64:
2916             case INDEX_op_qemu_ld_i128:
2917             case INDEX_op_qemu_st_i128:
2918                 {
2919                     const char *s_al, *s_op, *s_at;
2920                     MemOpIdx oi = op->args[k++];
2921                     MemOp mop = get_memop(oi);
2922                     unsigned ix = get_mmuidx(oi);
2923 
2924                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2925                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2926                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2927                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2928 
2929                     /* If all fields are accounted for, print symbolically. */
2930                     if (!mop && s_al && s_op && s_at) {
2931                         col += ne_fprintf(f, ",%s%s%s,%u",
2932                                           s_at, s_al, s_op, ix);
2933                     } else {
2934                         mop = get_memop(oi);
2935                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2936                     }
2937                     i = 1;
2938                 }
2939                 break;
2940             case INDEX_op_bswap16:
2941             case INDEX_op_bswap32:
2942             case INDEX_op_bswap64_i64:
2943                 {
2944                     TCGArg flags = op->args[k];
2945                     const char *name = NULL;
2946 
2947                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2948                         name = bswap_flag_name[flags];
2949                     }
2950                     if (name) {
2951                         col += ne_fprintf(f, ",%s", name);
2952                     } else {
2953                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2954                     }
2955                     i = k = 1;
2956                 }
2957                 break;
2958 #ifdef CONFIG_PLUGIN
2959             case INDEX_op_plugin_cb:
2960                 {
2961                     TCGArg from = op->args[k++];
2962                     const char *name = NULL;
2963 
2964                     if (from < ARRAY_SIZE(plugin_from_name)) {
2965                         name = plugin_from_name[from];
2966                     }
2967                     if (name) {
2968                         col += ne_fprintf(f, "%s", name);
2969                     } else {
2970                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2971                     }
2972                     i = 1;
2973                 }
2974                 break;
2975 #endif
2976             default:
2977                 i = 0;
2978                 break;
2979             }
2980             switch (c) {
2981             case INDEX_op_set_label:
2982             case INDEX_op_br:
2983             case INDEX_op_brcond:
2984             case INDEX_op_brcond2_i32:
2985                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2986                                   arg_label(op->args[k])->id);
2987                 i++, k++;
2988                 break;
2989             case INDEX_op_mb:
2990                 {
2991                     TCGBar membar = op->args[k];
2992                     const char *b_op, *m_op;
2993 
2994                     switch (membar & TCG_BAR_SC) {
2995                     case 0:
2996                         b_op = "none";
2997                         break;
2998                     case TCG_BAR_LDAQ:
2999                         b_op = "acq";
3000                         break;
3001                     case TCG_BAR_STRL:
3002                         b_op = "rel";
3003                         break;
3004                     case TCG_BAR_SC:
3005                         b_op = "seq";
3006                         break;
3007                     default:
3008                         g_assert_not_reached();
3009                     }
3010 
3011                     switch (membar & TCG_MO_ALL) {
3012                     case 0:
3013                         m_op = "none";
3014                         break;
3015                     case TCG_MO_LD_LD:
3016                         m_op = "rr";
3017                         break;
3018                     case TCG_MO_LD_ST:
3019                         m_op = "rw";
3020                         break;
3021                     case TCG_MO_ST_LD:
3022                         m_op = "wr";
3023                         break;
3024                     case TCG_MO_ST_ST:
3025                         m_op = "ww";
3026                         break;
3027                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3028                         m_op = "rr+rw";
3029                         break;
3030                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3031                         m_op = "rr+wr";
3032                         break;
3033                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3034                         m_op = "rr+ww";
3035                         break;
3036                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3037                         m_op = "rw+wr";
3038                         break;
3039                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3040                         m_op = "rw+ww";
3041                         break;
3042                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3043                         m_op = "wr+ww";
3044                         break;
3045                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3046                         m_op = "rr+rw+wr";
3047                         break;
3048                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3049                         m_op = "rr+rw+ww";
3050                         break;
3051                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3052                         m_op = "rr+wr+ww";
3053                         break;
3054                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3055                         m_op = "rw+wr+ww";
3056                         break;
3057                     case TCG_MO_ALL:
3058                         m_op = "all";
3059                         break;
3060                     default:
3061                         g_assert_not_reached();
3062                     }
3063 
3064                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3065                     i++, k++;
3066                 }
3067                 break;
3068             default:
3069                 break;
3070             }
3071             for (; i < nb_cargs; i++, k++) {
3072                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3073                                   op->args[k]);
3074             }
3075         }
3076 
3077         if (have_prefs || op->life) {
3078             for (; col < 40; ++col) {
3079                 putc(' ', f);
3080             }
3081         }
3082 
3083         if (op->life) {
3084             unsigned life = op->life;
3085 
3086             if (life & (SYNC_ARG * 3)) {
3087                 ne_fprintf(f, "  sync:");
3088                 for (i = 0; i < 2; ++i) {
3089                     if (life & (SYNC_ARG << i)) {
3090                         ne_fprintf(f, " %d", i);
3091                     }
3092                 }
3093             }
3094             life /= DEAD_ARG;
3095             if (life) {
3096                 ne_fprintf(f, "  dead:");
3097                 for (i = 0; life; ++i, life >>= 1) {
3098                     if (life & 1) {
3099                         ne_fprintf(f, " %d", i);
3100                     }
3101                 }
3102             }
3103         }
3104 
3105         if (have_prefs) {
3106             for (i = 0; i < nb_oargs; ++i) {
3107                 TCGRegSet set = output_pref(op, i);
3108 
3109                 if (i == 0) {
3110                     ne_fprintf(f, "  pref=");
3111                 } else {
3112                     ne_fprintf(f, ",");
3113                 }
3114                 if (set == 0) {
3115                     ne_fprintf(f, "none");
3116                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3117                     ne_fprintf(f, "all");
3118 #ifdef CONFIG_DEBUG_TCG
3119                 } else if (tcg_regset_single(set)) {
3120                     TCGReg reg = tcg_regset_first(set);
3121                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3122 #endif
3123                 } else if (TCG_TARGET_NB_REGS <= 32) {
3124                     ne_fprintf(f, "0x%x", (uint32_t)set);
3125                 } else {
3126                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3127                 }
3128             }
3129         }
3130 
3131         putc('\n', f);
3132     }
3133 }
3134 
3135 /* We give more priority to constraints with fewer registers. */
3136 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3137 {
3138     int n;
3139 
3140     arg_ct += k;
3141     n = ctpop64(arg_ct->regs);
3142 
3143     /*
3144      * Sort single-register constraints first; these include output
3145      * aliases, which must exactly match the input already allocated.
3146      */
3147     if (n == 1 || arg_ct->oalias) {
3148         return INT_MAX;
3149     }
3150 
3151     /*
3152      * Sort register pairs next, with the first member immediately
3153      * followed by the second.  Arbitrarily sort multiple pairs by the
3154      * index of the first reg; there shouldn't be many pairs.
3155      */
3156     switch (arg_ct->pair) {
3157     case 1:
3158     case 3:
3159         return (k + 1) * 2;
3160     case 2:
3161         return (arg_ct->pair_index + 1) * 2 - 1;
3162     }
3163 
3164     /* Finally, sort so that constraints with fewer registers come first. */
3165     assert(n > 1);
3166     return -n;
3167 }
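/*
 * Resulting order from get_constraint_priority, highest first:
 * single-register and alias constraints (INT_MAX), then pair members
 * (first member immediately before its second), then the remaining
 * constraints with smaller register sets first.
 */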
3168 
3169 /* sort from highest priority to lowest */
3170 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3171 {
3172     int i, j;
3173 
3174     for (i = 0; i < n; i++) {
3175         a[start + i].sort_index = start + i;
3176     }
3177     if (n <= 1) {
3178         return;
3179     }
3180     for (i = 0; i < n - 1; i++) {
3181         for (j = i + 1; j < n; j++) {
3182             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3183             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3184             if (p1 < p2) {
3185                 int tmp = a[start + i].sort_index;
3186                 a[start + i].sort_index = a[start + j].sort_index;
3187                 a[start + j].sort_index = tmp;
3188             }
3189         }
3190     }
3191 }
3192 
3193 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3194 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3195 
3196 static void process_constraint_sets(void)
3197 {
3198     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3199         const TCGConstraintSet *tdefs = &constraint_sets[c];
3200         TCGArgConstraint *args_ct = all_cts[c];
3201         int nb_oargs = tdefs->nb_oargs;
3202         int nb_iargs = tdefs->nb_iargs;
3203         int nb_args = nb_oargs + nb_iargs;
3204         bool saw_alias_pair = false;
3205 
3206         for (int i = 0; i < nb_args; i++) {
3207             const char *ct_str = tdefs->args_ct_str[i];
3208             bool input_p = i >= nb_oargs;
3209             int o;
3210 
3211             switch (*ct_str) {
3212             case '0' ... '9':
3213                 o = *ct_str - '0';
3214                 tcg_debug_assert(input_p);
3215                 tcg_debug_assert(o < nb_oargs);
3216                 tcg_debug_assert(args_ct[o].regs != 0);
3217                 tcg_debug_assert(!args_ct[o].oalias);
3218                 args_ct[i] = args_ct[o];
3219                 /* The output sets oalias.  */
3220                 args_ct[o].oalias = 1;
3221                 args_ct[o].alias_index = i;
3222                 /* The input sets ialias. */
3223                 args_ct[i].ialias = 1;
3224                 args_ct[i].alias_index = o;
3225                 if (args_ct[i].pair) {
3226                     saw_alias_pair = true;
3227                 }
3228                 tcg_debug_assert(ct_str[1] == '\0');
3229                 continue;
3230 
3231             case '&':
3232                 tcg_debug_assert(!input_p);
3233                 args_ct[i].newreg = true;
3234                 ct_str++;
3235                 break;
3236 
3237             case 'p': /* plus */
3238                 /* Allocate to the register after the previous. */
3239                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3240                 o = i - 1;
3241                 tcg_debug_assert(!args_ct[o].pair);
3242                 tcg_debug_assert(!args_ct[o].ct);
3243                 args_ct[i] = (TCGArgConstraint){
3244                     .pair = 2,
3245                     .pair_index = o,
3246                     .regs = args_ct[o].regs << 1,
3247                     .newreg = args_ct[o].newreg,
3248                 };
3249                 args_ct[o].pair = 1;
3250                 args_ct[o].pair_index = i;
3251                 tcg_debug_assert(ct_str[1] == '\0');
3252                 continue;
3253 
3254             case 'm': /* minus */
3255                 /* Allocate to the register before the previous. */
3256                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3257                 o = i - 1;
3258                 tcg_debug_assert(!args_ct[o].pair);
3259                 tcg_debug_assert(!args_ct[o].ct);
3260                 args_ct[i] = (TCGArgConstraint){
3261                     .pair = 1,
3262                     .pair_index = o,
3263                     .regs = args_ct[o].regs >> 1,
3264                     .newreg = args_ct[o].newreg,
3265                 };
3266                 args_ct[o].pair = 2;
3267                 args_ct[o].pair_index = i;
3268                 tcg_debug_assert(ct_str[1] == '\0');
3269                 continue;
3270             }
3271 
3272             do {
3273                 switch (*ct_str) {
3274                 case 'i':
3275                     args_ct[i].ct |= TCG_CT_CONST;
3276                     break;
3277 #ifdef TCG_REG_ZERO
3278                 case 'z':
3279                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3280                     break;
3281 #endif
3282 
3283                 /* Include all of the target-specific constraints. */
3284 
3285 #undef CONST
3286 #define CONST(CASE, MASK) \
3287     case CASE: args_ct[i].ct |= MASK; break;
3288 #define REGS(CASE, MASK) \
3289     case CASE: args_ct[i].regs |= MASK; break;
3290 
3291 #include "tcg-target-con-str.h"
3292 
3293 #undef REGS
3294 #undef CONST
3295                 default:
3296                 case '0' ... '9':
3297                 case '&':
3298                 case 'p':
3299                 case 'm':
3300                     /* Typo in TCGConstraintSet constraint. */
3301                     g_assert_not_reached();
3302                 }
3303             } while (*++ct_str != '\0');
3304         }
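
        /*
         * As a concrete (hypothetical) example: nb_oargs = 1 with
         * args_ct_str = { "r", "ri", "0" } parses as an output in any
         * general register, a first input in a register or constant
         * (TCG_CT_CONST), and a second input aliased to output 0:
         * args_ct[0].oalias = 1 and args_ct[2].ialias = 1, with
         * alias_index linking the two.
         */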
3305 
3306         /*
3307          * Fix up output pairs that are aliased with inputs.
3308          * When we created the alias, we copied pair from the output.
3309          * There are three cases:
3310          *    (1a) Pairs of inputs alias pairs of outputs.
3311          *    (1b) One input aliases the first of a pair of outputs.
3312          *    (2)  One input aliases the second of a pair of outputs.
3313          *
3314          * Case 1a is handled by making sure that the pair_index'es are
3315          * properly updated so that they appear the same as a pair of inputs.
3316          *
3317          * Case 1b is handled by setting the pair_index of the input to
3318          * itself, simply so it doesn't point to an unrelated argument.
3319          * Since we don't encounter the "second" during the input allocation
3320          * phase, nothing happens with the second half of the input pair.
3321          *
3322          * Case 2 is handled by setting the second input to pair=3, the
3323          * first output to pair=3, and the pair_index'es to match.
3324          */
3325         if (saw_alias_pair) {
3326             for (int i = nb_oargs; i < nb_args; i++) {
3327                 int o, o2, i2;
3328 
3329                 /*
3330                  * Since [0-9pm] must be alone in the constraint string,
3331                  * the only way they can both be set is if the pair comes
3332                  * from the output alias.
3333                  */
3334                 if (!args_ct[i].ialias) {
3335                     continue;
3336                 }
3337                 switch (args_ct[i].pair) {
3338                 case 0:
3339                     break;
3340                 case 1:
3341                     o = args_ct[i].alias_index;
3342                     o2 = args_ct[o].pair_index;
3343                     tcg_debug_assert(args_ct[o].pair == 1);
3344                     tcg_debug_assert(args_ct[o2].pair == 2);
3345                     if (args_ct[o2].oalias) {
3346                         /* Case 1a */
3347                         i2 = args_ct[o2].alias_index;
3348                         tcg_debug_assert(args_ct[i2].pair == 2);
3349                         args_ct[i2].pair_index = i;
3350                         args_ct[i].pair_index = i2;
3351                     } else {
3352                         /* Case 1b */
3353                         args_ct[i].pair_index = i;
3354                     }
3355                     break;
3356                 case 2:
3357                     o = args_ct[i].alias_index;
3358                     o2 = args_ct[o].pair_index;
3359                     tcg_debug_assert(args_ct[o].pair == 2);
3360                     tcg_debug_assert(args_ct[o2].pair == 1);
3361                     if (args_ct[o2].oalias) {
3362                         /* Case 1a */
3363                         i2 = args_ct[o2].alias_index;
3364                         tcg_debug_assert(args_ct[i2].pair == 1);
3365                         args_ct[i2].pair_index = i;
3366                         args_ct[i].pair_index = i2;
3367                     } else {
3368                         /* Case 2 */
3369                         args_ct[i].pair = 3;
3370                         args_ct[o2].pair = 3;
3371                         args_ct[i].pair_index = o2;
3372                         args_ct[o2].pair_index = i;
3373                     }
3374                     break;
3375                 default:
3376                     g_assert_not_reached();
3377                 }
3378             }
3379         }
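
        /*
         * A hypothetical instance of case 2 above: with outputs o0/o1
         * forming a pair (o0 pair=1, o1 pair=2) and a lone input i0
         * constrained as "1" (aliasing o1), the fixup sets i0.pair = 3
         * and o0.pair = 3 with pair_index linking i0 and o0, so the
         * allocator can see both halves.
         */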
3380 
3381         /* sort the constraints (XXX: this is just a heuristic) */
3382         sort_constraints(args_ct, 0, nb_oargs);
3383         sort_constraints(args_ct, nb_oargs, nb_iargs);
3384     }
3385 }
3386 
3387 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3388 {
3389     TCGOpcode opc = op->opc;
3390     TCGType type = TCGOP_TYPE(op);
3391     unsigned flags = TCGOP_FLAGS(op);
3392     const TCGOpDef *def = &tcg_op_defs[opc];
3393     const TCGOutOp *outop = all_outop[opc];
3394     TCGConstraintSetIndex con_set;
3395 
3396     if (def->flags & TCG_OPF_NOT_PRESENT) {
3397         return empty_cts;
3398     }
3399 
3400     if (outop) {
3401         con_set = outop->static_constraint;
3402         if (con_set == C_Dynamic) {
3403             con_set = outop->dynamic_constraint(type, flags);
3404         }
3405     } else {
3406         con_set = tcg_target_op_def(opc, type, flags);
3407     }
3408     tcg_debug_assert(con_set >= 0);
3409     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3410 
3411     /* The constraint arguments must match TCGOpcode arguments. */
3412     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3413     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3414 
3415     return all_cts[con_set];
3416 }
3417 
3418 static void remove_label_use(TCGOp *op, int idx)
3419 {
3420     TCGLabel *label = arg_label(op->args[idx]);
3421     TCGLabelUse *use;
3422 
3423     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3424         if (use->op == op) {
3425             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3426             return;
3427         }
3428     }
3429     g_assert_not_reached();
3430 }
3431 
3432 void tcg_op_remove(TCGContext *s, TCGOp *op)
3433 {
3434     switch (op->opc) {
3435     case INDEX_op_br:
3436         remove_label_use(op, 0);
3437         break;
3438     case INDEX_op_brcond:
3439         remove_label_use(op, 3);
3440         break;
3441     case INDEX_op_brcond2_i32:
3442         remove_label_use(op, 5);
3443         break;
3444     default:
3445         break;
3446     }
3447 
3448     QTAILQ_REMOVE(&s->ops, op, link);
3449     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3450     s->nb_ops--;
3451 }
3452 
3453 void tcg_remove_ops_after(TCGOp *op)
3454 {
3455     TCGContext *s = tcg_ctx;
3456 
3457     while (true) {
3458         TCGOp *last = tcg_last_op();
3459         if (last == op) {
3460             return;
3461         }
3462         tcg_op_remove(s, last);
3463     }
3464 }
3465 
3466 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3467 {
3468     TCGContext *s = tcg_ctx;
3469     TCGOp *op = NULL;
3470 
3471     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3472         QTAILQ_FOREACH(op, &s->free_ops, link) {
3473             if (nargs <= op->nargs) {
3474                 QTAILQ_REMOVE(&s->free_ops, op, link);
3475                 nargs = op->nargs;
3476                 goto found;
3477             }
3478         }
3479     }
3480 
3481     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3482     nargs = MAX(4, nargs);
3483     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3484 
3485  found:
3486     memset(op, 0, offsetof(TCGOp, link));
3487     op->opc = opc;
3488     op->nargs = nargs;
3489 
3490     /* Check for bitfield overflow. */
3491     tcg_debug_assert(op->nargs == nargs);
3492 
3493     s->nb_ops++;
3494     return op;
3495 }
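
/*
 * Note that the free list keeps each op's allocated capacity: e.g. an
 * op freed with nargs = 6 can satisfy a later request for 4 arguments
 * and retains nargs = 6, so its storage is never shrunk.
 */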
3496 
3497 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3498 {
3499     TCGOp *op = tcg_op_alloc(opc, nargs);
3500 
3501     if (tcg_ctx->emit_before_op) {
3502         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3503     } else {
3504         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3505     }
3506     return op;
3507 }
3508 
3509 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3510                             TCGOpcode opc, TCGType type, unsigned nargs)
3511 {
3512     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3513 
3514     TCGOP_TYPE(new_op) = type;
3515     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3516     return new_op;
3517 }
3518 
3519 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3520                            TCGOpcode opc, TCGType type, unsigned nargs)
3521 {
3522     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3523 
3524     TCGOP_TYPE(new_op) = type;
3525     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3526     return new_op;
3527 }
3528 
3529 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3530 {
3531     TCGLabelUse *u;
3532 
3533     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3534         TCGOp *op = u->op;
3535         switch (op->opc) {
3536         case INDEX_op_br:
3537             op->args[0] = label_arg(to);
3538             break;
3539         case INDEX_op_brcond:
3540             op->args[3] = label_arg(to);
3541             break;
3542         case INDEX_op_brcond2_i32:
3543             op->args[5] = label_arg(to);
3544             break;
3545         default:
3546             g_assert_not_reached();
3547         }
3548     }
3549 
3550     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3551 }
3552 
3553 /* Reachability analysis: remove unreachable code.  */
3554 static void __attribute__((noinline))
3555 reachable_code_pass(TCGContext *s)
3556 {
3557     TCGOp *op, *op_next, *op_prev;
3558     bool dead = false;
3559 
3560     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3561         bool remove = dead;
3562         TCGLabel *label;
3563 
3564         switch (op->opc) {
3565         case INDEX_op_set_label:
3566             label = arg_label(op->args[0]);
3567 
3568             /*
3569              * Note that the first op in the TB is always a load,
3570              * so there is always something before a label.
3571              */
3572             op_prev = QTAILQ_PREV(op, link);
3573 
3574             /*
3575              * If we find two sequential labels, move all branches to
3576              * reference the second label and remove the first label.
3577              * Do this before branch to next optimization, so that the
3578              * middle label is out of the way.
3579              */
3580             if (op_prev->opc == INDEX_op_set_label) {
3581                 move_label_uses(label, arg_label(op_prev->args[0]));
3582                 tcg_op_remove(s, op_prev);
3583                 op_prev = QTAILQ_PREV(op, link);
3584             }
3585 
3586             /*
3587              * Optimization can fold conditional branches to unconditional.
3588              * If we find a label which is preceded by an unconditional
3589              * branch to next, remove the branch.  We couldn't do this when
3590              * processing the branch because any dead code between the branch
3591              * and label had not yet been removed.
3592              */
3593             if (op_prev->opc == INDEX_op_br &&
3594                 label == arg_label(op_prev->args[0])) {
3595                 tcg_op_remove(s, op_prev);
3596                 /* Fall through means insns become live again.  */
3597                 dead = false;
3598             }
3599 
3600             if (QSIMPLEQ_EMPTY(&label->branches)) {
3601                 /*
3602                  * While there is an occasional backward branch, virtually
3603                  * all branches generated by the translators are forward.
3604                  * This means that generally we will have already removed
3605                  * every reference to this label that will ever exist, and
3606                  * there is little to be gained by iterating.
3607                  */
3608                 remove = true;
3609             } else {
3610                 /* Once we see a label, insns become live again.  */
3611                 dead = false;
3612                 remove = false;
3613             }
3614             break;
3615 
3616         case INDEX_op_br:
3617         case INDEX_op_exit_tb:
3618         case INDEX_op_goto_ptr:
3619             /* Unconditional branches; everything following is dead.  */
3620             dead = true;
3621             break;
3622 
3623         case INDEX_op_call:
3624             /* Notice noreturn helper calls, raising exceptions.  */
3625             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3626                 dead = true;
3627             }
3628             break;
3629 
3630         case INDEX_op_insn_start:
3631             /* Never remove -- we need to keep these for unwind.  */
3632             remove = false;
3633             break;
3634 
3635         default:
3636             break;
3637         }
3638 
3639         if (remove) {
3640             tcg_op_remove(s, op);
3641         }
3642     }
3643 }
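
/*
 * For illustration, given a (hypothetical) op stream where the
 * optimizer already folded a brcond into the first br:
 *
 *     br $L1
 *     br $L2
 *     set_label $L1
 *
 * "br $L1" makes everything after it dead, so "br $L2" is removed;
 * set_label $L1 then finds "br $L1" as its immediate predecessor and
 * removes that branch-to-next as well, leaving fall-through code live.
 */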
3644 
3645 #define TS_DEAD  1
3646 #define TS_MEM   2
3647 
3648 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3649 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
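
/*
 * Roughly, during the backward liveness walk ts->state combines these
 * bits: TS_DEAD means the current value is not used by any later op,
 * and TS_MEM means the value must be available in its canonical memory
 * slot; TS_DEAD | TS_MEM therefore describes a temp that only needs to
 * exist in memory.
 */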
3650 
3651 /* For liveness_pass_1, the register preferences for a given temp.  */
3652 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3653 {
3654     return ts->state_ptr;
3655 }
3656 
3657 /* For liveness_pass_1, reset the preferences for a given temp to the
3658  * maximal regset for its type.
3659  */
3660 static inline void la_reset_pref(TCGTemp *ts)
3661 {
3662     *la_temp_pref(ts)
3663         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3664 }
3665 
3666 /* liveness analysis: end of function: all temps are dead, and globals
3667    should be in memory. */
3668 static void la_func_end(TCGContext *s, int ng, int nt)
3669 {
3670     int i;
3671 
3672     for (i = 0; i < ng; ++i) {
3673         s->temps[i].state = TS_DEAD | TS_MEM;
3674         la_reset_pref(&s->temps[i]);
3675     }
3676     for (i = ng; i < nt; ++i) {
3677         s->temps[i].state = TS_DEAD;
3678         la_reset_pref(&s->temps[i]);
3679     }
3680 }
3681 
3682 /* liveness analysis: end of basic block: all temps are dead, globals
3683    and local temps should be in memory. */
3684 static void la_bb_end(TCGContext *s, int ng, int nt)
3685 {
3686     int i;
3687 
3688     for (i = 0; i < nt; ++i) {
3689         TCGTemp *ts = &s->temps[i];
3690         int state;
3691 
3692         switch (ts->kind) {
3693         case TEMP_FIXED:
3694         case TEMP_GLOBAL:
3695         case TEMP_TB:
3696             state = TS_DEAD | TS_MEM;
3697             break;
3698         case TEMP_EBB:
3699         case TEMP_CONST:
3700             state = TS_DEAD;
3701             break;
3702         default:
3703             g_assert_not_reached();
3704         }
3705         ts->state = state;
3706         la_reset_pref(ts);
3707     }
3708 }
3709 
3710 /* liveness analysis: sync globals back to memory.  */
3711 static void la_global_sync(TCGContext *s, int ng)
3712 {
3713     int i;
3714 
3715     for (i = 0; i < ng; ++i) {
3716         int state = s->temps[i].state;
3717         s->temps[i].state = state | TS_MEM;
3718         if (state == TS_DEAD) {
3719             /* If the global was previously dead, reset prefs.  */
3720             la_reset_pref(&s->temps[i]);
3721         }
3722     }
3723 }
3724 
3725 /*
3726  * liveness analysis: conditional branch: all temps are dead unless
3727  * explicitly live-across-conditional-branch, globals and local temps
3728  * should be synced.
3729  */
3730 static void la_bb_sync(TCGContext *s, int ng, int nt)
3731 {
3732     la_global_sync(s, ng);
3733 
3734     for (int i = ng; i < nt; ++i) {
3735         TCGTemp *ts = &s->temps[i];
3736         int state;
3737 
3738         switch (ts->kind) {
3739         case TEMP_TB:
3740             state = ts->state;
3741             ts->state = state | TS_MEM;
3742             if (state != TS_DEAD) {
3743                 continue;
3744             }
3745             break;
3746         case TEMP_EBB:
3747         case TEMP_CONST:
3748             continue;
3749         default:
3750             g_assert_not_reached();
3751         }
3752         la_reset_pref(&s->temps[i]);
3753     }
3754 }
3755 
3756 /* liveness analysis: sync globals back to memory and kill.  */
3757 static void la_global_kill(TCGContext *s, int ng)
3758 {
3759     int i;
3760 
3761     for (i = 0; i < ng; i++) {
3762         s->temps[i].state = TS_DEAD | TS_MEM;
3763         la_reset_pref(&s->temps[i]);
3764     }
3765 }
3766 
3767 /* liveness analysis: note live globals crossing calls.  */
3768 static void la_cross_call(TCGContext *s, int nt)
3769 {
3770     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3771     int i;
3772 
3773     for (i = 0; i < nt; i++) {
3774         TCGTemp *ts = &s->temps[i];
3775         if (!(ts->state & TS_DEAD)) {
3776             TCGRegSet *pset = la_temp_pref(ts);
3777             TCGRegSet set = *pset;
3778 
3779             set &= mask;
3780             /* If the combination is not possible, restart.  */
3781             if (set == 0) {
3782                 set = tcg_target_available_regs[ts->type] & mask;
3783             }
3784             *pset = set;
3785         }
3786     }
3787 }
3788 
3789 /*
3790  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3791  * to TEMP_EBB, if possible.
3792  */
3793 static void __attribute__((noinline))
3794 liveness_pass_0(TCGContext *s)
3795 {
3796     void * const multiple_ebb = (void *)(uintptr_t)-1;
3797     int nb_temps = s->nb_temps;
3798     TCGOp *op, *ebb;
3799 
3800     for (int i = s->nb_globals; i < nb_temps; ++i) {
3801         s->temps[i].state_ptr = NULL;
3802     }
3803 
3804     /*
3805      * Represent each EBB by the op at which it begins.  In the case of
3806      * the first EBB, this is the first op, otherwise it is a label.
3807      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3808      * within a single EBB, else MULTIPLE_EBB.
3809      */
3810     ebb = QTAILQ_FIRST(&s->ops);
3811     QTAILQ_FOREACH(op, &s->ops, link) {
3812         const TCGOpDef *def;
3813         int nb_oargs, nb_iargs;
3814 
3815         switch (op->opc) {
3816         case INDEX_op_set_label:
3817             ebb = op;
3818             continue;
3819         case INDEX_op_discard:
3820             continue;
3821         case INDEX_op_call:
3822             nb_oargs = TCGOP_CALLO(op);
3823             nb_iargs = TCGOP_CALLI(op);
3824             break;
3825         default:
3826             def = &tcg_op_defs[op->opc];
3827             nb_oargs = def->nb_oargs;
3828             nb_iargs = def->nb_iargs;
3829             break;
3830         }
3831 
3832         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3833             TCGTemp *ts = arg_temp(op->args[i]);
3834 
3835             if (ts->kind != TEMP_TB) {
3836                 continue;
3837             }
3838             if (ts->state_ptr == NULL) {
3839                 ts->state_ptr = ebb;
3840             } else if (ts->state_ptr != ebb) {
3841                 ts->state_ptr = multiple_ebb;
3842             }
3843         }
3844     }
3845 
3846     /*
3847      * For TEMP_TB that turned out not to be used beyond one EBB,
3848      * reduce the liveness to TEMP_EBB.
3849      */
3850     for (int i = s->nb_globals; i < nb_temps; ++i) {
3851         TCGTemp *ts = &s->temps[i];
3852         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3853             ts->kind = TEMP_EBB;
3854         }
3855     }
3856 }
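
/*
 * E.g. (hypothetically) a TEMP_TB temp written and read only between
 * two consecutive set_label ops is seen with a single 'ebb' value and
 * is demoted to TEMP_EBB above, while one also used after the next
 * label resolves to multiple_ebb and keeps TB-wide liveness.
 */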
3857 
3858 /* Liveness analysis: update the opc_arg_life array to tell if a
3859    given input argument is dead. Instructions updating dead
3860    temporaries are removed. */
3861 static void __attribute__((noinline))
3862 liveness_pass_1(TCGContext *s)
3863 {
3864     int nb_globals = s->nb_globals;
3865     int nb_temps = s->nb_temps;
3866     TCGOp *op, *op_prev;
3867     TCGRegSet *prefs;
3868     int i;
3869 
3870     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3871     for (i = 0; i < nb_temps; ++i) {
3872         s->temps[i].state_ptr = prefs + i;
3873     }
3874 
3875     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3876     la_func_end(s, nb_globals, nb_temps);
3877 
3878     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3879         int nb_iargs, nb_oargs;
3880         TCGOpcode opc_new, opc_new2;
3881         TCGLifeData arg_life = 0;
3882         TCGTemp *ts;
3883         TCGOpcode opc = op->opc;
3884         const TCGOpDef *def = &tcg_op_defs[opc];
3885         const TCGArgConstraint *args_ct;
3886 
3887         switch (opc) {
3888         case INDEX_op_call:
3889             {
3890                 const TCGHelperInfo *info = tcg_call_info(op);
3891                 int call_flags = tcg_call_flags(op);
3892 
3893                 nb_oargs = TCGOP_CALLO(op);
3894                 nb_iargs = TCGOP_CALLI(op);
3895 
3896                 /* pure functions can be removed if their result is unused */
3897                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3898                     for (i = 0; i < nb_oargs; i++) {
3899                         ts = arg_temp(op->args[i]);
3900                         if (ts->state != TS_DEAD) {
3901                             goto do_not_remove_call;
3902                         }
3903                     }
3904                     goto do_remove;
3905                 }
3906             do_not_remove_call:
3907 
3908                 /* Output args are dead.  */
3909                 for (i = 0; i < nb_oargs; i++) {
3910                     ts = arg_temp(op->args[i]);
3911                     if (ts->state & TS_DEAD) {
3912                         arg_life |= DEAD_ARG << i;
3913                     }
3914                     if (ts->state & TS_MEM) {
3915                         arg_life |= SYNC_ARG << i;
3916                     }
3917                     ts->state = TS_DEAD;
3918                     la_reset_pref(ts);
3919                 }
3920 
3921                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3922                 memset(op->output_pref, 0, sizeof(op->output_pref));
3923 
3924                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3925                                     TCG_CALL_NO_READ_GLOBALS))) {
3926                     la_global_kill(s, nb_globals);
3927                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3928                     la_global_sync(s, nb_globals);
3929                 }
3930 
3931                 /* Record arguments that die in this helper.  */
3932                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3933                     ts = arg_temp(op->args[i]);
3934                     if (ts->state & TS_DEAD) {
3935                         arg_life |= DEAD_ARG << i;
3936                     }
3937                 }
3938 
3939                 /* For all live registers, remove call-clobbered prefs.  */
3940                 la_cross_call(s, nb_temps);
3941 
3942                 /*
3943                  * Input arguments are live for preceding opcodes.
3944                  *
3945                  * For those arguments that die, and will be allocated in
3946                  * registers, clear the register set for that arg, to be
3947                  * filled in below.  For args that will be on the stack,
3948                  * reset to any available reg.  Process arguments in reverse
3949                  * order so that if a temp is used more than once, the stack
3950                  * reset to max happens before the register reset to 0.
3951                  */
3952                 for (i = nb_iargs - 1; i >= 0; i--) {
3953                     const TCGCallArgumentLoc *loc = &info->in[i];
3954                     ts = arg_temp(op->args[nb_oargs + i]);
3955 
3956                     if (ts->state & TS_DEAD) {
3957                         switch (loc->kind) {
3958                         case TCG_CALL_ARG_NORMAL:
3959                         case TCG_CALL_ARG_EXTEND_U:
3960                         case TCG_CALL_ARG_EXTEND_S:
3961                             if (arg_slot_reg_p(loc->arg_slot)) {
3962                                 *la_temp_pref(ts) = 0;
3963                                 break;
3964                             }
3965                             /* fall through */
3966                         default:
3967                             *la_temp_pref(ts) =
3968                                 tcg_target_available_regs[ts->type];
3969                             break;
3970                         }
3971                         ts->state &= ~TS_DEAD;
3972                     }
3973                 }
3974 
3975                 /*
3976                  * For each input argument, add its input register to prefs.
3977                  * If a temp is used once, this produces a single set bit;
3978                  * if a temp is used multiple times, this produces a set.
3979                  */
3980                 for (i = 0; i < nb_iargs; i++) {
3981                     const TCGCallArgumentLoc *loc = &info->in[i];
3982                     ts = arg_temp(op->args[nb_oargs + i]);
3983 
3984                     switch (loc->kind) {
3985                     case TCG_CALL_ARG_NORMAL:
3986                     case TCG_CALL_ARG_EXTEND_U:
3987                     case TCG_CALL_ARG_EXTEND_S:
3988                         if (arg_slot_reg_p(loc->arg_slot)) {
3989                             tcg_regset_set_reg(*la_temp_pref(ts),
3990                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3991                         }
3992                         break;
3993                     default:
3994                         break;
3995                     }
3996                 }
3997             }
3998             break;
3999         case INDEX_op_insn_start:
4000             break;
4001         case INDEX_op_discard:
4002             /* mark the temporary as dead */
4003             ts = arg_temp(op->args[0]);
4004             ts->state = TS_DEAD;
4005             la_reset_pref(ts);
4006             break;
4007 
4008         case INDEX_op_add2_i32:
4009         case INDEX_op_add2_i64:
4010             opc_new = INDEX_op_add;
4011             goto do_addsub2;
4012         case INDEX_op_sub2_i32:
4013         case INDEX_op_sub2_i64:
4014             opc_new = INDEX_op_sub;
4015         do_addsub2:
4016             nb_iargs = 4;
4017             nb_oargs = 2;
4018             /* Test if the high part of the operation is dead, but not
4019                the low part.  The result can be optimized to a simple
4020                add or sub.  This often happens for an x86_64 guest when
4021                the CPU mode is set to 32 bit.  */
4022             if (arg_temp(op->args[1])->state == TS_DEAD) {
4023                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4024                     goto do_remove;
4025                 }
4026                 /* Replace the opcode and adjust the args in place,
4027                    leaving 3 unused args at the end.  */
4028                 op->opc = opc = opc_new;
4029                 op->args[1] = op->args[2];
4030                 op->args[2] = op->args[4];
4031                 /* Fall through and mark the single-word operation live.  */
4032                 nb_iargs = 2;
4033                 nb_oargs = 1;
4034             }
4035             goto do_not_remove;
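
            /*
             * Hypothetical example: for "add2 lo, hi, al, ah, bl, bh"
             * with hi dead but lo live, the rewrite above yields
             * "add lo, al, bl", dropping the carry computation.
             */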
4036 
4037         case INDEX_op_muls2:
4038             opc_new = INDEX_op_mul;
4039             opc_new2 = INDEX_op_mulsh;
4040             goto do_mul2;
4041         case INDEX_op_mulu2:
4042             opc_new = INDEX_op_mul;
4043             opc_new2 = INDEX_op_muluh;
4044         do_mul2:
4045             nb_iargs = 2;
4046             nb_oargs = 2;
4047             if (arg_temp(op->args[1])->state == TS_DEAD) {
4048                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4049                     /* Both parts of the operation are dead.  */
4050                     goto do_remove;
4051                 }
4052                 /* The high part of the operation is dead; generate the low. */
4053                 op->opc = opc = opc_new;
4054                 op->args[1] = op->args[2];
4055                 op->args[2] = op->args[3];
4056             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4057                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4058                 /* The low part of the operation is dead; generate the high. */
4059                 op->opc = opc = opc_new2;
4060                 op->args[0] = op->args[1];
4061                 op->args[1] = op->args[2];
4062                 op->args[2] = op->args[3];
4063             } else {
4064                 goto do_not_remove;
4065             }
4066             /* Mark the single-word operation live.  */
4067             nb_oargs = 1;
4068             goto do_not_remove;
4069 
4070         default:
4071             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4072             nb_iargs = def->nb_iargs;
4073             nb_oargs = def->nb_oargs;
4074 
4075             /* Test if the operation can be removed because all
4076                its outputs are dead. We assume that nb_oargs == 0
4077                implies side effects.  */
4078             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4079                 for (i = 0; i < nb_oargs; i++) {
4080                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4081                         goto do_not_remove;
4082                     }
4083                 }
4084                 goto do_remove;
4085             }
4086             goto do_not_remove;
4087 
4088         do_remove:
4089             tcg_op_remove(s, op);
4090             break;
4091 
4092         do_not_remove:
4093             for (i = 0; i < nb_oargs; i++) {
4094                 ts = arg_temp(op->args[i]);
4095 
4096                 /* Remember the preference of the uses that followed.  */
4097                 if (i < ARRAY_SIZE(op->output_pref)) {
4098                     op->output_pref[i] = *la_temp_pref(ts);
4099                 }
4100 
4101                 /* Output args are dead.  */
4102                 if (ts->state & TS_DEAD) {
4103                     arg_life |= DEAD_ARG << i;
4104                 }
4105                 if (ts->state & TS_MEM) {
4106                     arg_life |= SYNC_ARG << i;
4107                 }
4108                 ts->state = TS_DEAD;
4109                 la_reset_pref(ts);
4110             }
4111 
4112             /* If end of basic block, update.  */
4113             if (def->flags & TCG_OPF_BB_EXIT) {
4114                 la_func_end(s, nb_globals, nb_temps);
4115             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4116                 la_bb_sync(s, nb_globals, nb_temps);
4117             } else if (def->flags & TCG_OPF_BB_END) {
4118                 la_bb_end(s, nb_globals, nb_temps);
4119             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4120                 la_global_sync(s, nb_globals);
4121                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4122                     la_cross_call(s, nb_temps);
4123                 }
4124             }
4125 
4126             /* Record arguments that die in this opcode.  */
4127             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4128                 ts = arg_temp(op->args[i]);
4129                 if (ts->state & TS_DEAD) {
4130                     arg_life |= DEAD_ARG << i;
4131                 }
4132             }
4133 
4134             /* Input arguments are live for preceding opcodes.  */
4135             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4136                 ts = arg_temp(op->args[i]);
4137                 if (ts->state & TS_DEAD) {
4138                     /* For operands that were dead, initially allow
4139                        all regs for the type.  */
4140                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4141                     ts->state &= ~TS_DEAD;
4142                 }
4143             }
4144 
4145             /* Incorporate constraints for this operand.  */
4146             switch (opc) {
4147             case INDEX_op_mov:
4148                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4149                    have proper constraints.  That said, special case
4150                    moves to propagate preferences backward.  */
4151                 if (IS_DEAD_ARG(1)) {
4152                     *la_temp_pref(arg_temp(op->args[0]))
4153                         = *la_temp_pref(arg_temp(op->args[1]));
4154                 }
4155                 break;
4156 
4157             default:
4158                 args_ct = opcode_args_ct(op);
4159                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4160                     const TCGArgConstraint *ct = &args_ct[i];
4161                     TCGRegSet set, *pset;
4162 
4163                     ts = arg_temp(op->args[i]);
4164                     pset = la_temp_pref(ts);
4165                     set = *pset;
4166 
4167                     set &= ct->regs;
4168                     if (ct->ialias) {
4169                         set &= output_pref(op, ct->alias_index);
4170                     }
4171                     /* If the combination is not possible, restart.  */
4172                     if (set == 0) {
4173                         set = ct->regs;
4174                     }
4175                     *pset = set;
4176                 }
4177                 break;
4178             }
4179             break;
4180         }
4181         op->life = arg_life;
4182     }
4183 }
4184 
4185 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4186 static bool __attribute__((noinline))
4187 liveness_pass_2(TCGContext *s)
4188 {
4189     int nb_globals = s->nb_globals;
4190     int nb_temps, i;
4191     bool changes = false;
4192     TCGOp *op, *op_next;
4193 
4194     /* Create a temporary for each indirect global.  */
4195     for (i = 0; i < nb_globals; ++i) {
4196         TCGTemp *its = &s->temps[i];
4197         if (its->indirect_reg) {
4198             TCGTemp *dts = tcg_temp_alloc(s);
4199             dts->type = its->type;
4200             dts->base_type = its->base_type;
4201             dts->temp_subindex = its->temp_subindex;
4202             dts->kind = TEMP_EBB;
4203             its->state_ptr = dts;
4204         } else {
4205             its->state_ptr = NULL;
4206         }
4207         /* All globals begin dead.  */
4208         its->state = TS_DEAD;
4209     }
4210     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4211         TCGTemp *its = &s->temps[i];
4212         its->state_ptr = NULL;
4213         its->state = TS_DEAD;
4214     }
4215 
4216     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4217         TCGOpcode opc = op->opc;
4218         const TCGOpDef *def = &tcg_op_defs[opc];
4219         TCGLifeData arg_life = op->life;
4220         int nb_iargs, nb_oargs, call_flags;
4221         TCGTemp *arg_ts, *dir_ts;
4222 
4223         if (opc == INDEX_op_call) {
4224             nb_oargs = TCGOP_CALLO(op);
4225             nb_iargs = TCGOP_CALLI(op);
4226             call_flags = tcg_call_flags(op);
4227         } else {
4228             nb_iargs = def->nb_iargs;
4229             nb_oargs = def->nb_oargs;
4230 
4231             /* Set flags similar to those that calls require.  */
4232             if (def->flags & TCG_OPF_COND_BRANCH) {
4233                 /* Like reading globals: sync_globals */
4234                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4235             } else if (def->flags & TCG_OPF_BB_END) {
4236                 /* Like writing globals: save_globals */
4237                 call_flags = 0;
4238             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4239                 /* Like reading globals: sync_globals */
4240                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4241             } else {
4242                 /* No effect on globals.  */
4243                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4244                               TCG_CALL_NO_WRITE_GLOBALS);
4245             }
4246         }
4247 
4248         /* Make sure that input arguments are available.  */
4249         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4250             arg_ts = arg_temp(op->args[i]);
4251             dir_ts = arg_ts->state_ptr;
4252             if (dir_ts && arg_ts->state == TS_DEAD) {
4253                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4254                                   ? INDEX_op_ld_i32
4255                                   : INDEX_op_ld_i64);
4256                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4257                                                   arg_ts->type, 3);
4258 
4259                 lop->args[0] = temp_arg(dir_ts);
4260                 lop->args[1] = temp_arg(arg_ts->mem_base);
4261                 lop->args[2] = arg_ts->mem_offset;
4262 
4263                 /* Loaded, but synced with memory.  */
4264                 arg_ts->state = TS_MEM;
4265             }
4266         }
4267 
4268         /* Perform input replacement, and mark inputs that became dead.
4269            No action is required except keeping temp_state up to date
4270            so that we reload when needed.  */
4271         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4272             arg_ts = arg_temp(op->args[i]);
4273             dir_ts = arg_ts->state_ptr;
4274             if (dir_ts) {
4275                 op->args[i] = temp_arg(dir_ts);
4276                 changes = true;
4277                 if (IS_DEAD_ARG(i)) {
4278                     arg_ts->state = TS_DEAD;
4279                 }
4280             }
4281         }
4282 
4283         /* Liveness analysis should ensure that the following are
4284            all correct, for call sites and basic block end points.  */
4285         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4286             /* Nothing to do */
4287         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4288             for (i = 0; i < nb_globals; ++i) {
4289                 /* Liveness should see that globals are synced back,
4290                    that is, either TS_DEAD or TS_MEM.  */
4291                 arg_ts = &s->temps[i];
4292                 tcg_debug_assert(arg_ts->state_ptr == 0
4293                                  || arg_ts->state != 0);
4294             }
4295         } else {
4296             for (i = 0; i < nb_globals; ++i) {
4297                 /* Liveness should see that globals are saved back,
4298                    that is, TS_DEAD, waiting to be reloaded.  */
4299                 arg_ts = &s->temps[i];
4300                 tcg_debug_assert(arg_ts->state_ptr == 0
4301                                  || arg_ts->state == TS_DEAD);
4302             }
4303         }
4304 
4305         /* Outputs become available.  */
4306         if (opc == INDEX_op_mov) {
4307             arg_ts = arg_temp(op->args[0]);
4308             dir_ts = arg_ts->state_ptr;
4309             if (dir_ts) {
4310                 op->args[0] = temp_arg(dir_ts);
4311                 changes = true;
4312 
4313                 /* The output is now live and modified.  */
4314                 arg_ts->state = 0;
4315 
4316                 if (NEED_SYNC_ARG(0)) {
4317                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4318                                       ? INDEX_op_st_i32
4319                                       : INDEX_op_st_i64);
4320                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4321                                                      arg_ts->type, 3);
4322                     TCGTemp *out_ts = dir_ts;
4323 
4324                     if (IS_DEAD_ARG(0)) {
4325                         out_ts = arg_temp(op->args[1]);
4326                         arg_ts->state = TS_DEAD;
4327                         tcg_op_remove(s, op);
4328                     } else {
4329                         arg_ts->state = TS_MEM;
4330                     }
4331 
4332                     sop->args[0] = temp_arg(out_ts);
4333                     sop->args[1] = temp_arg(arg_ts->mem_base);
4334                     sop->args[2] = arg_ts->mem_offset;
4335                 } else {
4336                     tcg_debug_assert(!IS_DEAD_ARG(0));
4337                 }
4338             }
4339         } else {
4340             for (i = 0; i < nb_oargs; i++) {
4341                 arg_ts = arg_temp(op->args[i]);
4342                 dir_ts = arg_ts->state_ptr;
4343                 if (!dir_ts) {
4344                     continue;
4345                 }
4346                 op->args[i] = temp_arg(dir_ts);
4347                 changes = true;
4348 
4349                 /* The output is now live and modified.  */
4350                 arg_ts->state = 0;
4351 
4352                 /* Sync outputs upon their last write.  */
4353                 if (NEED_SYNC_ARG(i)) {
4354                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4355                                       ? INDEX_op_st_i32
4356                                       : INDEX_op_st_i64);
4357                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4358                                                      arg_ts->type, 3);
4359 
4360                     sop->args[0] = temp_arg(dir_ts);
4361                     sop->args[1] = temp_arg(arg_ts->mem_base);
4362                     sop->args[2] = arg_ts->mem_offset;
4363 
4364                     arg_ts->state = TS_MEM;
4365                 }
4366                 /* Drop outputs that are dead.  */
4367                 if (IS_DEAD_ARG(i)) {
4368                     arg_ts->state = TS_DEAD;
4369                 }
4370             }
4371         }
4372     }
4373 
4374     return changes;
4375 }
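
/*
 * Conceptually, for an indirect global g (kept in memory off env) and
 * a hypothetical op "add g, g, t", this pass rewrites it as
 *
 *     ld_i32 d, env, off(g)    (only if g is not already loaded)
 *     add d, d, t
 *     st_i32 d, env, off(g)    (only upon the last write, per NEED_SYNC_ARG)
 *
 * where d is the TEMP_EBB shadow temp created above.
 */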
4376 
4377 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4378 {
4379     intptr_t off;
4380     int size, align;
4381 
4382     /* When allocating an object, look at the full type. */
4383     size = tcg_type_size(ts->base_type);
4384     switch (ts->base_type) {
4385     case TCG_TYPE_I32:
4386         align = 4;
4387         break;
4388     case TCG_TYPE_I64:
4389     case TCG_TYPE_V64:
4390         align = 8;
4391         break;
4392     case TCG_TYPE_I128:
4393     case TCG_TYPE_V128:
4394     case TCG_TYPE_V256:
4395         /*
4396          * Note that we do not require aligned storage for V256,
4397          * and that we provide alignment for I128 to match V128,
4398          * even if that's above what the host ABI requires.
4399          */
4400         align = 16;
4401         break;
4402     default:
4403         g_assert_not_reached();
4404     }
4405 
4406     /*
4407      * Assume the stack is sufficiently aligned.
4408      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4409      * and do not require 16 byte vector alignment.  This seems slightly
4410      * easier than fully parameterizing the above switch statement.
4411      */
4412     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4413     off = ROUND_UP(s->current_frame_offset, align);
4414 
4415     /* If we've exhausted the stack frame, restart with a smaller TB. */
4416     if (off + size > s->frame_end) {
4417         tcg_raise_tb_overflow(s);
4418     }
4419     s->current_frame_offset = off + size;
4420 #if defined(__sparc__)
4421     off += TCG_TARGET_STACK_BIAS;
4422 #endif
4423 
4424     /* If the object was subdivided, assign memory to all the parts. */
4425     if (ts->base_type != ts->type) {
4426         int part_size = tcg_type_size(ts->type);
4427         int part_count = size / part_size;
4428 
4429         /*
4430          * Each part is allocated sequentially in tcg_temp_new_internal.
4431          * Jump back to the first part by subtracting the current index.
4432          */
4433         ts -= ts->temp_subindex;
4434         for (int i = 0; i < part_count; ++i) {
4435             ts[i].mem_offset = off + i * part_size;
4436             ts[i].mem_base = s->frame_temp;
4437             ts[i].mem_allocated = 1;
4438         }
4439     } else {
4440         ts->mem_offset = off;
4441         ts->mem_base = s->frame_temp;
4442         ts->mem_allocated = 1;
4443     }
4444 }
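
/*
 * E.g. a TCG_TYPE_I128 temp subdivided into two I64 parts receives 16
 * bytes at a 16-aligned offset (stack alignment permitting), with part
 * 0 at off and part 1 at off + 8, both based on frame_temp.
 */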
4445 
4446 /* Assign @reg to @ts, and update reg_to_temp[]. */
4447 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4448 {
4449     if (ts->val_type == TEMP_VAL_REG) {
4450         TCGReg old = ts->reg;
4451         tcg_debug_assert(s->reg_to_temp[old] == ts);
4452         if (old == reg) {
4453             return;
4454         }
4455         s->reg_to_temp[old] = NULL;
4456     }
4457     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4458     s->reg_to_temp[reg] = ts;
4459     ts->val_type = TEMP_VAL_REG;
4460     ts->reg = reg;
4461 }
4462 
4463 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4464 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4465 {
4466     tcg_debug_assert(type != TEMP_VAL_REG);
4467     if (ts->val_type == TEMP_VAL_REG) {
4468         TCGReg reg = ts->reg;
4469         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4470         s->reg_to_temp[reg] = NULL;
4471     }
4472     ts->val_type = type;
4473 }
4474 
4475 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4476 
4477 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4478    mark it free; otherwise mark it dead.  */
4479 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4480 {
4481     TCGTempVal new_type;
4482 
4483     switch (ts->kind) {
4484     case TEMP_FIXED:
4485         return;
4486     case TEMP_GLOBAL:
4487     case TEMP_TB:
4488         new_type = TEMP_VAL_MEM;
4489         break;
4490     case TEMP_EBB:
4491         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4492         break;
4493     case TEMP_CONST:
4494         new_type = TEMP_VAL_CONST;
4495         break;
4496     default:
4497         g_assert_not_reached();
4498     }
4499     set_temp_val_nonreg(s, ts, new_type);
4500 }
4501 
4502 /* Mark a temporary as dead.  */
4503 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4504 {
4505     temp_free_or_dead(s, ts, 1);
4506 }
4507 
4508 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4509    register needs to be allocated to store a constant.  If 'free_or_dead'
4510    is non-zero, subsequently release the temporary; if it is positive, the
4511    temp is dead; if it is negative, the temp is free.  */
4512 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4513                       TCGRegSet preferred_regs, int free_or_dead)
4514 {
4515     if (!temp_readonly(ts) && !ts->mem_coherent) {
4516         if (!ts->mem_allocated) {
4517             temp_allocate_frame(s, ts);
4518         }
4519         switch (ts->val_type) {
4520         case TEMP_VAL_CONST:
4521             /* If we're going to free the temp immediately, then we won't
4522                require it later in a register, so attempt to store the
4523                constant to memory directly.  */
4524             if (free_or_dead
4525                 && tcg_out_sti(s, ts->type, ts->val,
4526                                ts->mem_base->reg, ts->mem_offset)) {
4527                 break;
4528             }
4529             temp_load(s, ts, tcg_target_available_regs[ts->type],
4530                       allocated_regs, preferred_regs);
4531             /* fallthrough */
4532 
4533         case TEMP_VAL_REG:
4534             tcg_out_st(s, ts->type, ts->reg,
4535                        ts->mem_base->reg, ts->mem_offset);
4536             break;
4537 
4538         case TEMP_VAL_MEM:
4539             break;
4540 
4541         case TEMP_VAL_DEAD:
4542         default:
4543             g_assert_not_reached();
4544         }
4545         ts->mem_coherent = 1;
4546     }
4547     if (free_or_dead) {
4548         temp_free_or_dead(s, ts, free_or_dead);
4549     }
4550 }
4551 
4552 /* free register 'reg' by spilling the corresponding temporary if necessary */
4553 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4554 {
4555     TCGTemp *ts = s->reg_to_temp[reg];
4556     if (ts != NULL) {
4557         temp_sync(s, ts, allocated_regs, 0, -1);
4558     }
4559 }
4560 
4561 /**
4562  * tcg_reg_alloc:
4563  * @required_regs: Set of registers in which we must allocate.
4564  * @allocated_regs: Set of registers which must be avoided.
4565  * @preferred_regs: Set of registers we should prefer.
4566  * @rev: True if we search the registers in "indirect" order.
4567  *
4568  * The allocated register must be in @required_regs & ~@allocated_regs,
4569  * but if we can put it in @preferred_regs we may save a move later.
4570  */
4571 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4572                             TCGRegSet allocated_regs,
4573                             TCGRegSet preferred_regs, bool rev)
4574 {
4575     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4576     TCGRegSet reg_ct[2];
4577     const int *order;
4578 
4579     reg_ct[1] = required_regs & ~allocated_regs;
4580     tcg_debug_assert(reg_ct[1] != 0);
4581     reg_ct[0] = reg_ct[1] & preferred_regs;
4582 
4583     /* Skip the preferred_regs option if it cannot be satisfied,
4584        or if the preference made no difference.  */
4585     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4586 
4587     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4588 
4589     /* Try free registers, preferences first.  */
4590     for (j = f; j < 2; j++) {
4591         TCGRegSet set = reg_ct[j];
4592 
4593         if (tcg_regset_single(set)) {
4594             /* One register in the set.  */
4595             TCGReg reg = tcg_regset_first(set);
4596             if (s->reg_to_temp[reg] == NULL) {
4597                 return reg;
4598             }
4599         } else {
4600             for (i = 0; i < n; i++) {
4601                 TCGReg reg = order[i];
4602                 if (s->reg_to_temp[reg] == NULL &&
4603                     tcg_regset_test_reg(set, reg)) {
4604                     return reg;
4605                 }
4606             }
4607         }
4608     }
4609 
4610     /* We must spill something.  */
4611     for (j = f; j < 2; j++) {
4612         TCGRegSet set = reg_ct[j];
4613 
4614         if (tcg_regset_single(set)) {
4615             /* One register in the set.  */
4616             TCGReg reg = tcg_regset_first(set);
4617             tcg_reg_free(s, reg, allocated_regs);
4618             return reg;
4619         } else {
4620             for (i = 0; i < n; i++) {
4621                 TCGReg reg = order[i];
4622                 if (tcg_regset_test_reg(set, reg)) {
4623                     tcg_reg_free(s, reg, allocated_regs);
4624                     return reg;
4625                 }
4626             }
4627         }
4628     }
4629 
4630     g_assert_not_reached();
4631 }
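
/*
 * Hypothetically, with required_regs = {r0..r3}, allocated_regs = {r0}
 * and preferred_regs = {r2}: if r2 is unoccupied it is returned by the
 * first (preference) pass; otherwise any free register among r1..r3
 * wins; only if all are occupied does the second loop spill one of
 * them via tcg_reg_free().
 */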
4632 
4633 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4634                                  TCGRegSet allocated_regs,
4635                                  TCGRegSet preferred_regs, bool rev)
4636 {
4637     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4638     TCGRegSet reg_ct[2];
4639     const int *order;
4640 
4641     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4642     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4643     tcg_debug_assert(reg_ct[1] != 0);
4644     reg_ct[0] = reg_ct[1] & preferred_regs;
4645 
4646     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4647 
4648     /*
4649      * Skip the preferred_regs option if it cannot be satisfied,
4650      * or if the preference made no difference.
4651      */
4652     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4653 
4654     /*
4655      * Minimize the number of flushes by looking for 2 free registers first,
4656      * then a single flush, then two flushes.
4657      */
4658     for (fmin = 2; fmin >= 0; fmin--) {
4659         for (j = k; j < 2; j++) {
4660             TCGRegSet set = reg_ct[j];
4661 
4662             for (i = 0; i < n; i++) {
4663                 TCGReg reg = order[i];
4664 
4665                 if (tcg_regset_test_reg(set, reg)) {
4666                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4667                     if (f >= fmin) {
4668                         tcg_reg_free(s, reg, allocated_regs);
4669                         tcg_reg_free(s, reg + 1, allocated_regs);
4670                         return reg;
4671                     }
4672                 }
4673             }
4674         }
4675     }
4676     g_assert_not_reached();
4677 }
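
/*
 * Illustrative sketch, not part of the build: the pair mask above,
 *     required_regs & ~(allocated_regs | (allocated_regs >> 1)),
 * clears bit R whenever R or R+1 is already allocated.  E.g. with
 * allocated_regs = 0b00100 (r2 in use):
 *
 *     allocated_regs >> 1                     = 0b00010
 *     allocated_regs | (allocated_regs >> 1)  = 0b00110
 *     ~(...)                                  = ...11001
 *
 * Only r0 (pair r0/r1), r3 (pair r3/r4), and upwards survive as
 * candidate base registers; r1 and r2 are rejected because their
 * pairs would overlap the allocated r2.
 */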
4678 
4679 /* Make sure the temporary is in a register.  If needed, allocate the register
4680    from DESIRED while avoiding ALLOCATED.  */
4681 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4682                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4683 {
4684     TCGReg reg;
4685 
4686     switch (ts->val_type) {
4687     case TEMP_VAL_REG:
4688         return;
4689     case TEMP_VAL_CONST:
4690         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4691                             preferred_regs, ts->indirect_base);
4692         if (ts->type <= TCG_TYPE_I64) {
4693             tcg_out_movi(s, ts->type, reg, ts->val);
4694         } else {
4695             uint64_t val = ts->val;
4696             MemOp vece = MO_64;
4697 
4698             /*
4699              * Find the minimal vector element that matches the constant.
4700              * The targets will, in general, have to do this search anyway;
4701              * do it here generically.
4702              */
4703             if (val == dup_const(MO_8, val)) {
4704                 vece = MO_8;
4705             } else if (val == dup_const(MO_16, val)) {
4706                 vece = MO_16;
4707             } else if (val == dup_const(MO_32, val)) {
4708                 vece = MO_32;
4709             }
4710 
4711             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4712         }
4713         ts->mem_coherent = 0;
4714         break;
4715     case TEMP_VAL_MEM:
4716         if (!ts->mem_allocated) {
4717             temp_allocate_frame(s, ts);
4718         }
4719         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4720                             preferred_regs, ts->indirect_base);
4721         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4722         ts->mem_coherent = 1;
4723         break;
4724     case TEMP_VAL_DEAD:
4725     default:
4726         g_assert_not_reached();
4727     }
4728     set_temp_val_reg(s, ts, reg);
4729 }
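
/*
 * Illustrative sketch, not part of the build: dup_const() replicates
 * the low 1 << vece bytes of a value across 64 bits, so the search
 * above finds the narrowest element that reproduces the constant:
 *
 *     val = 0x1212121212121212  ->  matches dup_const(MO_8, val)
 *     val = 0x1234123412341234  ->  fails MO_8 (0x3434...34),
 *                                   matches dup_const(MO_16, val)
 *     val = 0x0000000100000001  ->  fails MO_8 and MO_16,
 *                                   matches dup_const(MO_32, val)
 *
 * A narrower VECE gives the backend more broadcast instructions to
 * choose from; anything else falls back to a full MO_64 dupi.
 */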
4730 
4731 /* Save a temporary to memory. 'allocated_regs' is used in case a
4732    temporary register needs to be allocated to store a constant.  */
4733 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4734 {
4735     /* The liveness analysis already ensures that globals are back
4736        in memory. Keep a tcg_debug_assert for safety. */
4737     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4738 }
4739 
4740 /* save globals to their canonical location and assume they can be
4741    modified by the following code. 'allocated_regs' is used in case a
4742    temporary register needs to be allocated to store a constant. */
4743 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4744 {
4745     int i, n;
4746 
4747     for (i = 0, n = s->nb_globals; i < n; i++) {
4748         temp_save(s, &s->temps[i], allocated_regs);
4749     }
4750 }
4751 
4752 /* sync globals to their canonical location and assume they can be
4753    read by the following code. 'allocated_regs' is used in case a
4754    temporary register needs to be allocated to store a constant. */
4755 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4756 {
4757     int i, n;
4758 
4759     for (i = 0, n = s->nb_globals; i < n; i++) {
4760         TCGTemp *ts = &s->temps[i];
4761         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4762                          || ts->kind == TEMP_FIXED
4763                          || ts->mem_coherent);
4764     }
4765 }
4766 
4767 /* at the end of a basic block, we assume all temporaries are dead and
4768    all globals are stored at their canonical location. */
4769 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4770 {
4771     int i;
4772 
4773     for (i = s->nb_globals; i < s->nb_temps; i++) {
4774         TCGTemp *ts = &s->temps[i];
4775 
4776         switch (ts->kind) {
4777         case TEMP_TB:
4778             temp_save(s, ts, allocated_regs);
4779             break;
4780         case TEMP_EBB:
4781             /* The liveness analysis already ensures that temps are dead.
4782                Keep a tcg_debug_assert for safety. */
4783             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4784             break;
4785         case TEMP_CONST:
4786             /* Similarly, we should have freed any allocated register. */
4787             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4788             break;
4789         default:
4790             g_assert_not_reached();
4791         }
4792     }
4793 
4794     save_globals(s, allocated_regs);
4795 }
4796 
4797 /*
4798  * At a conditional branch, we assume all temporaries are dead unless
4799  * explicitly live-across-conditional-branch; all globals and local
4800  * temps are synced to their location.
4801  */
4802 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4803 {
4804     sync_globals(s, allocated_regs);
4805 
4806     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4807         TCGTemp *ts = &s->temps[i];
4808         /*
4809          * The liveness analysis already ensures that temps are dead.
4810          * Keep tcg_debug_asserts for safety.
4811          */
4812         switch (ts->kind) {
4813         case TEMP_TB:
4814             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4815             break;
4816         case TEMP_EBB:
4817         case TEMP_CONST:
4818             break;
4819         default:
4820             g_assert_not_reached();
4821         }
4822     }
4823 }
4824 
4825 /*
4826  * Specialized code generation for INDEX_op_mov_* with a constant.
4827  */
4828 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4829                                   tcg_target_ulong val, TCGLifeData arg_life,
4830                                   TCGRegSet preferred_regs)
4831 {
4832     /* ENV should not be modified.  */
4833     tcg_debug_assert(!temp_readonly(ots));
4834 
4835     /* The movi is not explicitly generated here.  */
4836     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4837     ots->val = val;
4838     ots->mem_coherent = 0;
4839     if (NEED_SYNC_ARG(0)) {
4840         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4841     } else if (IS_DEAD_ARG(0)) {
4842         temp_dead(s, ots);
4843     }
4844 }
4845 
4846 /*
4847  * Specialized code generation for INDEX_op_mov_*.
4848  */
4849 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4850 {
4851     const TCGLifeData arg_life = op->life;
4852     TCGRegSet allocated_regs, preferred_regs;
4853     TCGTemp *ts, *ots;
4854     TCGType otype, itype;
4855     TCGReg oreg, ireg;
4856 
4857     allocated_regs = s->reserved_regs;
4858     preferred_regs = output_pref(op, 0);
4859     ots = arg_temp(op->args[0]);
4860     ts = arg_temp(op->args[1]);
4861 
4862     /* ENV should not be modified.  */
4863     tcg_debug_assert(!temp_readonly(ots));
4864 
4865     /* Note that otype != itype for no-op truncation.  */
4866     otype = ots->type;
4867     itype = ts->type;
4868 
4869     if (ts->val_type == TEMP_VAL_CONST) {
4870         /* propagate constant or generate sti */
4871         tcg_target_ulong val = ts->val;
4872         if (IS_DEAD_ARG(1)) {
4873             temp_dead(s, ts);
4874         }
4875         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4876         return;
4877     }
4878 
4879     /* If the source value is in memory we're going to be forced
4880        to have it in a register in order to perform the copy.  Copy
4881        the SOURCE value into its own register first, that way we
4882        don't have to reload SOURCE the next time it is used. */
4883     if (ts->val_type == TEMP_VAL_MEM) {
4884         temp_load(s, ts, tcg_target_available_regs[itype],
4885                   allocated_regs, preferred_regs);
4886     }
4887     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4888     ireg = ts->reg;
4889 
4890     if (IS_DEAD_ARG(0)) {
4891         /* mov to a non-saved dead register makes no sense (even with
4892            liveness analysis disabled). */
4893         tcg_debug_assert(NEED_SYNC_ARG(0));
4894         if (!ots->mem_allocated) {
4895             temp_allocate_frame(s, ots);
4896         }
4897         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4898         if (IS_DEAD_ARG(1)) {
4899             temp_dead(s, ts);
4900         }
4901         temp_dead(s, ots);
4902         return;
4903     }
4904 
4905     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4906         /*
4907          * The mov can be suppressed.  Kill input first, so that it
4908          * is unlinked from reg_to_temp, then set the output to the
4909          * reg that we saved from the input.
4910          */
4911         temp_dead(s, ts);
4912         oreg = ireg;
4913     } else {
4914         if (ots->val_type == TEMP_VAL_REG) {
4915             oreg = ots->reg;
4916         } else {
4917             /* Make sure to not spill the input register during allocation. */
4918             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4919                                  allocated_regs | ((TCGRegSet)1 << ireg),
4920                                  preferred_regs, ots->indirect_base);
4921         }
4922         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4923             /*
4924              * Cross register class move not supported.
4925              * Store the source register into the destination slot
4926              * and leave the destination temp as TEMP_VAL_MEM.
4927              */
4928             assert(!temp_readonly(ots));
4929             if (!ots->mem_allocated) {
4930                 temp_allocate_frame(s, ots);
4931             }
4932             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4933             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4934             ots->mem_coherent = 1;
4935             return;
4936         }
4937     }
4938     set_temp_val_reg(s, ots, oreg);
4939     ots->mem_coherent = 0;
4940 
4941     if (NEED_SYNC_ARG(0)) {
4942         temp_sync(s, ots, allocated_regs, 0, 0);
4943     }
4944 }
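
/*
 * Illustrative note, not part of the build: when the output register
 * is allocated above, the input register is added to the in-use set
 * by hand:
 *
 *     allocated_regs | ((TCGRegSet)1 << ireg)
 *
 * Without this, tcg_reg_alloc() could pick ireg itself, spilling the
 * very value we are about to copy and then emitting a useless
 * self-move.  Masking one extra register costs nothing and guarantees
 * the source survives until tcg_out_mov() reads it.
 */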
4945 
4946 /*
4947  * Specialized code generation for INDEX_op_dup_vec.
4948  */
4949 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4950 {
4951     const TCGLifeData arg_life = op->life;
4952     TCGRegSet dup_out_regs, dup_in_regs;
4953     const TCGArgConstraint *dup_args_ct;
4954     TCGTemp *its, *ots;
4955     TCGType itype, vtype;
4956     unsigned vece;
4957     int lowpart_ofs;
4958     bool ok;
4959 
4960     ots = arg_temp(op->args[0]);
4961     its = arg_temp(op->args[1]);
4962 
4963     /* ENV should not be modified.  */
4964     tcg_debug_assert(!temp_readonly(ots));
4965 
4966     itype = its->type;
4967     vece = TCGOP_VECE(op);
4968     vtype = TCGOP_TYPE(op);
4969 
4970     if (its->val_type == TEMP_VAL_CONST) {
4971         /* Propagate constant via movi -> dupi.  */
4972         tcg_target_ulong val = its->val;
4973         if (IS_DEAD_ARG(1)) {
4974             temp_dead(s, its);
4975         }
4976         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4977         return;
4978     }
4979 
4980     dup_args_ct = opcode_args_ct(op);
4981     dup_out_regs = dup_args_ct[0].regs;
4982     dup_in_regs = dup_args_ct[1].regs;
4983 
4984     /* Allocate the output register now.  */
4985     if (ots->val_type != TEMP_VAL_REG) {
4986         TCGRegSet allocated_regs = s->reserved_regs;
4987         TCGReg oreg;
4988 
4989         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4990             /* Make sure to not spill the input register. */
4991             tcg_regset_set_reg(allocated_regs, its->reg);
4992         }
4993         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4994                              output_pref(op, 0), ots->indirect_base);
4995         set_temp_val_reg(s, ots, oreg);
4996     }
4997 
4998     switch (its->val_type) {
4999     case TEMP_VAL_REG:
5000         /*
5001          * The dup constraints must be broad, covering all possible VECE.
5002          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5003          * to fail, indicating that extra moves are required for that case.
5004          */
5005         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5006             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5007                 goto done;
5008             }
5009             /* Try again from memory or a vector input register.  */
5010         }
5011         if (!its->mem_coherent) {
5012             /*
5013              * The input register is not synced, and so an extra store
5014              * would be required to use memory.  Attempt an integer-vector
5015              * register move first.  We do not have a TCGRegSet for this.
5016              */
5017             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5018                 break;
5019             }
5020             /* Sync the temp back to its slot and load from there.  */
5021             temp_sync(s, its, s->reserved_regs, 0, 0);
5022         }
5023         /* fall through */
5024 
5025     case TEMP_VAL_MEM:
5026         lowpart_ofs = 0;
5027         if (HOST_BIG_ENDIAN) {
5028             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5029         }
5030         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5031                              its->mem_offset + lowpart_ofs)) {
5032             goto done;
5033         }
5034         /* Load the input into the destination vector register. */
5035         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5036         break;
5037 
5038     default:
5039         g_assert_not_reached();
5040     }
5041 
5042     /* We now have a vector input register, so dup must succeed. */
5043     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5044     tcg_debug_assert(ok);
5045 
5046  done:
5047     ots->mem_coherent = 0;
5048     if (IS_DEAD_ARG(1)) {
5049         temp_dead(s, its);
5050     }
5051     if (NEED_SYNC_ARG(0)) {
5052         temp_sync(s, ots, s->reserved_regs, 0, 0);
5053     }
5054     if (IS_DEAD_ARG(0)) {
5055         temp_dead(s, ots);
5056     }
5057 }
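
/*
 * Illustrative sketch, not part of the build: lowpart_ofs above points
 * at the least-significant element of the in-memory value.  For an
 * 8-byte TCG_TYPE_I64 slot and vece = MO_16 (2-byte elements):
 *
 *     little-endian host:  low element at mem_offset + 0
 *     big-endian host:     low element at mem_offset + 8 - 2 = 6
 *
 * so tcg_out_dupm_vec() always broadcasts the low-order bits of the
 * value, independent of host byte order.
 */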
5058 
5059 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5060 {
5061     const TCGLifeData arg_life = op->life;
5062     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5063     TCGRegSet i_allocated_regs;
5064     TCGRegSet o_allocated_regs;
5065     int i, k, nb_iargs, nb_oargs;
5066     TCGReg reg;
5067     TCGArg arg;
5068     const TCGArgConstraint *args_ct;
5069     const TCGArgConstraint *arg_ct;
5070     TCGTemp *ts;
5071     TCGArg new_args[TCG_MAX_OP_ARGS];
5072     int const_args[TCG_MAX_OP_ARGS];
5073     TCGCond op_cond;
5074 
5075     nb_oargs = def->nb_oargs;
5076     nb_iargs = def->nb_iargs;
5077 
5078     /* copy constants */
5079     memcpy(new_args + nb_oargs + nb_iargs,
5080            op->args + nb_oargs + nb_iargs,
5081            sizeof(TCGArg) * def->nb_cargs);
5082 
5083     i_allocated_regs = s->reserved_regs;
5084     o_allocated_regs = s->reserved_regs;
5085 
5086     switch (op->opc) {
5087     case INDEX_op_brcond:
5088         op_cond = op->args[2];
5089         break;
5090     case INDEX_op_setcond:
5091     case INDEX_op_negsetcond:
5092     case INDEX_op_cmp_vec:
5093         op_cond = op->args[3];
5094         break;
5095     case INDEX_op_brcond2_i32:
5096         op_cond = op->args[4];
5097         break;
5098     case INDEX_op_movcond:
5099     case INDEX_op_setcond2_i32:
5100     case INDEX_op_cmpsel_vec:
5101         op_cond = op->args[5];
5102         break;
5103     default:
5104         /* No condition within opcode. */
5105         op_cond = TCG_COND_ALWAYS;
5106         break;
5107     }
5108 
5109     args_ct = opcode_args_ct(op);
5110 
5111     /* satisfy input constraints */
5112     for (k = 0; k < nb_iargs; k++) {
5113         TCGRegSet i_preferred_regs, i_required_regs;
5114         bool allocate_new_reg, copyto_new_reg;
5115         TCGTemp *ts2;
5116         int i1, i2;
5117 
5118         i = args_ct[nb_oargs + k].sort_index;
5119         arg = op->args[i];
5120         arg_ct = &args_ct[i];
5121         ts = arg_temp(arg);
5122 
5123         if (ts->val_type == TEMP_VAL_CONST) {
5124 #ifdef TCG_REG_ZERO
5125             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5126                 /* Hardware zero register: indicate register via non-const. */
5127                 const_args[i] = 0;
5128                 new_args[i] = TCG_REG_ZERO;
5129                 continue;
5130             }
5131 #endif
5132 
5133             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5134                                        op_cond, TCGOP_VECE(op))) {
5135                 /* constant is OK for instruction */
5136                 const_args[i] = 1;
5137                 new_args[i] = ts->val;
5138                 continue;
5139             }
5140         }
5141 
5142         reg = ts->reg;
5143         i_preferred_regs = 0;
5144         i_required_regs = arg_ct->regs;
5145         allocate_new_reg = false;
5146         copyto_new_reg = false;
5147 
5148         switch (arg_ct->pair) {
5149         case 0: /* not paired */
5150             if (arg_ct->ialias) {
5151                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5152 
5153                 /*
5154                  * If the input is readonly, then it cannot also be an
5155                  * output and aliased to itself.  If the input is not
5156                  * dead after the instruction, we must allocate a new
5157                  * register and move it.
5158                  */
5159                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5160                     || args_ct[arg_ct->alias_index].newreg) {
5161                     allocate_new_reg = true;
5162                 } else if (ts->val_type == TEMP_VAL_REG) {
5163                     /*
5164                      * Check if the current register has already been
5165                      * allocated for another input.
5166                      */
5167                     allocate_new_reg =
5168                         tcg_regset_test_reg(i_allocated_regs, reg);
5169                 }
5170             }
5171             if (!allocate_new_reg) {
5172                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5173                           i_preferred_regs);
5174                 reg = ts->reg;
5175                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5176             }
5177             if (allocate_new_reg) {
5178                 /*
5179                  * Allocate a new register matching the constraint
5180                  * and move the temporary register into it.
5181                  */
5182                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5183                           i_allocated_regs, 0);
5184                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5185                                     i_preferred_regs, ts->indirect_base);
5186                 copyto_new_reg = true;
5187             }
5188             break;
5189 
5190         case 1:
5191             /* First of an input pair; if i1 == i2, the second is an output. */
5192             i1 = i;
5193             i2 = arg_ct->pair_index;
5194             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5195 
5196             /*
5197              * It is easier to default to allocating a new pair
5198              * and to identify a few cases where it's not required.
5199              */
5200             if (arg_ct->ialias) {
5201                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5202                 if (IS_DEAD_ARG(i1) &&
5203                     IS_DEAD_ARG(i2) &&
5204                     !temp_readonly(ts) &&
5205                     ts->val_type == TEMP_VAL_REG &&
5206                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5207                     tcg_regset_test_reg(i_required_regs, reg) &&
5208                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5209                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5210                     (ts2
5211                      ? ts2->val_type == TEMP_VAL_REG &&
5212                        ts2->reg == reg + 1 &&
5213                        !temp_readonly(ts2)
5214                      : s->reg_to_temp[reg + 1] == NULL)) {
5215                     break;
5216                 }
5217             } else {
5218                 /* Without aliasing, the pair must also be an input. */
5219                 tcg_debug_assert(ts2);
5220                 if (ts->val_type == TEMP_VAL_REG &&
5221                     ts2->val_type == TEMP_VAL_REG &&
5222                     ts2->reg == reg + 1 &&
5223                     tcg_regset_test_reg(i_required_regs, reg)) {
5224                     break;
5225                 }
5226             }
5227             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5228                                      0, ts->indirect_base);
5229             goto do_pair;
5230 
5231         case 2: /* pair second */
5232             reg = new_args[arg_ct->pair_index] + 1;
5233             goto do_pair;
5234 
5235         case 3: /* ialias with second output, no first input */
5236             tcg_debug_assert(arg_ct->ialias);
5237             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5238 
5239             if (IS_DEAD_ARG(i) &&
5240                 !temp_readonly(ts) &&
5241                 ts->val_type == TEMP_VAL_REG &&
5242                 reg > 0 &&
5243                 s->reg_to_temp[reg - 1] == NULL &&
5244                 tcg_regset_test_reg(i_required_regs, reg) &&
5245                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5246                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5247                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5248                 break;
5249             }
5250             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5251                                      i_allocated_regs, 0,
5252                                      ts->indirect_base);
5253             tcg_regset_set_reg(i_allocated_regs, reg);
5254             reg += 1;
5255             goto do_pair;
5256 
5257         do_pair:
5258             /*
5259              * If an aliased input is not dead after the instruction,
5260              * we must allocate a new register and move it.
5261              */
5262             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5263                 TCGRegSet t_allocated_regs = i_allocated_regs;
5264 
5265                 /*
5266                  * Because of the alias, and the continued life, make sure
5267                  * that the temp is somewhere *other* than the reg pair,
5268                  * and we get a copy in reg.
5269                  */
5270                 tcg_regset_set_reg(t_allocated_regs, reg);
5271                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5272                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5273                     /* If ts was already in reg, copy it somewhere else. */
5274                     TCGReg nr;
5275                     bool ok;
5276 
5277                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5278                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5279                                        t_allocated_regs, 0, ts->indirect_base);
5280                     ok = tcg_out_mov(s, ts->type, nr, reg);
5281                     tcg_debug_assert(ok);
5282 
5283                     set_temp_val_reg(s, ts, nr);
5284                 } else {
5285                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5286                               t_allocated_regs, 0);
5287                     copyto_new_reg = true;
5288                 }
5289             } else {
5290                 /* Preferably allocate to reg, otherwise copy. */
5291                 i_required_regs = (TCGRegSet)1 << reg;
5292                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5293                           i_preferred_regs);
5294                 copyto_new_reg = ts->reg != reg;
5295             }
5296             break;
5297 
5298         default:
5299             g_assert_not_reached();
5300         }
5301 
5302         if (copyto_new_reg) {
5303             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5304                 /*
5305                  * Cross register class move not supported.  Sync the
5306                  * temp back to its slot and load from there.
5307                  */
5308                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5309                 tcg_out_ld(s, ts->type, reg,
5310                            ts->mem_base->reg, ts->mem_offset);
5311             }
5312         }
5313         new_args[i] = reg;
5314         const_args[i] = 0;
5315         tcg_regset_set_reg(i_allocated_regs, reg);
5316     }
5317 
5318     /* mark dead temporaries and free the associated registers */
5319     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5320         if (IS_DEAD_ARG(i)) {
5321             temp_dead(s, arg_temp(op->args[i]));
5322         }
5323     }
5324 
5325     if (def->flags & TCG_OPF_COND_BRANCH) {
5326         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5327     } else if (def->flags & TCG_OPF_BB_END) {
5328         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5329     } else {
5330         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5331             /* XXX: permit generic clobber register list? */
5332             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5333                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5334                     tcg_reg_free(s, i, i_allocated_regs);
5335                 }
5336             }
5337         }
5338         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5339             /* sync globals if the op has side effects and might trigger
5340                an exception. */
5341             sync_globals(s, i_allocated_regs);
5342         }
5343 
5344         /* satisfy the output constraints */
5345         for (k = 0; k < nb_oargs; k++) {
5346             i = args_ct[k].sort_index;
5347             arg = op->args[i];
5348             arg_ct = &args_ct[i];
5349             ts = arg_temp(arg);
5350 
5351             /* ENV should not be modified.  */
5352             tcg_debug_assert(!temp_readonly(ts));
5353 
5354             switch (arg_ct->pair) {
5355             case 0: /* not paired */
5356                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5357                     reg = new_args[arg_ct->alias_index];
5358                 } else if (arg_ct->newreg) {
5359                     reg = tcg_reg_alloc(s, arg_ct->regs,
5360                                         i_allocated_regs | o_allocated_regs,
5361                                         output_pref(op, k), ts->indirect_base);
5362                 } else {
5363                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5364                                         output_pref(op, k), ts->indirect_base);
5365                 }
5366                 break;
5367 
5368             case 1: /* first of pair */
5369                 if (arg_ct->oalias) {
5370                     reg = new_args[arg_ct->alias_index];
5371                 } else if (arg_ct->newreg) {
5372                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5373                                              i_allocated_regs | o_allocated_regs,
5374                                              output_pref(op, k),
5375                                              ts->indirect_base);
5376                 } else {
5377                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5378                                              output_pref(op, k),
5379                                              ts->indirect_base);
5380                 }
5381                 break;
5382 
5383             case 2: /* second of pair */
5384                 if (arg_ct->oalias) {
5385                     reg = new_args[arg_ct->alias_index];
5386                 } else {
5387                     reg = new_args[arg_ct->pair_index] + 1;
5388                 }
5389                 break;
5390 
5391             case 3: /* first of pair, aliasing with a second input */
5392                 tcg_debug_assert(!arg_ct->newreg);
5393                 reg = new_args[arg_ct->pair_index] - 1;
5394                 break;
5395 
5396             default:
5397                 g_assert_not_reached();
5398             }
5399             tcg_regset_set_reg(o_allocated_regs, reg);
5400             set_temp_val_reg(s, ts, reg);
5401             ts->mem_coherent = 0;
5402             new_args[i] = reg;
5403         }
5404     }
5405 
5406     /* emit instruction */
5407     TCGType type = TCGOP_TYPE(op);
5408     switch (op->opc) {
5409     case INDEX_op_ext_i32_i64:
5410         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5411         break;
5412     case INDEX_op_extu_i32_i64:
5413         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5414         break;
5415     case INDEX_op_extrl_i64_i32:
5416         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5417         break;
5418 
5419     case INDEX_op_add:
5420     case INDEX_op_and:
5421     case INDEX_op_andc:
5422     case INDEX_op_clz:
5423     case INDEX_op_ctz:
5424     case INDEX_op_divs:
5425     case INDEX_op_divu:
5426     case INDEX_op_eqv:
5427     case INDEX_op_mul:
5428     case INDEX_op_mulsh:
5429     case INDEX_op_muluh:
5430     case INDEX_op_nand:
5431     case INDEX_op_nor:
5432     case INDEX_op_or:
5433     case INDEX_op_orc:
5434     case INDEX_op_rems:
5435     case INDEX_op_remu:
5436     case INDEX_op_rotl:
5437     case INDEX_op_rotr:
5438     case INDEX_op_sar:
5439     case INDEX_op_shl:
5440     case INDEX_op_shr:
5441     case INDEX_op_xor:
5442         {
5443             const TCGOutOpBinary *out =
5444                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5445 
5446             /* Constants should never appear in the first source operand. */
5447             tcg_debug_assert(!const_args[1]);
5448             if (const_args[2]) {
5449                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5450             } else {
5451                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5452             }
5453         }
5454         break;
5455 
5456     case INDEX_op_sub:
5457         {
5458             const TCGOutOpSubtract *out = &outop_sub;
5459 
5460             /*
5461              * Constants should never appear in the second source operand.
5462              * These are folded to add with negative constant.
5463              */
5464             tcg_debug_assert(!const_args[2]);
5465             if (const_args[1]) {
5466                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5467             } else {
5468                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5469             }
5470         }
5471         break;
5472 
5473     case INDEX_op_ctpop:
5474     case INDEX_op_neg:
5475     case INDEX_op_not:
5476         {
5477             const TCGOutOpUnary *out =
5478                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5479 
5480             /* Constants should have been folded. */
5481             tcg_debug_assert(!const_args[1]);
5482             out->out_rr(s, type, new_args[0], new_args[1]);
5483         }
5484         break;
5485 
5486     case INDEX_op_bswap16:
5487     case INDEX_op_bswap32:
5488         {
5489             const TCGOutOpBswap *out =
5490                 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5491 
5492             tcg_debug_assert(!const_args[1]);
5493             out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5494         }
5495         break;
5496 
5497     case INDEX_op_divs2:
5498     case INDEX_op_divu2:
5499         {
5500             const TCGOutOpDivRem *out =
5501                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5502 
5503             /* Only used by x86 and s390x, which use matching constraints. */
5504             tcg_debug_assert(new_args[0] == new_args[2]);
5505             tcg_debug_assert(new_args[1] == new_args[3]);
5506             tcg_debug_assert(!const_args[4]);
5507             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5508         }
5509         break;
5510 
5511     case INDEX_op_muls2:
5512     case INDEX_op_mulu2:
5513         {
5514             const TCGOutOpMul2 *out =
5515                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5516 
5517             tcg_debug_assert(!const_args[2]);
5518             tcg_debug_assert(!const_args[3]);
5519             out->out_rrrr(s, type, new_args[0], new_args[1],
5520                           new_args[2], new_args[3]);
5521         }
5522         break;
5523 
5524     case INDEX_op_brcond:
5525         {
5526             const TCGOutOpBrcond *out = &outop_brcond;
5527             TCGCond cond = new_args[2];
5528             TCGLabel *label = arg_label(new_args[3]);
5529 
5530             tcg_debug_assert(!const_args[0]);
5531             if (const_args[1]) {
5532                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5533             } else {
5534                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5535             }
5536         }
5537         break;
5538 
5539     case INDEX_op_movcond:
5540         {
5541             const TCGOutOpMovcond *out = &outop_movcond;
5542             TCGCond cond = new_args[5];
5543 
5544             tcg_debug_assert(!const_args[1]);
5545             out->out(s, type, cond, new_args[0],
5546                      new_args[1], new_args[2], const_args[2],
5547                      new_args[3], const_args[3],
5548                      new_args[4], const_args[4]);
5549         }
5550         break;
5551 
5552     case INDEX_op_setcond:
5553     case INDEX_op_negsetcond:
5554         {
5555             const TCGOutOpSetcond *out =
5556                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5557             TCGCond cond = new_args[3];
5558 
5559             tcg_debug_assert(!const_args[1]);
5560             if (const_args[2]) {
5561                 out->out_rri(s, type, cond,
5562                              new_args[0], new_args[1], new_args[2]);
5563             } else {
5564                 out->out_rrr(s, type, cond,
5565                              new_args[0], new_args[1], new_args[2]);
5566             }
5567         }
5568         break;
5569 
5570 #if TCG_TARGET_REG_BITS == 32
5571     case INDEX_op_brcond2_i32:
5572         {
5573             const TCGOutOpBrcond2 *out = &outop_brcond2;
5574             TCGCond cond = new_args[4];
5575             TCGLabel *label = arg_label(new_args[5]);
5576 
5577             tcg_debug_assert(!const_args[0]);
5578             tcg_debug_assert(!const_args[1]);
5579             out->out(s, cond, new_args[0], new_args[1],
5580                      new_args[2], const_args[2],
5581                      new_args[3], const_args[3], label);
5582         }
5583         break;
5584     case INDEX_op_setcond2_i32:
5585         {
5586             const TCGOutOpSetcond2 *out = &outop_setcond2;
5587             TCGCond cond = new_args[5];
5588 
5589             tcg_debug_assert(!const_args[1]);
5590             tcg_debug_assert(!const_args[2]);
5591             out->out(s, cond, new_args[0], new_args[1], new_args[2],
5592                      new_args[3], const_args[3], new_args[4], const_args[4]);
5593         }
5594         break;
5595 #else
5596     case INDEX_op_brcond2_i32:
5597     case INDEX_op_setcond2_i32:
5598         g_assert_not_reached();
5599 #endif
5600 
5601     default:
5602         if (def->flags & TCG_OPF_VECTOR) {
5603             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5604                            TCGOP_VECE(op), new_args, const_args);
5605         } else {
5606             tcg_out_op(s, op->opc, type, new_args, const_args);
5607         }
5608         break;
5609     }
5610 
5611     /* move the outputs in the correct register if needed */
5612     for (i = 0; i < nb_oargs; i++) {
5613         ts = arg_temp(op->args[i]);
5614 
5615         /* ENV should not be modified.  */
5616         tcg_debug_assert(!temp_readonly(ts));
5617 
5618         if (NEED_SYNC_ARG(i)) {
5619             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5620         } else if (IS_DEAD_ARG(i)) {
5621             temp_dead(s, ts);
5622         }
5623     }
5624 }
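
/*
 * Illustrative note, not part of the build: the arg_ct->pair encoding
 * that drives both the input and output loops above is, roughly:
 *
 *     0: unpaired operand
 *     1: first (low) half of a register pair; pair_index names the
 *        other half
 *     2: second (high) half; its register is implicitly
 *        new_args[pair_index] + 1
 *     3: the paired-and-aliased case where the halves cross
 *        (see the case 3 comments above)
 *
 * E.g. a 32-bit backend constraining a 64-bit result to {lo,hi} sees
 * hi as pair == 2 and inherits lo's register + 1 instead of invoking
 * the allocator a second time.
 */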
5625 
5626 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5627 {
5628     const TCGLifeData arg_life = op->life;
5629     TCGTemp *ots, *itsl, *itsh;
5630     TCGType vtype = TCGOP_TYPE(op);
5631 
5632     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5633     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5634     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5635 
5636     ots = arg_temp(op->args[0]);
5637     itsl = arg_temp(op->args[1]);
5638     itsh = arg_temp(op->args[2]);
5639 
5640     /* ENV should not be modified.  */
5641     tcg_debug_assert(!temp_readonly(ots));
5642 
5643     /* Allocate the output register now.  */
5644     if (ots->val_type != TEMP_VAL_REG) {
5645         TCGRegSet allocated_regs = s->reserved_regs;
5646         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5647         TCGReg oreg;
5648 
5649         /* Make sure to not spill the input registers. */
5650         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5651             tcg_regset_set_reg(allocated_regs, itsl->reg);
5652         }
5653         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5654             tcg_regset_set_reg(allocated_regs, itsh->reg);
5655         }
5656 
5657         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5658                              output_pref(op, 0), ots->indirect_base);
5659         set_temp_val_reg(s, ots, oreg);
5660     }
5661 
5662     /* Promote dup2 of immediates to dupi_vec. */
5663     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5664         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5665         MemOp vece = MO_64;
5666 
5667         if (val == dup_const(MO_8, val)) {
5668             vece = MO_8;
5669         } else if (val == dup_const(MO_16, val)) {
5670             vece = MO_16;
5671         } else if (val == dup_const(MO_32, val)) {
5672             vece = MO_32;
5673         }
5674 
5675         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5676         goto done;
5677     }
5678 
5679     /* If the two inputs form one 64-bit value, try dupm_vec. */
5680     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5681         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5682         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5683         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5684 
5685         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5686         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5687 
5688         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5689                              its->mem_base->reg, its->mem_offset)) {
5690             goto done;
5691         }
5692     }
5693 
5694     /* Fall back to generic expansion. */
5695     return false;
5696 
5697  done:
5698     ots->mem_coherent = 0;
5699     if (IS_DEAD_ARG(1)) {
5700         temp_dead(s, itsl);
5701     }
5702     if (IS_DEAD_ARG(2)) {
5703         temp_dead(s, itsh);
5704     }
5705     if (NEED_SYNC_ARG(0)) {
5706         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5707     } else if (IS_DEAD_ARG(0)) {
5708         temp_dead(s, ots);
5709     }
5710     return true;
5711 }
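
/*
 * Illustrative sketch, not part of the build: the immediate promotion
 * above glues the two 32-bit halves together with
 *
 *     val = deposit64(itsl->val, 32, 32, itsh->val);
 *
 * e.g. lo = 0xdeadbeef, hi = 0x00c0ffee yields 0x00c0ffeedeadbeef,
 * after which the usual dup_const() narrowing applies: a dup2 of
 * (0x01010101, 0x01010101) collapses all the way to a MO_8 dupi
 * of 0x01.
 */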
5712 
5713 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5714                          TCGRegSet allocated_regs)
5715 {
5716     if (ts->val_type == TEMP_VAL_REG) {
5717         if (ts->reg != reg) {
5718             tcg_reg_free(s, reg, allocated_regs);
5719             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5720                 /*
5721                  * Cross register class move not supported.  Sync the
5722                  * temp back to its slot and load from there.
5723                  */
5724                 temp_sync(s, ts, allocated_regs, 0, 0);
5725                 tcg_out_ld(s, ts->type, reg,
5726                            ts->mem_base->reg, ts->mem_offset);
5727             }
5728         }
5729     } else {
5730         TCGRegSet arg_set = 0;
5731 
5732         tcg_reg_free(s, reg, allocated_regs);
5733         tcg_regset_set_reg(arg_set, reg);
5734         temp_load(s, ts, arg_set, allocated_regs, 0);
5735     }
5736 }
5737 
5738 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5739                          TCGRegSet allocated_regs)
5740 {
5741     /*
5742      * When the destination is on the stack, load up the temp and store.
5743      * If there are many call-saved registers, the temp might live to
5744      * see another use; otherwise it'll be discarded.
5745      */
5746     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5747     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5748                arg_slot_stk_ofs(arg_slot));
5749 }
5750 
5751 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5752                             TCGTemp *ts, TCGRegSet *allocated_regs)
5753 {
5754     if (arg_slot_reg_p(l->arg_slot)) {
5755         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5756         load_arg_reg(s, reg, ts, *allocated_regs);
5757         tcg_regset_set_reg(*allocated_regs, reg);
5758     } else {
5759         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5760     }
5761 }
5762 
5763 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5764                          intptr_t ref_off, TCGRegSet *allocated_regs)
5765 {
5766     TCGReg reg;
5767 
5768     if (arg_slot_reg_p(arg_slot)) {
5769         reg = tcg_target_call_iarg_regs[arg_slot];
5770         tcg_reg_free(s, reg, *allocated_regs);
5771         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5772         tcg_regset_set_reg(*allocated_regs, reg);
5773     } else {
5774         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5775                             *allocated_regs, 0, false);
5776         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5777         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5778                    arg_slot_stk_ofs(arg_slot));
5779     }
5780 }
5781 
5782 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5783 {
5784     const int nb_oargs = TCGOP_CALLO(op);
5785     const int nb_iargs = TCGOP_CALLI(op);
5786     const TCGLifeData arg_life = op->life;
5787     const TCGHelperInfo *info = tcg_call_info(op);
5788     TCGRegSet allocated_regs = s->reserved_regs;
5789     int i;
5790 
5791     /*
5792      * Move inputs into place in reverse order,
5793      * so that we place stacked arguments first.
5794      */
5795     for (i = nb_iargs - 1; i >= 0; --i) {
5796         const TCGCallArgumentLoc *loc = &info->in[i];
5797         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5798 
5799         switch (loc->kind) {
5800         case TCG_CALL_ARG_NORMAL:
5801         case TCG_CALL_ARG_EXTEND_U:
5802         case TCG_CALL_ARG_EXTEND_S:
5803             load_arg_normal(s, loc, ts, &allocated_regs);
5804             break;
5805         case TCG_CALL_ARG_BY_REF:
5806             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5807             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5808                          arg_slot_stk_ofs(loc->ref_slot),
5809                          &allocated_regs);
5810             break;
5811         case TCG_CALL_ARG_BY_REF_N:
5812             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5813             break;
5814         default:
5815             g_assert_not_reached();
5816         }
5817     }
5818 
5819     /* Mark dead temporaries and free the associated registers.  */
5820     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5821         if (IS_DEAD_ARG(i)) {
5822             temp_dead(s, arg_temp(op->args[i]));
5823         }
5824     }
5825 
5826     /* Clobber call registers.  */
5827     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5828         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5829             tcg_reg_free(s, i, allocated_regs);
5830         }
5831     }
5832 
5833     /*
5834      * Save globals if they might be written by the helper,
5835      * sync them if they might be read.
5836      */
5837     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5838         /* Nothing to do */
5839     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5840         sync_globals(s, allocated_regs);
5841     } else {
5842         save_globals(s, allocated_regs);
5843     }
5844 
5845     /*
5846      * If the ABI passes a pointer to the returned struct as the first
5847      * argument, load that now.  Pass a pointer to the output home slot.
5848      */
5849     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5850         TCGTemp *ts = arg_temp(op->args[0]);
5851 
5852         if (!ts->mem_allocated) {
5853             temp_allocate_frame(s, ts);
5854         }
5855         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5856     }
5857 
5858     tcg_out_call(s, tcg_call_func(op), info);
5859 
5860     /* Assign output registers and emit moves if needed.  */
5861     switch (info->out_kind) {
5862     case TCG_CALL_RET_NORMAL:
5863         for (i = 0; i < nb_oargs; i++) {
5864             TCGTemp *ts = arg_temp(op->args[i]);
5865             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5866 
5867             /* ENV should not be modified.  */
5868             tcg_debug_assert(!temp_readonly(ts));
5869 
5870             set_temp_val_reg(s, ts, reg);
5871             ts->mem_coherent = 0;
5872         }
5873         break;
5874 
5875     case TCG_CALL_RET_BY_VEC:
5876         {
5877             TCGTemp *ts = arg_temp(op->args[0]);
5878 
5879             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5880             tcg_debug_assert(ts->temp_subindex == 0);
5881             if (!ts->mem_allocated) {
5882                 temp_allocate_frame(s, ts);
5883             }
5884             tcg_out_st(s, TCG_TYPE_V128,
5885                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5886                        ts->mem_base->reg, ts->mem_offset);
5887         }
5888         /* fall through to mark all parts in memory */
5889 
5890     case TCG_CALL_RET_BY_REF:
5891         /* The callee has performed a write through the reference. */
5892         for (i = 0; i < nb_oargs; i++) {
5893             TCGTemp *ts = arg_temp(op->args[i]);
5894             ts->val_type = TEMP_VAL_MEM;
5895         }
5896         break;
5897 
5898     default:
5899         g_assert_not_reached();
5900     }
5901 
5902     /* Flush or discard output registers as needed. */
5903     for (i = 0; i < nb_oargs; i++) {
5904         TCGTemp *ts = arg_temp(op->args[i]);
5905         if (NEED_SYNC_ARG(i)) {
5906             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5907         } else if (IS_DEAD_ARG(i)) {
5908             temp_dead(s, ts);
5909         }
5910     }
5911 }
5912 
5913 /**
5914  * atom_and_align_for_opc:
5915  * @s: tcg context
5916  * @opc: memory operation code
5917  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5918  * @allow_two_ops: true if we are prepared to issue two operations
5919  *
5920  * Return the alignment and atomicity to use for the inline fast path
5921  * for the given memory operation.  The alignment may be larger than
5922  * that specified in @opc, and the correct alignment will be diagnosed
5923  * by the slow path helper.
5924  *
5925  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5926  * and issue two loads or stores for subalignment.
5927  */
5928 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5929                                            MemOp host_atom, bool allow_two_ops)
5930 {
5931     MemOp align = memop_alignment_bits(opc);
5932     MemOp size = opc & MO_SIZE;
5933     MemOp half = size ? size - 1 : 0;
5934     MemOp atom = opc & MO_ATOM_MASK;
5935     MemOp atmax;
5936 
5937     switch (atom) {
5938     case MO_ATOM_NONE:
5939         /* The operation requires no specific atomicity. */
5940         atmax = MO_8;
5941         break;
5942 
5943     case MO_ATOM_IFALIGN:
5944         atmax = size;
5945         break;
5946 
5947     case MO_ATOM_IFALIGN_PAIR:
5948         atmax = half;
5949         break;
5950 
5951     case MO_ATOM_WITHIN16:
5952         atmax = size;
5953         if (size == MO_128) {
5954             /* Misalignment implies !within16, and therefore no atomicity. */
5955         } else if (host_atom != MO_ATOM_WITHIN16) {
5956             /* The host does not implement within16, so require alignment. */
5957             align = MAX(align, size);
5958         }
5959         break;
5960 
5961     case MO_ATOM_WITHIN16_PAIR:
5962         atmax = size;
5963         /*
5964          * Misalignment implies !within16, and therefore half atomicity.
5965          * Any host prepared for two operations can implement this with
5966          * half alignment.
5967          */
5968         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5969             align = MAX(align, half);
5970         }
5971         break;
5972 
5973     case MO_ATOM_SUBALIGN:
5974         atmax = size;
5975         if (host_atom != MO_ATOM_SUBALIGN) {
5976             /* If unaligned but not odd, there are subobjects up to half. */
5977             if (allow_two_ops) {
5978                 align = MAX(align, half);
5979             } else {
5980                 align = MAX(align, size);
5981             }
5982         }
5983         break;
5984 
5985     default:
5986         g_assert_not_reached();
5987     }
5988 
5989     return (TCGAtomAlign){ .atom = atmax, .align = align };
5990 }
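
/*
 * Illustrative examples, not part of the build, assuming a host with
 * host_atom == MO_ATOM_IFALIGN and an MO_64 operation:
 *
 *     MO_ATOM_IFALIGN       -> { .atom = MO_64, .align unchanged }
 *     MO_ATOM_IFALIGN_PAIR  -> { .atom = MO_32, .align unchanged }
 *     MO_ATOM_WITHIN16      -> { .atom = MO_64, .align raised to
 *                                MO_64, since this host cannot prove
 *                                within-16 for unaligned accesses }
 *
 * Raising align here only routes more cases through the slow path,
 * which still diagnoses against the alignment specified in @opc.
 */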
5991 
5992 /*
5993  * Similarly for qemu_ld/st slow path helpers.
5994  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5995  * using only the provided backend tcg_out_* functions.
5996  */
5997 
5998 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5999 {
6000     int ofs = arg_slot_stk_ofs(slot);
6001 
6002     /*
6003      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does not
6004      * require extension to uint64_t, adjust the address for uint32_t.
6005      */
6006     if (HOST_BIG_ENDIAN &&
6007         TCG_TARGET_REG_BITS == 64 &&
6008         type == TCG_TYPE_I32) {
6009         ofs += 4;
6010     }
6011     return ofs;
6012 }
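
/*
 * Illustrative sketch, not part of the build: on a 64-bit big-endian
 * host a 4-byte store of an I32 argument must target the
 * high-addressed half of its 8-byte slot:
 *
 *     slot + 0..3: unused padding
 *     slot + 4..7: the I32 payload   <- hence "ofs += 4"
 *
 * which is where a big-endian ABI expects a 32-bit stack argument.
 * Little-endian hosts place the payload at offset 0 and need no
 * adjustment.
 */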
6013 
6014 static void tcg_out_helper_load_slots(TCGContext *s,
6015                                       unsigned nmov, TCGMovExtend *mov,
6016                                       const TCGLdstHelperParam *parm)
6017 {
6018     unsigned i;
6019     TCGReg dst3;
6020 
6021     /*
6022      * Start from the end, storing to the stack first.
6023      * This frees those registers, so we need not consider overlap.
6024      */
6025     for (i = nmov; i-- > 0; ) {
6026         unsigned slot = mov[i].dst;
6027 
6028         if (arg_slot_reg_p(slot)) {
6029             goto found_reg;
6030         }
6031 
6032         TCGReg src = mov[i].src;
6033         TCGType dst_type = mov[i].dst_type;
6034         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6035 
6036         /* The argument is going onto the stack; extend into scratch. */
6037         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6038             tcg_debug_assert(parm->ntmp != 0);
6039             mov[i].dst = src = parm->tmp[0];
6040             tcg_out_movext1(s, &mov[i]);
6041         }
6042 
6043         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6044                    tcg_out_helper_stk_ofs(dst_type, slot));
6045     }
6046     return;
6047 
6048  found_reg:
6049     /*
6050      * The remaining arguments are in registers.
6051      * Convert slot numbers to argument registers.
6052      */
6053     nmov = i + 1;
6054     for (i = 0; i < nmov; ++i) {
6055         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6056     }
6057 
6058     switch (nmov) {
6059     case 4:
6060         /* The backend must have provided enough temps for the worst case. */
6061         tcg_debug_assert(parm->ntmp >= 2);
6062 
6063         dst3 = mov[3].dst;
6064         for (unsigned j = 0; j < 3; ++j) {
6065             if (dst3 == mov[j].src) {
6066                 /*
6067                  * Conflict. Copy the source to a temporary, perform the
6068                  * remaining moves, then the extension from our scratch
6069                  * on the way out.
6070                  */
6071                 TCGReg scratch = parm->tmp[1];
6072 
6073                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6074                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6075                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6076                 return;
6077             }
6078         }
6079 
6080         /* No conflicts: perform this move and continue. */
6081         tcg_out_movext1(s, &mov[3]);
6082         /* fall through */
6083 
6084     case 3:
6085         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6086                         parm->ntmp ? parm->tmp[0] : -1);
6087         break;
6088     case 2:
6089         tcg_out_movext2(s, mov, mov + 1,
6090                         parm->ntmp ? parm->tmp[0] : -1);
6091         break;
6092     case 1:
6093         tcg_out_movext1(s, mov);
6094         break;
6095     default:
6096         g_assert_not_reached();
6097     }
6098 }
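
/*
 * Illustrative walk-through, not part of the build, of the 4-move
 * conflict case above (registers a0..a3, r8..r10 made up): suppose
 *
 *     mov[0]: a0 <- r8    mov[1]: a1 <- r9
 *     mov[2]: a2 <- a3    mov[3]: a3 <- r10
 *
 * Emitting mov[3] first, as the no-conflict path does, would clobber
 * a3 while mov[2] still needs it (dst3 == mov[2].src).  Instead
 * mov[3].src is parked in a scratch register (in case mov[0..2]
 * overwrite it in turn), tcg_out_movext3() resolves mov[0..2] with
 * all sources intact, and mov[3] finally extends out of the scratch
 * copy on the way out.
 */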
6099 
6100 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6101                                     TCGType type, tcg_target_long imm,
6102                                     const TCGLdstHelperParam *parm)
6103 {
6104     if (arg_slot_reg_p(slot)) {
6105         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6106     } else {
6107         int ofs = tcg_out_helper_stk_ofs(type, slot);
6108         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6109             tcg_debug_assert(parm->ntmp != 0);
6110             tcg_out_movi(s, type, parm->tmp[0], imm);
6111             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6112         }
6113     }
6114 }
6115 
6116 static void tcg_out_helper_load_common_args(TCGContext *s,
6117                                             const TCGLabelQemuLdst *ldst,
6118                                             const TCGLdstHelperParam *parm,
6119                                             const TCGHelperInfo *info,
6120                                             unsigned next_arg)
6121 {
6122     TCGMovExtend ptr_mov = {
6123         .dst_type = TCG_TYPE_PTR,
6124         .src_type = TCG_TYPE_PTR,
6125         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6126     };
6127     const TCGCallArgumentLoc *loc = &info->in[0];
6128     TCGType type;
6129     unsigned slot;
6130     tcg_target_ulong imm;
6131 
6132     /*
6133      * Handle env, which is always first.
6134      */
6135     ptr_mov.dst = loc->arg_slot;
6136     ptr_mov.src = TCG_AREG0;
6137     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6138 
6139     /*
6140      * Handle oi.
6141      */
6142     imm = ldst->oi;
6143     loc = &info->in[next_arg];
6144     type = TCG_TYPE_I32;
6145     switch (loc->kind) {
6146     case TCG_CALL_ARG_NORMAL:
6147         break;
6148     case TCG_CALL_ARG_EXTEND_U:
6149     case TCG_CALL_ARG_EXTEND_S:
6150         /* No extension required for MemOpIdx. */
6151         tcg_debug_assert(imm <= INT32_MAX);
6152         type = TCG_TYPE_REG;
6153         break;
6154     default:
6155         g_assert_not_reached();
6156     }
6157     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6158     next_arg++;
6159 
6160     /*
6161      * Handle ra.
6162      */
6163     loc = &info->in[next_arg];
6164     slot = loc->arg_slot;
6165     if (parm->ra_gen) {
6166         int arg_reg = -1;
6167         TCGReg ra_reg;
6168 
6169         if (arg_slot_reg_p(slot)) {
6170             arg_reg = tcg_target_call_iarg_regs[slot];
6171         }
6172         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6173 
6174         ptr_mov.dst = slot;
6175         ptr_mov.src = ra_reg;
6176         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6177     } else {
6178         imm = (uintptr_t)ldst->raddr;
6179         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6180     }
6181 }
6182 
6183 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6184                                        const TCGCallArgumentLoc *loc,
6185                                        TCGType dst_type, TCGType src_type,
6186                                        TCGReg lo, TCGReg hi)
6187 {
6188     MemOp reg_mo;
6189 
6190     if (dst_type <= TCG_TYPE_REG) {
6191         MemOp src_ext;
6192 
6193         switch (loc->kind) {
6194         case TCG_CALL_ARG_NORMAL:
6195             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6196             break;
6197         case TCG_CALL_ARG_EXTEND_U:
6198             dst_type = TCG_TYPE_REG;
6199             src_ext = MO_UL;
6200             break;
6201         case TCG_CALL_ARG_EXTEND_S:
6202             dst_type = TCG_TYPE_REG;
6203             src_ext = MO_SL;
6204             break;
6205         default:
6206             g_assert_not_reached();
6207         }
6208 
6209         mov[0].dst = loc->arg_slot;
6210         mov[0].dst_type = dst_type;
6211         mov[0].src = lo;
6212         mov[0].src_type = src_type;
6213         mov[0].src_ext = src_ext;
6214         return 1;
6215     }
6216 
6217     if (TCG_TARGET_REG_BITS == 32) {
6218         assert(dst_type == TCG_TYPE_I64);
6219         reg_mo = MO_32;
6220     } else {
6221         assert(dst_type == TCG_TYPE_I128);
6222         reg_mo = MO_64;
6223     }
6224 
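         /*
          * Pass the low part in the first slot on a little-endian host
          * and in the second slot on a big-endian host, matching how the
          * calling convention splits a double-word value across slots.
          */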
6225     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6226     mov[0].src = lo;
6227     mov[0].dst_type = TCG_TYPE_REG;
6228     mov[0].src_type = TCG_TYPE_REG;
6229     mov[0].src_ext = reg_mo;
6230 
6231     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6232     mov[1].src = hi;
6233     mov[1].dst_type = TCG_TYPE_REG;
6234     mov[1].src_type = TCG_TYPE_REG;
6235     mov[1].src_ext = reg_mo;
6236 
6237     return 2;
6238 }
6239 
6240 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6241                                    const TCGLdstHelperParam *parm)
6242 {
6243     const TCGHelperInfo *info;
6244     const TCGCallArgumentLoc *loc;
6245     TCGMovExtend mov[2];
6246     unsigned next_arg, nmov;
6247     MemOp mop = get_memop(ldst->oi);
6248 
6249     switch (mop & MO_SIZE) {
6250     case MO_8:
6251     case MO_16:
6252     case MO_32:
6253         info = &info_helper_ld32_mmu;
6254         break;
6255     case MO_64:
6256         info = &info_helper_ld64_mmu;
6257         break;
6258     case MO_128:
6259         info = &info_helper_ld128_mmu;
6260         break;
6261     default:
6262         g_assert_not_reached();
6263     }
6264 
6265     /* Defer env argument. */
6266     next_arg = 1;
6267 
6268     loc = &info->in[next_arg];
6269     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6270         /*
6271          * 32-bit host with 32-bit guest: zero-extend the guest address
6272          * to 64 bits for the helper by storing the low part, then
6273          * load a zero for the high part.
6274          */
6275         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6276                                TCG_TYPE_I32, TCG_TYPE_I32,
6277                                ldst->addr_reg, -1);
6278         tcg_out_helper_load_slots(s, 1, mov, parm);
6279 
6280         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6281                                 TCG_TYPE_I32, 0, parm);
6282         next_arg += 2;
6283     } else {
6284         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6285                                       ldst->addr_reg, -1);
6286         tcg_out_helper_load_slots(s, nmov, mov, parm);
6287         next_arg += nmov;
6288     }
6289 
6290     switch (info->out_kind) {
6291     case TCG_CALL_RET_NORMAL:
6292     case TCG_CALL_RET_BY_VEC:
6293         break;
6294     case TCG_CALL_RET_BY_REF:
6295         /*
6296          * The return reference is in the first argument slot.
6297          * We need memory in which to return: re-use the top of stack.
6298          */
6299         {
6300             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6301 
6302             if (arg_slot_reg_p(0)) {
6303                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6304                                  TCG_REG_CALL_STACK, ofs_slot0);
6305             } else {
6306                 tcg_debug_assert(parm->ntmp != 0);
6307                 tcg_out_addi_ptr(s, parm->tmp[0],
6308                                  TCG_REG_CALL_STACK, ofs_slot0);
6309                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6310                            TCG_REG_CALL_STACK, ofs_slot0);
6311             }
6312         }
6313         break;
6314     default:
6315         g_assert_not_reached();
6316     }
6317 
6318     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6319 }
6320 
6321 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6322                                   bool load_sign,
6323                                   const TCGLdstHelperParam *parm)
6324 {
6325     MemOp mop = get_memop(ldst->oi);
6326     TCGMovExtend mov[2];
6327     int ofs_slot0;
6328 
6329     switch (ldst->type) {
6330     case TCG_TYPE_I64:
6331         if (TCG_TARGET_REG_BITS == 32) {
6332             break;
6333         }
6334         /* fall through */
6335 
6336     case TCG_TYPE_I32:
6337         mov[0].dst = ldst->datalo_reg;
6338         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6339         mov[0].dst_type = ldst->type;
6340         mov[0].src_type = TCG_TYPE_REG;
6341 
6342         /*
6343          * If load_sign, then we allowed the helper to perform the
6344          * appropriate sign extension to tcg_target_ulong, and all
6345          * we need now is a plain move.
6346          *
6347          * If not, then we expect the relevant extension
6348          * instruction to be no more expensive than a move, and
6349          * we thus save the icache etc by only using one of two
6350          * helper functions.
6351          */
6352         if (load_sign || !(mop & MO_SIGN)) {
6353             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6354                 mov[0].src_ext = MO_32;
6355             } else {
6356                 mov[0].src_ext = MO_64;
6357             }
6358         } else {
6359             mov[0].src_ext = mop & MO_SSIZE;
6360         }
6361         tcg_out_movext1(s, mov);
6362         return;
6363 
6364     case TCG_TYPE_I128:
6365         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6366         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6367         switch (TCG_TARGET_CALL_RET_I128) {
6368         case TCG_CALL_RET_NORMAL:
6369             break;
6370         case TCG_CALL_RET_BY_VEC:
6371             tcg_out_st(s, TCG_TYPE_V128,
6372                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6373                        TCG_REG_CALL_STACK, ofs_slot0);
6374             /* fall through */
6375         case TCG_CALL_RET_BY_REF:
6376             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6377                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6378             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6379                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6380             return;
6381         default:
6382             g_assert_not_reached();
6383         }
6384         break;
6385 
6386     default:
6387         g_assert_not_reached();
6388     }
6389 
6390     mov[0].dst = ldst->datalo_reg;
6391     mov[0].src =
6392         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6393     mov[0].dst_type = TCG_TYPE_REG;
6394     mov[0].src_type = TCG_TYPE_REG;
6395     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6396 
6397     mov[1].dst = ldst->datahi_reg;
6398     mov[1].src =
6399         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6400     mov[1].dst_type = TCG_TYPE_REG;
6401     mov[1].src_type = TCG_TYPE_REG;
6402     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6403 
6404     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6405 }
6406 
6407 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6408                                    const TCGLdstHelperParam *parm)
6409 {
6410     const TCGHelperInfo *info;
6411     const TCGCallArgumentLoc *loc;
6412     TCGMovExtend mov[4];
6413     TCGType data_type;
6414     unsigned next_arg, nmov, n;
6415     MemOp mop = get_memop(ldst->oi);
6416 
6417     switch (mop & MO_SIZE) {
6418     case MO_8:
6419     case MO_16:
6420     case MO_32:
6421         info = &info_helper_st32_mmu;
6422         data_type = TCG_TYPE_I32;
6423         break;
6424     case MO_64:
6425         info = &info_helper_st64_mmu;
6426         data_type = TCG_TYPE_I64;
6427         break;
6428     case MO_128:
6429         info = &info_helper_st128_mmu;
6430         data_type = TCG_TYPE_I128;
6431         break;
6432     default:
6433         g_assert_not_reached();
6434     }
6435 
6436     /* Defer env argument. */
6437     next_arg = 1;
6438     nmov = 0;
6439 
6440     /* Handle addr argument. */
6441     loc = &info->in[next_arg];
6442     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6443     if (TCG_TARGET_REG_BITS == 32) {
6444         /*
6445          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6446          * to 64 bits for the helper by storing the low part.  Later,
6447          * after we have processed the register inputs, we will load a
6448          * zero for the high part.
6449          */
6450         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6451                                TCG_TYPE_I32, TCG_TYPE_I32,
6452                                ldst->addr_reg, -1);
6453         next_arg += 2;
6454         nmov += 1;
6455     } else {
6456         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6457                                    ldst->addr_reg, -1);
6458         next_arg += n;
6459         nmov += n;
6460     }
6461 
6462     /* Handle data argument. */
6463     loc = &info->in[next_arg];
6464     switch (loc->kind) {
6465     case TCG_CALL_ARG_NORMAL:
6466     case TCG_CALL_ARG_EXTEND_U:
6467     case TCG_CALL_ARG_EXTEND_S:
6468         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6469                                    ldst->datalo_reg, ldst->datahi_reg);
6470         next_arg += n;
6471         nmov += n;
6472         tcg_out_helper_load_slots(s, nmov, mov, parm);
6473         break;
6474 
6475     case TCG_CALL_ARG_BY_REF:
6476         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6477         tcg_debug_assert(data_type == TCG_TYPE_I128);
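         /* Store the 128-bit value to its stack home in host byte order. */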
6478         tcg_out_st(s, TCG_TYPE_I64,
6479                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6480                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6481         tcg_out_st(s, TCG_TYPE_I64,
6482                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6483                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6484 
6485         tcg_out_helper_load_slots(s, nmov, mov, parm);
6486 
6487         if (arg_slot_reg_p(loc->arg_slot)) {
6488             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6489                              TCG_REG_CALL_STACK,
6490                              arg_slot_stk_ofs(loc->ref_slot));
6491         } else {
6492             tcg_debug_assert(parm->ntmp != 0);
6493             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6494                              arg_slot_stk_ofs(loc->ref_slot));
6495             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6496                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6497         }
6498         next_arg += 2;
6499         break;
6500 
6501     default:
6502         g_assert_not_reached();
6503     }
6504 
6505     if (TCG_TARGET_REG_BITS == 32) {
6506         /* Zero extend the address by loading a zero for the high part. */
6507         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6508         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6509     }
6510 
6511     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6512 }
6513 
6514 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6515 {
6516     int i, start_words, num_insns;
6517     TCGOp *op;
6518 
6519     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6520                  && qemu_log_in_addr_range(pc_start))) {
6521         FILE *logfile = qemu_log_trylock();
6522         if (logfile) {
6523             fprintf(logfile, "OP:\n");
6524             tcg_dump_ops(s, logfile, false);
6525             fprintf(logfile, "\n");
6526             qemu_log_unlock(logfile);
6527         }
6528     }
6529 
6530 #ifdef CONFIG_DEBUG_TCG
6531     /* Ensure all labels referenced have been emitted.  */
6532     {
6533         TCGLabel *l;
6534         bool error = false;
6535 
6536         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6537             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6538                 qemu_log_mask(CPU_LOG_TB_OP,
6539                               "$L%d referenced but not present.\n", l->id);
6540                 error = true;
6541             }
6542         }
6543         assert(!error);
6544     }
6545 #endif
6546 
6547     /* Do not reuse any EBB that may be allocated within the TB. */
6548     tcg_temp_ebb_reset_freed(s);
6549 
6550     tcg_optimize(s);
6551 
6552     reachable_code_pass(s);
6553     liveness_pass_0(s);
6554     liveness_pass_1(s);
6555 
6556     if (s->nb_indirects > 0) {
6557         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6558                      && qemu_log_in_addr_range(pc_start))) {
6559             FILE *logfile = qemu_log_trylock();
6560             if (logfile) {
6561                 fprintf(logfile, "OP before indirect lowering:\n");
6562                 tcg_dump_ops(s, logfile, false);
6563                 fprintf(logfile, "\n");
6564                 qemu_log_unlock(logfile);
6565             }
6566         }
6567 
6568         /* Replace indirect temps with direct temps.  */
6569         if (liveness_pass_2(s)) {
6570             /* If changes were made, re-run liveness.  */
6571             liveness_pass_1(s);
6572         }
6573     }
6574 
6575     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6576                  && qemu_log_in_addr_range(pc_start))) {
6577         FILE *logfile = qemu_log_trylock();
6578         if (logfile) {
6579             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6580             tcg_dump_ops(s, logfile, true);
6581             fprintf(logfile, "\n");
6582             qemu_log_unlock(logfile);
6583         }
6584     }
6585 
6586     /* Initialize goto_tb jump offsets. */
6587     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6588     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6589     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6590     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6591 
6592     tcg_reg_alloc_start(s);
6593 
6594     /*
6595      * Reset the buffer pointers when restarting after overflow.
6596      * TODO: Move this into translate-all.c with the rest of the
6597      * buffer management.  Having only this done here is confusing.
6598      */
6599     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6600     s->code_ptr = s->code_buf;
6601     s->data_gen_ptr = NULL;
6602 
6603     QSIMPLEQ_INIT(&s->ldst_labels);
6604     s->pool_labels = NULL;
6605 
6606     start_words = s->insn_start_words;
6607     s->gen_insn_data =
6608         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
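         /* A flat [icount][insn_start_words] array, filled in at each
            INDEX_op_insn_start below. */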
6609 
6610     tcg_out_tb_start(s);
6611 
6612     num_insns = -1;
6613     QTAILQ_FOREACH(op, &s->ops, link) {
6614         TCGOpcode opc = op->opc;
6615 
6616         switch (opc) {
6617         case INDEX_op_mov:
6618         case INDEX_op_mov_vec:
6619             tcg_reg_alloc_mov(s, op);
6620             break;
6621         case INDEX_op_dup_vec:
6622             tcg_reg_alloc_dup(s, op);
6623             break;
6624         case INDEX_op_insn_start:
6625             if (num_insns >= 0) {
6626                 size_t off = tcg_current_code_size(s);
6627                 s->gen_insn_end_off[num_insns] = off;
6628                 /* Assert that we do not overflow our stored offset.  */
6629                 assert(s->gen_insn_end_off[num_insns] == off);
6630             }
6631             num_insns++;
6632             for (i = 0; i < start_words; ++i) {
6633                 s->gen_insn_data[num_insns * start_words + i] =
6634                     tcg_get_insn_start_param(op, i);
6635             }
6636             break;
6637         case INDEX_op_discard:
6638             temp_dead(s, arg_temp(op->args[0]));
6639             break;
6640         case INDEX_op_set_label:
6641             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6642             tcg_out_label(s, arg_label(op->args[0]));
6643             break;
6644         case INDEX_op_call:
6645             tcg_reg_alloc_call(s, op);
6646             break;
6647         case INDEX_op_exit_tb:
6648             tcg_out_exit_tb(s, op->args[0]);
6649             break;
6650         case INDEX_op_goto_tb:
6651             tcg_out_goto_tb(s, op->args[0]);
6652             break;
6653         case INDEX_op_dup2_vec:
6654             if (tcg_reg_alloc_dup2(s, op)) {
6655                 break;
6656             }
6657             /* fall through */
6658         default:
6659             /* Sanity check that we've not introduced any unhandled opcodes. */
6660             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6661                                               TCGOP_FLAGS(op)));
6662             /* Note: it would be much faster to have specialized
6663                register allocator functions for some common argument
6664                patterns.  */
6665             tcg_reg_alloc_op(s, op);
6666             break;
6667         }
6668         /* Test for (pending) buffer overflow.  The assumption is that any
6669            one operation beginning below the high water mark cannot overrun
6670            the buffer completely.  Thus we can test for overflow after
6671            generating code without having to check during generation.  */
6672         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6673             return -1;
6674         }
6675         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6676         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6677             return -2;
6678         }
6679     }
6680     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6681     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6682 
6683     /* Generate TB finalization at the end of block */
6684     i = tcg_out_ldst_finalize(s);
6685     if (i < 0) {
6686         return i;
6687     }
6688     i = tcg_out_pool_finalize(s);
6689     if (i < 0) {
6690         return i;
6691     }
6692     if (!tcg_resolve_relocs(s)) {
6693         return -2;
6694     }
6695 
6696 #ifndef CONFIG_TCG_INTERPRETER
6697     /* flush instruction cache */
6698     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6699                         (uintptr_t)s->code_buf,
6700                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6701 #endif
6702 
6703     return tcg_current_code_size(s);
6704 }
6705 
6706 #ifdef ELF_HOST_MACHINE
6707 /* In order to use this feature, the backend needs to do three things:
6708 
6709    (1) Define ELF_HOST_MACHINE to indicate both what value to
6710        put into the ELF image and to indicate support for the feature.
6711 
6712    (2) Define tcg_register_jit.  This should create a buffer containing
6713        the contents of a .debug_frame section that describes the post-
6714        prologue unwind info for the tcg machine.
6715 
6716    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6717 */
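
     /*
      * A minimal sketch of step (2), assuming a backend-local DebugFrame
      * structure holding the CIE/FDE contents (its name and layout are
      * per-backend, not fixed here):
      *
      *     static const DebugFrame debug_frame = {
      *         ... CIE and FDE fields describing the prologue's frame ...
      *     };
      *
      *     void tcg_register_jit(const void *buf, size_t buf_size)
      *     {
      *         tcg_register_jit_int(buf, buf_size,
      *                              &debug_frame, sizeof(debug_frame));
      *     }
      */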
6718 
6719 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6720 typedef enum {
6721     JIT_NOACTION = 0,
6722     JIT_REGISTER_FN,
6723     JIT_UNREGISTER_FN
6724 } jit_actions_t;
6725 
6726 struct jit_code_entry {
6727     struct jit_code_entry *next_entry;
6728     struct jit_code_entry *prev_entry;
6729     const void *symfile_addr;
6730     uint64_t symfile_size;
6731 };
6732 
6733 struct jit_descriptor {
6734     uint32_t version;
6735     uint32_t action_flag;
6736     struct jit_code_entry *relevant_entry;
6737     struct jit_code_entry *first_entry;
6738 };
6739 
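     /* GDB sets a breakpoint on this function; the empty asm statement
        keeps the compiler from optimizing it away or merging it with
        another empty function.  */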
6740 void __jit_debug_register_code(void) __attribute__((noinline));
6741 void __jit_debug_register_code(void)
6742 {
6743     asm("");
6744 }
6745 
6746 /* Must statically initialize the version, because GDB may check
6747    the version before we can set it.  */
6748 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6749 
6750 /* End GDB interface.  */
6751 
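     /* Return the offset of @str within @strtab.  All strings looked up
        are known to be present, so no failure path is needed.  */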
6752 static int find_string(const char *strtab, const char *str)
6753 {
6754     const char *p = strtab + 1;
6755 
6756     while (1) {
6757         if (strcmp(p, str) == 0) {
6758             return p - strtab;
6759         }
6760         p += strlen(p) + 1;
6761     }
6762 }
6763 
6764 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6765                                  const void *debug_frame,
6766                                  size_t debug_frame_size)
6767 {
6768     struct __attribute__((packed)) DebugInfo {
6769         uint32_t  len;
6770         uint16_t  version;
6771         uint32_t  abbrev;
6772         uint8_t   ptr_size;
6773         uint8_t   cu_die;
6774         uint16_t  cu_lang;
6775         uintptr_t cu_low_pc;
6776         uintptr_t cu_high_pc;
6777         uint8_t   fn_die;
6778         char      fn_name[16];
6779         uintptr_t fn_low_pc;
6780         uintptr_t fn_high_pc;
6781         uint8_t   cu_eoc;
6782     };
6783 
6784     struct ElfImage {
6785         ElfW(Ehdr) ehdr;
6786         ElfW(Phdr) phdr;
6787         ElfW(Shdr) shdr[7];
6788         ElfW(Sym)  sym[2];
6789         struct DebugInfo di;
6790         uint8_t    da[24];
6791         char       str[80];
6792     };
6793 
6794     struct ElfImage *img;
6795 
6796     static const struct ElfImage img_template = {
6797         .ehdr = {
6798             .e_ident[EI_MAG0] = ELFMAG0,
6799             .e_ident[EI_MAG1] = ELFMAG1,
6800             .e_ident[EI_MAG2] = ELFMAG2,
6801             .e_ident[EI_MAG3] = ELFMAG3,
6802             .e_ident[EI_CLASS] = ELF_CLASS,
6803             .e_ident[EI_DATA] = ELF_DATA,
6804             .e_ident[EI_VERSION] = EV_CURRENT,
6805             .e_type = ET_EXEC,
6806             .e_machine = ELF_HOST_MACHINE,
6807             .e_version = EV_CURRENT,
6808             .e_phoff = offsetof(struct ElfImage, phdr),
6809             .e_shoff = offsetof(struct ElfImage, shdr),
6810             .e_ehsize = sizeof(ElfW(Ehdr)),
6811             .e_phentsize = sizeof(ElfW(Phdr)),
6812             .e_phnum = 1,
6813             .e_shentsize = sizeof(ElfW(Shdr)),
6814             .e_shnum = ARRAY_SIZE(img->shdr),
6815             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6816 #ifdef ELF_HOST_FLAGS
6817             .e_flags = ELF_HOST_FLAGS,
6818 #endif
6819 #ifdef ELF_OSABI
6820             .e_ident[EI_OSABI] = ELF_OSABI,
6821 #endif
6822         },
6823         .phdr = {
6824             .p_type = PT_LOAD,
6825             .p_flags = PF_X,
6826         },
6827         .shdr = {
6828             [0] = { .sh_type = SHT_NULL },
6829             /* Trick: The contents of code_gen_buffer are not present in
6830                this fake ELF file; that got allocated elsewhere.  Therefore
6831                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6832                will not look for contents.  We can record any address.  */
6833             [1] = { /* .text */
6834                 .sh_type = SHT_NOBITS,
6835                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6836             },
6837             [2] = { /* .debug_info */
6838                 .sh_type = SHT_PROGBITS,
6839                 .sh_offset = offsetof(struct ElfImage, di),
6840                 .sh_size = sizeof(struct DebugInfo),
6841             },
6842             [3] = { /* .debug_abbrev */
6843                 .sh_type = SHT_PROGBITS,
6844                 .sh_offset = offsetof(struct ElfImage, da),
6845                 .sh_size = sizeof(img->da),
6846             },
6847             [4] = { /* .debug_frame */
6848                 .sh_type = SHT_PROGBITS,
6849                 .sh_offset = sizeof(struct ElfImage),
6850             },
6851             [5] = { /* .symtab */
6852                 .sh_type = SHT_SYMTAB,
6853                 .sh_offset = offsetof(struct ElfImage, sym),
6854                 .sh_size = sizeof(img->sym),
6855                 .sh_info = 1,
6856                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6857                 .sh_entsize = sizeof(ElfW(Sym)),
6858             },
6859             [6] = { /* .strtab */
6860                 .sh_type = SHT_STRTAB,
6861                 .sh_offset = offsetof(struct ElfImage, str),
6862                 .sh_size = sizeof(img->str),
6863             }
6864         },
6865         .sym = {
6866             [1] = { /* code_gen_buffer */
6867                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6868                 .st_shndx = 1,
6869             }
6870         },
6871         .di = {
6872             .len = sizeof(struct DebugInfo) - 4,
6873             .version = 2,
6874             .ptr_size = sizeof(void *),
6875             .cu_die = 1,
6876             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6877             .fn_die = 2,
6878             .fn_name = "code_gen_buffer"
6879         },
6880         .da = {
6881             1,          /* abbrev number (the cu) */
6882             0x11, 1,    /* DW_TAG_compile_unit, has children */
6883             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6884             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6885             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6886             0, 0,       /* end of abbrev */
6887             2,          /* abbrev number (the fn) */
6888             0x2e, 0,    /* DW_TAG_subprogram, no children */
6889             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6890             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6891             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6892             0, 0,       /* end of abbrev */
6893             0           /* no more abbrev */
6894         },
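         /* find_string() above indexes into this blob; the leading \0 is
            the conventional empty string at string-table offset zero. */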
6895         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6896                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6897     };
6898 
6899     /* We only need a single jit entry; statically allocate it.  */
6900     static struct jit_code_entry one_entry;
6901 
6902     uintptr_t buf = (uintptr_t)buf_ptr;
6903     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6904     DebugFrameHeader *dfh;
6905 
6906     img = g_malloc(img_size);
6907     *img = img_template;
6908 
6909     img->phdr.p_vaddr = buf;
6910     img->phdr.p_paddr = buf;
6911     img->phdr.p_memsz = buf_size;
6912 
6913     img->shdr[1].sh_name = find_string(img->str, ".text");
6914     img->shdr[1].sh_addr = buf;
6915     img->shdr[1].sh_size = buf_size;
6916 
6917     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6918     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6919 
6920     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6921     img->shdr[4].sh_size = debug_frame_size;
6922 
6923     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6924     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6925 
6926     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6927     img->sym[1].st_value = buf;
6928     img->sym[1].st_size = buf_size;
6929 
6930     img->di.cu_low_pc = buf;
6931     img->di.cu_high_pc = buf + buf_size;
6932     img->di.fn_low_pc = buf;
6933     img->di.fn_high_pc = buf + buf_size;
6934 
6935     dfh = (DebugFrameHeader *)(img + 1);
6936     memcpy(dfh, debug_frame, debug_frame_size);
6937     dfh->fde.func_start = buf;
6938     dfh->fde.func_len = buf_size;
6939 
6940 #ifdef DEBUG_JIT
6941     /* Enable this block to debug the ELF image file creation.
6942        Inspect the result with readelf, objdump, or similar.  */
6943     {
6944         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6945         FILE *f = fopen(jit, "w+b");
6946         if (f) {
6947             if (fwrite(img, img_size, 1, f) != 1) {
6948                 /* Checking the item count also silences -Wunused-result. */
6949             }
6950             fclose(f);
6951         }
6952     }
6953 #endif
6954 
6955     one_entry.symfile_addr = img;
6956     one_entry.symfile_size = img_size;
6957 
6958     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6959     __jit_debug_descriptor.relevant_entry = &one_entry;
6960     __jit_debug_descriptor.first_entry = &one_entry;
6961     __jit_debug_register_code();
6962 }
6963 #else
6964 /* No support for the feature.  Provide the entry point expected by exec.c,
6965    and implement the internal function we declared earlier.  */
6966 
6967 static void tcg_register_jit_int(const void *buf, size_t size,
6968                                  const void *debug_frame,
6969                                  size_t debug_frame_size)
6970 {
6971 }
6972 
6973 void tcg_register_jit(const void *buf, size_t buf_size)
6974 {
6975 }
6976 #endif /* ELF_HOST_MACHINE */
6977 
6978 #if !TCG_TARGET_MAYBE_vec
6979 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6980 {
6981     g_assert_not_reached();
6982 }
6983 #endif
6984