/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

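/*
 * Each backend supplies a concrete instance of these tables; on a
 * 64-bit host it typically looks along the lines of the sketch below
 * (placeholder values; the real tables live in tcg-target.c.inc):
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie.len = sizeof(DebugFrameCIE) - 4,  // length after .len itself
 *         .cie.id = -1,                          // distinguishes CIE from FDE
 *         .cie.version = 1,
 *         .cie.code_align = 1,
 *         .cie.data_align = 0x78,                // sleb128 -8
 *         .cie.return_column = 16,               // e.g. DWARF %rip on x86-64
 *     };
 */
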
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
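/*
 * System mode has no guest_base; this definition lets the identifier
 * compile but turns any use that is not optimized away at build time
 * into a build error via qemu_build_not_reached().
 */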
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
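
/*
 * For example, a backend whose insn unit is 4 bytes emits exactly one
 * instruction per tcg_out32() call.  A sketch (the encoding below is
 * the AArch64 NOP, used purely as an illustration):
 *
 *     static void tcg_out_nop_example(TCGContext *s)
 *     {
 *         tcg_out32(s, 0xd503201f);
 *     }
 */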

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
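
/*
 * The usual backend pattern for a not-yet-bound label is to record a
 * relocation and emit a placeholder, letting tcg_out_label() plus
 * tcg_resolve_relocs() patch every recorded site.  A sketch, where
 * R_EXAMPLE, BRANCH_PLACEHOLDER and encode_branch are hypothetical:
 *
 *     if (l->has_value) {
 *         tcg_out32(s, encode_branch(s, l->u.value_ptr));
 *     } else {
 *         tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
 *         tcg_out32(s, BRANCH_PLACEHOLDER);
 *     }
 */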

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
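
/*
 * E.g. sign-extending a 32-bit value into a 64-bit register is a
 * single call (a sketch; TCG_REG_A0 and TCG_REG_T0 are hypothetical
 * register names):
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, TCG_REG_A0,
 *                    TCG_TYPE_I32, MO_SL, TCG_REG_T0);
 */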

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchgs.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}
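
/*
 * A backend typically pairs one of these with a pc-relative load whose
 * displacement is patched once tcg_out_pool_finalize() has placed the
 * data.  A sketch, where R_EXAMPLE_PCREL and LOAD_PCREL_PLACEHOLDER
 * are hypothetical:
 *
 *     new_pool_label(s, value, R_EXAMPLE_PCREL, s->code_ptr, 0);
 *     tcg_out32(s, LOAD_PCREL_PLACEHOLDER);
 */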

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
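
/*
 * For example, a line C_O1_I2(r, r, ri) in tcg-target-con-set.h expands
 * here to the enumerator c_o1_i2_r_r_ri, and in constraint_sets[] below
 * to the entry { 1, 2, { "r", "r", "ri" } }.
 */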

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

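/*
 * A backend then provides one instance per opcode; a sketch (function
 * names hypothetical; the real definitions live in tcg-target.c.inc):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 */
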
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
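
/*
 * E.g. in system mode each vCPU thread does, before its first
 * translation (a sketch of the calling convention, not the actual
 * accel/tcg code):
 *
 *     tcg_register_thread();
 *     // tcg_ctx now points at this thread's private context
 */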

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
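
/*
 * Everything allocated at translation time (labels, relocations, ldst
 * slow-path records, ...) comes from this pool via tcg_malloc(), and is
 * reclaimed wholesale by tcg_pool_reset() rather than freed one by one.
 */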

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
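
/*
 * Worked example for the nargs computation above: the typemask packs
 * 3-bit typecodes, with the return value in bits 0..2 and arguments
 * from bit 3 upward.  info_helper_ld32_mmu has four arguments, so
 * after "typemask >> 3" the last argument occupies bits 9..11;
 * 32 - clz32(...) therefore lies in 10..12 and DIV_ROUND_UP(.., 3)
 * yields nargs = 4.
 */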

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid a -Werror
     * "unsigned < 0 is always false" warning when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
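
/*
 * E.g. on a host with six integer argument registers (a hypothetical
 * count), arg_slot 4 is still a register, while arg_slot 7 maps to
 * stack slot 1, i.e. offset
 * TCG_TARGET_CALL_STACK_OFFSET + sizeof(tcg_target_long).
 */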

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
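
/*
 * (arg_slot & 1) is 1 exactly when the slot number is odd, so the
 * addition above rounds arg_slot up to the next even slot, as required
 * by TCG_CALL_ARG_EVEN ABIs.
 */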

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }
1525 
1526     /*
1527      * Parse and place function arguments.
1528      */
1529     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1530         TCGCallArgumentKind kind;
1531         TCGType type;
1532 
1533         typecode = typemask & 7;
1534         switch (typecode) {
1535         case dh_typecode_i32:
1536         case dh_typecode_s32:
1537             type = TCG_TYPE_I32;
1538             break;
1539         case dh_typecode_i64:
1540         case dh_typecode_s64:
1541             type = TCG_TYPE_I64;
1542             break;
1543         case dh_typecode_ptr:
1544             type = TCG_TYPE_PTR;
1545             break;
1546         case dh_typecode_i128:
1547             type = TCG_TYPE_I128;
1548             break;
1549         default:
1550             g_assert_not_reached();
1551         }
1552 
1553         switch (type) {
1554         case TCG_TYPE_I32:
1555             switch (TCG_TARGET_CALL_ARG_I32) {
1556             case TCG_CALL_ARG_EVEN:
1557                 layout_arg_even(&cum);
1558                 /* fall through */
1559             case TCG_CALL_ARG_NORMAL:
1560                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1561                 break;
1562             case TCG_CALL_ARG_EXTEND:
1563                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1564                 layout_arg_1(&cum, info, kind);
1565                 break;
1566             default:
1567                 qemu_build_not_reached();
1568             }
1569             break;
1570 
1571         case TCG_TYPE_I64:
1572             switch (TCG_TARGET_CALL_ARG_I64) {
1573             case TCG_CALL_ARG_EVEN:
1574                 layout_arg_even(&cum);
1575                 /* fall through */
1576             case TCG_CALL_ARG_NORMAL:
1577                 if (TCG_TARGET_REG_BITS == 32) {
1578                     layout_arg_normal_n(&cum, info, 2);
1579                 } else {
1580                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1581                 }
1582                 break;
1583             default:
1584                 qemu_build_not_reached();
1585             }
1586             break;
1587 
1588         case TCG_TYPE_I128:
1589             switch (TCG_TARGET_CALL_ARG_I128) {
1590             case TCG_CALL_ARG_EVEN:
1591                 layout_arg_even(&cum);
1592                 /* fall through */
1593             case TCG_CALL_ARG_NORMAL:
1594                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1595                 break;
1596             case TCG_CALL_ARG_BY_REF:
1597                 layout_arg_by_ref(&cum, info);
1598                 break;
1599             default:
1600                 qemu_build_not_reached();
1601             }
1602             break;
1603 
1604         default:
1605             g_assert_not_reached();
1606         }
1607     }
1608     info->nr_in = cum.info_in_idx;
1609 
1610     /* Validate that we didn't overrun the input array. */
1611     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1612     /* Validate the backend has enough argument space. */
1613     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1614 
1615     /*
1616      * Relocate the "ref_slot" area to the end of the parameters.
1617      * Minimizing this stack offset helps code size for x86,
1618      * which has a signed 8-bit offset encoding.
1619      */
1620     if (cum.ref_slot != 0) {
1621         int ref_base = 0;
1622 
1623         if (cum.arg_slot > max_reg_slots) {
1624             int align = __alignof(Int128) / sizeof(tcg_target_long);
1625 
1626             ref_base = cum.arg_slot - max_reg_slots;
1627             if (align > 1) {
1628                 ref_base = ROUND_UP(ref_base, align);
1629             }
1630         }
1631         assert(ref_base + cum.ref_slot <= max_stk_slots);
1632         ref_base += max_reg_slots;
1633 
1634         if (ref_base != 0) {
1635             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1636                 TCGCallArgumentLoc *loc = &info->in[i];
1637                 switch (loc->kind) {
1638                 case TCG_CALL_ARG_BY_REF:
1639                 case TCG_CALL_ARG_BY_REF_N:
1640                     loc->ref_slot += ref_base;
1641                     break;
1642                 default:
1643                     break;
1644                 }
1645             }
1646         }
1647     }
1648 }
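
/*
 * To illustrate the relocation above with hypothetical numbers: with
 * 6 register slots and cum.arg_slot == 8, two argument slots spill to
 * the stack, so ref_base starts at 8 - 6 == 2.  On a 64-bit host the
 * Int128 alignment is 16 / 8 == 2 slots, so no rounding is needed;
 * after adding back max_reg_slots, each BY_REF/BY_REF_N ref_slot is
 * biased by 8, placing the copies just past the stacked parameters.
 */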
1649 
1650 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1651 static void process_constraint_sets(void);
1652 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1653                                             TCGReg reg, const char *name);
1654 
1655 static void tcg_context_init(unsigned max_threads)
1656 {
1657     TCGContext *s = &tcg_init_ctx;
1658     int n, i;
1659     TCGTemp *ts;
1660 
1661     memset(s, 0, sizeof(*s));
1662     s->nb_globals = 0;
1663 
1664     init_call_layout(&info_helper_ld32_mmu);
1665     init_call_layout(&info_helper_ld64_mmu);
1666     init_call_layout(&info_helper_ld128_mmu);
1667     init_call_layout(&info_helper_st32_mmu);
1668     init_call_layout(&info_helper_st64_mmu);
1669     init_call_layout(&info_helper_st128_mmu);
1670 
1671     tcg_target_init(s);
1672     process_constraint_sets();
1673 
1674     /* Reverse the order of the saved registers, assuming they're all at
1675        the start of tcg_target_reg_alloc_order.  */
1676     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1677         int r = tcg_target_reg_alloc_order[n];
1678         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1679             break;
1680         }
1681     }
1682     for (i = 0; i < n; ++i) {
1683         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1684     }
1685     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1686         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1687     }
1688 
1689     tcg_ctx = s;
1690     /*
1691      * In user-mode we simply share the init context among threads, since we
1692      * use a single region. See the documentation of tcg_region_init() for the
1693      * reasoning behind this.
1694      * In system-mode we will have at most max_threads TCG threads.
1695      */
1696 #ifdef CONFIG_USER_ONLY
1697     tcg_ctxs = &tcg_ctx;
1698     tcg_cur_ctxs = 1;
1699     tcg_max_ctxs = 1;
1700 #else
1701     tcg_max_ctxs = max_threads;
1702     tcg_ctxs = g_new0(TCGContext *, max_threads);
1703 #endif
1704 
1705     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1706     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1707     tcg_env = temp_tcgv_ptr(ts);
1708 }
1709 
1710 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1711 {
1712     tcg_context_init(max_threads);
1713     tcg_region_init(tb_size, splitwx, max_threads);
1714 }
1715 
1716 /*
1717  * Allocate TBs right before their corresponding translated code, making
1718  * sure that TBs and code are on different cache lines.
1719  */
1720 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1721 {
1722     uintptr_t align = qemu_icache_linesize;
1723     TranslationBlock *tb;
1724     void *next;
1725 
1726  retry:
1727     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1728     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1729 
1730     if (unlikely(next > s->code_gen_highwater)) {
1731         if (tcg_region_alloc(s)) {
1732             return NULL;
1733         }
1734         goto retry;
1735     }
1736     qatomic_set(&s->code_gen_ptr, next);
1737     return tb;
1738 }
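
/*
 * For example (addresses hypothetical): with a 64-byte icache line
 * and code_gen_ptr at ...0x1010, the TB is placed at ...0x1040 and
 * the translated code starts at the next 64-byte boundary past the
 * end of the TranslationBlock, so the two never share a cache line.
 */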
1739 
1740 void tcg_prologue_init(void)
1741 {
1742     TCGContext *s = tcg_ctx;
1743     size_t prologue_size;
1744 
1745     s->code_ptr = s->code_gen_ptr;
1746     s->code_buf = s->code_gen_ptr;
1747     s->data_gen_ptr = NULL;
1748 
1749 #ifndef CONFIG_TCG_INTERPRETER
1750     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1751 #endif
1752 
1753     s->pool_labels = NULL;
1754 
1755     qemu_thread_jit_write();
1756     /* Generate the prologue.  */
1757     tcg_target_qemu_prologue(s);
1758 
1759     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1760     {
1761         int result = tcg_out_pool_finalize(s);
1762         tcg_debug_assert(result == 0);
1763     }
1764 
1765     prologue_size = tcg_current_code_size(s);
1766     perf_report_prologue(s->code_gen_ptr, prologue_size);
1767 
1768 #ifndef CONFIG_TCG_INTERPRETER
1769     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1770                         (uintptr_t)s->code_buf, prologue_size);
1771 #endif
1772 
1773     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1774         FILE *logfile = qemu_log_trylock();
1775         if (logfile) {
1776             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1777             if (s->data_gen_ptr) {
1778                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1779                 size_t data_size = prologue_size - code_size;
1780                 size_t i;
1781 
1782                 disas(logfile, s->code_gen_ptr, code_size);
1783 
1784                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1785                     if (sizeof(tcg_target_ulong) == 8) {
1786                         fprintf(logfile,
1787                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1788                                 (uintptr_t)s->data_gen_ptr + i,
1789                                 *(uint64_t *)(s->data_gen_ptr + i));
1790                     } else {
1791                         fprintf(logfile,
1792                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1793                                 (uintptr_t)s->data_gen_ptr + i,
1794                                 *(uint32_t *)(s->data_gen_ptr + i));
1795                     }
1796                 }
1797             } else {
1798                 disas(logfile, s->code_gen_ptr, prologue_size);
1799             }
1800             fprintf(logfile, "\n");
1801             qemu_log_unlock(logfile);
1802         }
1803     }
1804 
1805 #ifndef CONFIG_TCG_INTERPRETER
1806     /*
1807      * Assert that goto_ptr is implemented completely, setting an epilogue.
1808      * For tci, we use NULL as the signal to return from the interpreter,
1809      * so skip this check.
1810      */
1811     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1812 #endif
1813 
1814     tcg_region_prologue_set(s);
1815 }
1816 
1817 void tcg_func_start(TCGContext *s)
1818 {
1819     tcg_pool_reset(s);
1820     s->nb_temps = s->nb_globals;
1821 
1822     /* No temps have been previously allocated for size or locality.  */
1823     tcg_temp_ebb_reset_freed(s);
1824 
1825     /* No constant temps have been previously allocated. */
1826     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1827         if (s->const_table[i]) {
1828             g_hash_table_remove_all(s->const_table[i]);
1829         }
1830     }
1831 
1832     s->nb_ops = 0;
1833     s->nb_labels = 0;
1834     s->current_frame_offset = s->frame_start;
1835 
1836 #ifdef CONFIG_DEBUG_TCG
1837     s->goto_tb_issue_mask = 0;
1838 #endif
1839 
1840     QTAILQ_INIT(&s->ops);
1841     QTAILQ_INIT(&s->free_ops);
1842     s->emit_before_op = NULL;
1843     QSIMPLEQ_INIT(&s->labels);
1844 
1845     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1846     tcg_debug_assert(s->insn_start_words > 0);
1847 }
1848 
1849 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1850 {
1851     int n = s->nb_temps++;
1852 
1853     if (n >= TCG_MAX_TEMPS) {
1854         tcg_raise_tb_overflow(s);
1855     }
1856     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1857 }
1858 
1859 static TCGTemp *tcg_global_alloc(TCGContext *s)
1860 {
1861     TCGTemp *ts;
1862 
1863     tcg_debug_assert(s->nb_globals == s->nb_temps);
1864     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1865     s->nb_globals++;
1866     ts = tcg_temp_alloc(s);
1867     ts->kind = TEMP_GLOBAL;
1868 
1869     return ts;
1870 }
1871 
1872 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1873                                             TCGReg reg, const char *name)
1874 {
1875     TCGTemp *ts;
1876 
1877     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1878 
1879     ts = tcg_global_alloc(s);
1880     ts->base_type = type;
1881     ts->type = type;
1882     ts->kind = TEMP_FIXED;
1883     ts->reg = reg;
1884     ts->name = name;
1885     tcg_regset_set_reg(s->reserved_regs, reg);
1886 
1887     return ts;
1888 }
1889 
1890 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1891 {
1892     s->frame_start = start;
1893     s->frame_end = start + size;
1894     s->frame_temp
1895         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1896 }
1897 
1898 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1899                                             const char *name, TCGType type)
1900 {
1901     TCGContext *s = tcg_ctx;
1902     TCGTemp *base_ts = tcgv_ptr_temp(base);
1903     TCGTemp *ts = tcg_global_alloc(s);
1904     int indirect_reg = 0;
1905 
1906     switch (base_ts->kind) {
1907     case TEMP_FIXED:
1908         break;
1909     case TEMP_GLOBAL:
1910         /* We do not support double-indirect registers.  */
1911         tcg_debug_assert(!base_ts->indirect_reg);
1912         base_ts->indirect_base = 1;
1913         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1914                             ? 2 : 1);
1915         indirect_reg = 1;
1916         break;
1917     default:
1918         g_assert_not_reached();
1919     }
1920 
1921     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1922         TCGTemp *ts2 = tcg_global_alloc(s);
1923         char buf[64];
1924 
1925         ts->base_type = TCG_TYPE_I64;
1926         ts->type = TCG_TYPE_I32;
1927         ts->indirect_reg = indirect_reg;
1928         ts->mem_allocated = 1;
1929         ts->mem_base = base_ts;
1930         ts->mem_offset = offset;
1931         pstrcpy(buf, sizeof(buf), name);
1932         pstrcat(buf, sizeof(buf), "_0");
1933         ts->name = strdup(buf);
1934 
1935         tcg_debug_assert(ts2 == ts + 1);
1936         ts2->base_type = TCG_TYPE_I64;
1937         ts2->type = TCG_TYPE_I32;
1938         ts2->indirect_reg = indirect_reg;
1939         ts2->mem_allocated = 1;
1940         ts2->mem_base = base_ts;
1941         ts2->mem_offset = offset + 4;
1942         ts2->temp_subindex = 1;
1943         pstrcpy(buf, sizeof(buf), name);
1944         pstrcat(buf, sizeof(buf), "_1");
1945         ts2->name = strdup(buf);
1946     } else {
1947         ts->base_type = type;
1948         ts->type = type;
1949         ts->indirect_reg = indirect_reg;
1950         ts->mem_allocated = 1;
1951         ts->mem_base = base_ts;
1952         ts->mem_offset = offset;
1953         ts->name = name;
1954     }
1955     return ts;
1956 }
1957 
1958 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1959 {
1960     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1961     return temp_tcgv_i32(ts);
1962 }
1963 
1964 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1965 {
1966     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1967     return temp_tcgv_i64(ts);
1968 }
1969 
1970 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1971 {
1972     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1973     return temp_tcgv_ptr(ts);
1974 }
1975 
1976 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1977 {
1978     TCGContext *s = tcg_ctx;
1979     TCGTemp *ts;
1980     int n;
1981 
1982     if (kind == TEMP_EBB) {
1983         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1984 
1985         if (idx < TCG_MAX_TEMPS) {
1986             /* There is already an available temp with the right type.  */
1987             clear_bit(idx, s->free_temps[type].l);
1988 
1989             ts = &s->temps[idx];
1990             ts->temp_allocated = 1;
1991             tcg_debug_assert(ts->base_type == type);
1992             tcg_debug_assert(ts->kind == kind);
1993             return ts;
1994         }
1995     } else {
1996         tcg_debug_assert(kind == TEMP_TB);
1997     }
1998 
1999     switch (type) {
2000     case TCG_TYPE_I32:
2001     case TCG_TYPE_V64:
2002     case TCG_TYPE_V128:
2003     case TCG_TYPE_V256:
2004         n = 1;
2005         break;
2006     case TCG_TYPE_I64:
2007         n = 64 / TCG_TARGET_REG_BITS;
2008         break;
2009     case TCG_TYPE_I128:
2010         n = 128 / TCG_TARGET_REG_BITS;
2011         break;
2012     default:
2013         g_assert_not_reached();
2014     }
2015 
2016     ts = tcg_temp_alloc(s);
2017     ts->base_type = type;
2018     ts->temp_allocated = 1;
2019     ts->kind = kind;
2020 
2021     if (n == 1) {
2022         ts->type = type;
2023     } else {
2024         ts->type = TCG_TYPE_REG;
2025 
2026         for (int i = 1; i < n; ++i) {
2027             TCGTemp *ts2 = tcg_temp_alloc(s);
2028 
2029             tcg_debug_assert(ts2 == ts + i);
2030             ts2->base_type = type;
2031             ts2->type = TCG_TYPE_REG;
2032             ts2->temp_allocated = 1;
2033             ts2->temp_subindex = i;
2034             ts2->kind = kind;
2035         }
2036     }
2037     return ts;
2038 }
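
/*
 * Concretely: tcg_temp_new_i128() on a 64-bit host takes the
 * n == 128 / 64 == 2 path above, allocating two contiguous TCGTemps
 * with base_type TCG_TYPE_I128, type TCG_TYPE_REG and temp_subindex
 * 0 and 1; TCG_TYPE_I64 on a 32-bit host gets the same shape.
 */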
2039 
2040 TCGv_i32 tcg_temp_new_i32(void)
2041 {
2042     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2043 }
2044 
2045 TCGv_i32 tcg_temp_ebb_new_i32(void)
2046 {
2047     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2048 }
2049 
2050 TCGv_i64 tcg_temp_new_i64(void)
2051 {
2052     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2053 }
2054 
2055 TCGv_i64 tcg_temp_ebb_new_i64(void)
2056 {
2057     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2058 }
2059 
2060 TCGv_ptr tcg_temp_new_ptr(void)
2061 {
2062     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2063 }
2064 
2065 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2066 {
2067     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2068 }
2069 
2070 TCGv_i128 tcg_temp_new_i128(void)
2071 {
2072     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2073 }
2074 
2075 TCGv_i128 tcg_temp_ebb_new_i128(void)
2076 {
2077     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2078 }
2079 
2080 TCGv_vec tcg_temp_new_vec(TCGType type)
2081 {
2082     TCGTemp *t;
2083 
2084 #ifdef CONFIG_DEBUG_TCG
2085     switch (type) {
2086     case TCG_TYPE_V64:
2087         assert(TCG_TARGET_HAS_v64);
2088         break;
2089     case TCG_TYPE_V128:
2090         assert(TCG_TARGET_HAS_v128);
2091         break;
2092     case TCG_TYPE_V256:
2093         assert(TCG_TARGET_HAS_v256);
2094         break;
2095     default:
2096         g_assert_not_reached();
2097     }
2098 #endif
2099 
2100     t = tcg_temp_new_internal(type, TEMP_EBB);
2101     return temp_tcgv_vec(t);
2102 }
2103 
2104 /* Create a new temp of the same type as an existing temp.  */
2105 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2106 {
2107     TCGTemp *t = tcgv_vec_temp(match);
2108 
2109     tcg_debug_assert(t->temp_allocated != 0);
2110 
2111     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2112     return temp_tcgv_vec(t);
2113 }
2114 
2115 void tcg_temp_free_internal(TCGTemp *ts)
2116 {
2117     TCGContext *s = tcg_ctx;
2118 
2119     switch (ts->kind) {
2120     case TEMP_CONST:
2121     case TEMP_TB:
2122         /* Silently ignore free. */
2123         break;
2124     case TEMP_EBB:
2125         tcg_debug_assert(ts->temp_allocated != 0);
2126         ts->temp_allocated = 0;
2127         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2128         break;
2129     default:
2130         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2131         g_assert_not_reached();
2132     }
2133 }
2134 
2135 void tcg_temp_free_i32(TCGv_i32 arg)
2136 {
2137     tcg_temp_free_internal(tcgv_i32_temp(arg));
2138 }
2139 
2140 void tcg_temp_free_i64(TCGv_i64 arg)
2141 {
2142     tcg_temp_free_internal(tcgv_i64_temp(arg));
2143 }
2144 
2145 void tcg_temp_free_i128(TCGv_i128 arg)
2146 {
2147     tcg_temp_free_internal(tcgv_i128_temp(arg));
2148 }
2149 
2150 void tcg_temp_free_ptr(TCGv_ptr arg)
2151 {
2152     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2153 }
2154 
2155 void tcg_temp_free_vec(TCGv_vec arg)
2156 {
2157     tcg_temp_free_internal(tcgv_vec_temp(arg));
2158 }
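
/*
 * A typical temp lifecycle, sketched (dst is a caller-provided
 * TCGv_i32; the tcg_gen_* expanders are declared in tcg-op.h):
 *
 *     TCGv_i32 t = tcg_temp_ebb_new_i32();
 *     tcg_gen_movi_i32(t, 0x1234);
 *     tcg_gen_add_i32(dst, dst, t);
 *     tcg_temp_free_i32(t);    /* EBB temp returned to free_temps */
 *
 * Freeing a TEMP_TB temp is a silent no-op, per the switch above.
 */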
2159 
2160 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2161 {
2162     TCGContext *s = tcg_ctx;
2163     GHashTable *h = s->const_table[type];
2164     TCGTemp *ts;
2165 
2166     if (h == NULL) {
2167         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2168         s->const_table[type] = h;
2169     }
2170 
2171     ts = g_hash_table_lookup(h, &val);
2172     if (ts == NULL) {
2173         int64_t *val_ptr;
2174 
2175         ts = tcg_temp_alloc(s);
2176 
2177         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2178             TCGTemp *ts2 = tcg_temp_alloc(s);
2179 
2180             tcg_debug_assert(ts2 == ts + 1);
2181 
2182             ts->base_type = TCG_TYPE_I64;
2183             ts->type = TCG_TYPE_I32;
2184             ts->kind = TEMP_CONST;
2185             ts->temp_allocated = 1;
2186 
2187             ts2->base_type = TCG_TYPE_I64;
2188             ts2->type = TCG_TYPE_I32;
2189             ts2->kind = TEMP_CONST;
2190             ts2->temp_allocated = 1;
2191             ts2->temp_subindex = 1;
2192 
2193             /*
2194              * Retain the full value of the 64-bit constant in the low
2195              * part, so that the hash table works.  Actual uses will
2196              * truncate the value to the low part.
2197              */
2198             ts[HOST_BIG_ENDIAN].val = val;
2199             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2200             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2201         } else {
2202             ts->base_type = type;
2203             ts->type = type;
2204             ts->kind = TEMP_CONST;
2205             ts->temp_allocated = 1;
2206             ts->val = val;
2207             val_ptr = &ts->val;
2208         }
2209         g_hash_table_insert(h, val_ptr, ts);
2210     }
2211 
2212     return ts;
2213 }
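
/*
 * For example, on a 32-bit little-endian host,
 * tcg_constant_i64(0x1122334455667788) allocates two TEMP_CONST
 * halves: ts[0].val retains the full 64-bit value (doubling as the
 * hash key) while ts[1].val holds 0x11223344; uses of ts[0]
 * truncate to the low 32 bits, per the comment above.
 */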
2214 
2215 TCGv_i32 tcg_constant_i32(int32_t val)
2216 {
2217     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2218 }
2219 
2220 TCGv_i64 tcg_constant_i64(int64_t val)
2221 {
2222     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2223 }
2224 
2225 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2226 {
2227     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2228 }
2229 
2230 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2231 {
2232     val = dup_const(vece, val);
2233     return temp_tcgv_vec(tcg_constant_internal(type, val));
2234 }
2235 
2236 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2237 {
2238     TCGTemp *t = tcgv_vec_temp(match);
2239 
2240     tcg_debug_assert(t->temp_allocated != 0);
2241     return tcg_constant_vec(t->base_type, vece, val);
2242 }
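
/*
 * dup_const() replicates the value across the element size, e.g.
 * dup_const(MO_8, 0xff) == 0xffffffffffffffff and
 * dup_const(MO_16, 0x1234) == 0x1234123412341234, so the constant
 * hash table dedups vector constants by their replicated form.
 */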
2243 
2244 #ifdef CONFIG_DEBUG_TCG
2245 size_t temp_idx(TCGTemp *ts)
2246 {
2247     ptrdiff_t n = ts - tcg_ctx->temps;
2248     assert(n >= 0 && n < tcg_ctx->nb_temps);
2249     return n;
2250 }
2251 
2252 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2253 {
2254     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2255 
2256     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2257     assert(o % sizeof(TCGTemp) == 0);
2258 
2259     return (void *)tcg_ctx + (uintptr_t)v;
2260 }
2261 #endif /* CONFIG_DEBUG_TCG */
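
/*
 * In other words, in debug builds a TCGv_* handle is not a pointer
 * but the byte offset of its TCGTemp within TCGContext: temp number
 * i is encoded as offsetof(TCGContext, temps) + i * sizeof(TCGTemp),
 * which the two asserts above validate before rebasing onto tcg_ctx.
 */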
2262 
2263 /*
2264  * Return true if OP may appear in the opcode stream with TYPE.
2265  * Test the runtime variable that controls each opcode.
2266  */
2267 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2268 {
2269     bool has_type;
2270 
2271     switch (type) {
2272     case TCG_TYPE_I32:
2273         has_type = true;
2274         break;
2275     case TCG_TYPE_I64:
2276         has_type = TCG_TARGET_REG_BITS == 64;
2277         break;
2278     case TCG_TYPE_V64:
2279         has_type = TCG_TARGET_HAS_v64;
2280         break;
2281     case TCG_TYPE_V128:
2282         has_type = TCG_TARGET_HAS_v128;
2283         break;
2284     case TCG_TYPE_V256:
2285         has_type = TCG_TARGET_HAS_v256;
2286         break;
2287     default:
2288         has_type = false;
2289         break;
2290     }
2291 
2292     switch (op) {
2293     case INDEX_op_discard:
2294     case INDEX_op_set_label:
2295     case INDEX_op_call:
2296     case INDEX_op_br:
2297     case INDEX_op_mb:
2298     case INDEX_op_insn_start:
2299     case INDEX_op_exit_tb:
2300     case INDEX_op_goto_tb:
2301     case INDEX_op_goto_ptr:
2302     case INDEX_op_qemu_ld_i32:
2303     case INDEX_op_qemu_st_i32:
2304     case INDEX_op_qemu_ld_i64:
2305     case INDEX_op_qemu_st_i64:
2306         return true;
2307 
2308     case INDEX_op_qemu_st8_i32:
2309         return TCG_TARGET_HAS_qemu_st8_i32;
2310 
2311     case INDEX_op_qemu_ld_i128:
2312     case INDEX_op_qemu_st_i128:
2313         return TCG_TARGET_HAS_qemu_ldst_i128;
2314 
2315     case INDEX_op_add:
2316     case INDEX_op_and:
2317     case INDEX_op_brcond:
2318     case INDEX_op_mov:
2319     case INDEX_op_movcond:
2320     case INDEX_op_negsetcond:
2321     case INDEX_op_or:
2322     case INDEX_op_setcond:
2323     case INDEX_op_xor:
2324         return has_type;
2325 
2326     case INDEX_op_ld8u_i32:
2327     case INDEX_op_ld8s_i32:
2328     case INDEX_op_ld16u_i32:
2329     case INDEX_op_ld16s_i32:
2330     case INDEX_op_ld_i32:
2331     case INDEX_op_st8_i32:
2332     case INDEX_op_st16_i32:
2333     case INDEX_op_st_i32:
2334     case INDEX_op_extract_i32:
2335     case INDEX_op_sextract_i32:
2336     case INDEX_op_deposit_i32:
2337         return true;
2338 
2339     case INDEX_op_extract2_i32:
2340         return TCG_TARGET_HAS_extract2_i32;
2341     case INDEX_op_add2_i32:
2342         return TCG_TARGET_HAS_add2_i32;
2343     case INDEX_op_sub2_i32:
2344         return TCG_TARGET_HAS_sub2_i32;
2345     case INDEX_op_bswap32_i32:
2346         return TCG_TARGET_HAS_bswap32_i32;
2347 
2348     case INDEX_op_brcond2_i32:
2349     case INDEX_op_setcond2_i32:
2350         return TCG_TARGET_REG_BITS == 32;
2351 
2352     case INDEX_op_ld8u_i64:
2353     case INDEX_op_ld8s_i64:
2354     case INDEX_op_ld16u_i64:
2355     case INDEX_op_ld16s_i64:
2356     case INDEX_op_ld32u_i64:
2357     case INDEX_op_ld32s_i64:
2358     case INDEX_op_ld_i64:
2359     case INDEX_op_st8_i64:
2360     case INDEX_op_st16_i64:
2361     case INDEX_op_st32_i64:
2362     case INDEX_op_st_i64:
2363     case INDEX_op_ext_i32_i64:
2364     case INDEX_op_extu_i32_i64:
2365     case INDEX_op_extract_i64:
2366     case INDEX_op_sextract_i64:
2367     case INDEX_op_deposit_i64:
2368         return TCG_TARGET_REG_BITS == 64;
2369 
2370     case INDEX_op_extract2_i64:
2371         return TCG_TARGET_HAS_extract2_i64;
2372     case INDEX_op_extrl_i64_i32:
2373     case INDEX_op_extrh_i64_i32:
2374         return TCG_TARGET_HAS_extr_i64_i32;
2375     case INDEX_op_bswap32_i64:
2376         return TCG_TARGET_HAS_bswap32_i64;
2377     case INDEX_op_bswap64_i64:
2378         return TCG_TARGET_HAS_bswap64_i64;
2379     case INDEX_op_add2_i64:
2380         return TCG_TARGET_HAS_add2_i64;
2381     case INDEX_op_sub2_i64:
2382         return TCG_TARGET_HAS_sub2_i64;
2383 
2384     case INDEX_op_mov_vec:
2385     case INDEX_op_dup_vec:
2386     case INDEX_op_dupm_vec:
2387     case INDEX_op_ld_vec:
2388     case INDEX_op_st_vec:
2389     case INDEX_op_add_vec:
2390     case INDEX_op_sub_vec:
2391     case INDEX_op_and_vec:
2392     case INDEX_op_or_vec:
2393     case INDEX_op_xor_vec:
2394     case INDEX_op_cmp_vec:
2395         return has_type;
2396     case INDEX_op_dup2_vec:
2397         return has_type && TCG_TARGET_REG_BITS == 32;
2398     case INDEX_op_not_vec:
2399         return has_type && TCG_TARGET_HAS_not_vec;
2400     case INDEX_op_neg_vec:
2401         return has_type && TCG_TARGET_HAS_neg_vec;
2402     case INDEX_op_abs_vec:
2403         return has_type && TCG_TARGET_HAS_abs_vec;
2404     case INDEX_op_andc_vec:
2405         return has_type && TCG_TARGET_HAS_andc_vec;
2406     case INDEX_op_orc_vec:
2407         return has_type && TCG_TARGET_HAS_orc_vec;
2408     case INDEX_op_nand_vec:
2409         return has_type && TCG_TARGET_HAS_nand_vec;
2410     case INDEX_op_nor_vec:
2411         return has_type && TCG_TARGET_HAS_nor_vec;
2412     case INDEX_op_eqv_vec:
2413         return has_type && TCG_TARGET_HAS_eqv_vec;
2414     case INDEX_op_mul_vec:
2415         return has_type && TCG_TARGET_HAS_mul_vec;
2416     case INDEX_op_shli_vec:
2417     case INDEX_op_shri_vec:
2418     case INDEX_op_sari_vec:
2419         return has_type && TCG_TARGET_HAS_shi_vec;
2420     case INDEX_op_shls_vec:
2421     case INDEX_op_shrs_vec:
2422     case INDEX_op_sars_vec:
2423         return has_type && TCG_TARGET_HAS_shs_vec;
2424     case INDEX_op_shlv_vec:
2425     case INDEX_op_shrv_vec:
2426     case INDEX_op_sarv_vec:
2427         return has_type && TCG_TARGET_HAS_shv_vec;
2428     case INDEX_op_rotli_vec:
2429         return has_type && TCG_TARGET_HAS_roti_vec;
2430     case INDEX_op_rotls_vec:
2431         return has_type && TCG_TARGET_HAS_rots_vec;
2432     case INDEX_op_rotlv_vec:
2433     case INDEX_op_rotrv_vec:
2434         return has_type && TCG_TARGET_HAS_rotv_vec;
2435     case INDEX_op_ssadd_vec:
2436     case INDEX_op_usadd_vec:
2437     case INDEX_op_sssub_vec:
2438     case INDEX_op_ussub_vec:
2439         return has_type && TCG_TARGET_HAS_sat_vec;
2440     case INDEX_op_smin_vec:
2441     case INDEX_op_umin_vec:
2442     case INDEX_op_smax_vec:
2443     case INDEX_op_umax_vec:
2444         return has_type && TCG_TARGET_HAS_minmax_vec;
2445     case INDEX_op_bitsel_vec:
2446         return has_type && TCG_TARGET_HAS_bitsel_vec;
2447     case INDEX_op_cmpsel_vec:
2448         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2449 
2450     default:
2451         if (op < INDEX_op_last_generic) {
2452             const TCGOutOp *outop;
2453             TCGConstraintSetIndex con_set;
2454 
2455             if (!has_type) {
2456                 return false;
2457             }
2458 
2459             outop = all_outop[op];
2460             tcg_debug_assert(outop != NULL);
2461 
2462             con_set = outop->static_constraint;
2463             if (con_set == C_Dynamic) {
2464                 con_set = outop->dynamic_constraint(type, flags);
2465             }
2466             if (con_set >= 0) {
2467                 return true;
2468             }
2469             tcg_debug_assert(con_set == C_NotImplemented);
2470             return false;
2471         }
2472         tcg_debug_assert(op < NB_OPS);
2473         return true;
2474 
2475     case INDEX_op_last_generic:
2476         g_assert_not_reached();
2477     }
2478 }
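
/*
 * For instance, INDEX_op_rotli_vec with TCG_TYPE_V128 reports true
 * only when the host provides 128-bit vectors and immediate vector
 * rotates, i.e. TCG_TARGET_HAS_v128 && TCG_TARGET_HAS_roti_vec.
 */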
2479 
2480 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2481 {
2482     unsigned width;
2483 
2484     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2485     width = (type == TCG_TYPE_I32 ? 32 : 64);
2486 
2487     tcg_debug_assert(ofs < width);
2488     tcg_debug_assert(len > 0);
2489     tcg_debug_assert(len <= width - ofs);
2490 
2491     return TCG_TARGET_deposit_valid(type, ofs, len);
2492 }
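
/*
 * Example: tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8) asks whether the
 * backend can insert an 8-bit field at bit 8 of a 32-bit word, i.e.
 * dest = (dest & ~0xff00) | ((src << 8) & 0xff00); the checks above
 * reject out-of-range ofs/len before consulting the backend.
 */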
2493 
2494 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2495 
2496 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2497                           TCGTemp *ret, TCGTemp **args)
2498 {
2499     TCGv_i64 extend_free[MAX_CALL_IARGS];
2500     int n_extend = 0;
2501     TCGOp *op;
2502     int i, n, pi = 0, total_args;
2503 
2504     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2505         init_call_layout(info);
2506         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2507     }
2508 
2509     total_args = info->nr_out + info->nr_in + 2;
2510     op = tcg_op_alloc(INDEX_op_call, total_args);
2511 
2512 #ifdef CONFIG_PLUGIN
2513     /* Flag helpers that may affect guest state */
2514     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2515         tcg_ctx->plugin_insn->calls_helpers = true;
2516     }
2517 #endif
2518 
2519     TCGOP_CALLO(op) = n = info->nr_out;
2520     switch (n) {
2521     case 0:
2522         tcg_debug_assert(ret == NULL);
2523         break;
2524     case 1:
2525         tcg_debug_assert(ret != NULL);
2526         op->args[pi++] = temp_arg(ret);
2527         break;
2528     case 2:
2529     case 4:
2530         tcg_debug_assert(ret != NULL);
2531         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2532         tcg_debug_assert(ret->temp_subindex == 0);
2533         for (i = 0; i < n; ++i) {
2534             op->args[pi++] = temp_arg(ret + i);
2535         }
2536         break;
2537     default:
2538         g_assert_not_reached();
2539     }
2540 
2541     TCGOP_CALLI(op) = n = info->nr_in;
2542     for (i = 0; i < n; i++) {
2543         const TCGCallArgumentLoc *loc = &info->in[i];
2544         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2545 
2546         switch (loc->kind) {
2547         case TCG_CALL_ARG_NORMAL:
2548         case TCG_CALL_ARG_BY_REF:
2549         case TCG_CALL_ARG_BY_REF_N:
2550             op->args[pi++] = temp_arg(ts);
2551             break;
2552 
2553         case TCG_CALL_ARG_EXTEND_U:
2554         case TCG_CALL_ARG_EXTEND_S:
2555             {
2556                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2557                 TCGv_i32 orig = temp_tcgv_i32(ts);
2558 
2559                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2560                     tcg_gen_ext_i32_i64(temp, orig);
2561                 } else {
2562                     tcg_gen_extu_i32_i64(temp, orig);
2563                 }
2564                 op->args[pi++] = tcgv_i64_arg(temp);
2565                 extend_free[n_extend++] = temp;
2566             }
2567             break;
2568 
2569         default:
2570             g_assert_not_reached();
2571         }
2572     }
2573     op->args[pi++] = (uintptr_t)func;
2574     op->args[pi++] = (uintptr_t)info;
2575     tcg_debug_assert(pi == total_args);
2576 
2577     if (tcg_ctx->emit_before_op) {
2578         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2579     } else {
2580         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2581     }
2582 
2583     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2584     for (i = 0; i < n_extend; ++i) {
2585         tcg_temp_free_i64(extend_free[i]);
2586     }
2587 }
2588 
2589 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2590 {
2591     tcg_gen_callN(func, info, ret, NULL);
2592 }
2593 
2594 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2595 {
2596     tcg_gen_callN(func, info, ret, &t1);
2597 }
2598 
2599 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2600                    TCGTemp *t1, TCGTemp *t2)
2601 {
2602     TCGTemp *args[2] = { t1, t2 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2607                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2608 {
2609     TCGTemp *args[3] = { t1, t2, t3 };
2610     tcg_gen_callN(func, info, ret, args);
2611 }
2612 
2613 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2614                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2615 {
2616     TCGTemp *args[4] = { t1, t2, t3, t4 };
2617     tcg_gen_callN(func, info, ret, args);
2618 }
2619 
2620 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2621                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2622 {
2623     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2624     tcg_gen_callN(func, info, ret, args);
2625 }
2626 
2627 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2628                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2629                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2630 {
2631     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2632     tcg_gen_callN(func, info, ret, args);
2633 }
2634 
2635 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2636                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2637                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2638 {
2639     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2640     tcg_gen_callN(func, info, ret, args);
2641 }
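
/*
 * These wrappers are rarely called directly: each DEF_HELPER_*
 * declaration expands (via include/exec/helper-gen.h.inc) into a
 * gen_helper_<name>() inline that passes the matching
 * helper_info_<name> here.  Sketch for a hypothetical two-argument
 * i32 helper "foo":
 *
 *     tcg_gen_call2(helper_foo, &helper_info_foo, tcgv_i32_temp(ret),
 *                   tcgv_i32_temp(a), tcgv_i32_temp(b));
 */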
2642 
2643 static void tcg_reg_alloc_start(TCGContext *s)
2644 {
2645     int i, n;
2646 
2647     for (i = 0, n = s->nb_temps; i < n; i++) {
2648         TCGTemp *ts = &s->temps[i];
2649         TCGTempVal val = TEMP_VAL_MEM;
2650 
2651         switch (ts->kind) {
2652         case TEMP_CONST:
2653             val = TEMP_VAL_CONST;
2654             break;
2655         case TEMP_FIXED:
2656             val = TEMP_VAL_REG;
2657             break;
2658         case TEMP_GLOBAL:
2659             break;
2660         case TEMP_EBB:
2661             val = TEMP_VAL_DEAD;
2662             /* fall through */
2663         case TEMP_TB:
2664             ts->mem_allocated = 0;
2665             break;
2666         default:
2667             g_assert_not_reached();
2668         }
2669         ts->val_type = val;
2670     }
2671 
2672     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2673 }
2674 
2675 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2676                                  TCGTemp *ts)
2677 {
2678     int idx = temp_idx(ts);
2679 
2680     switch (ts->kind) {
2681     case TEMP_FIXED:
2682     case TEMP_GLOBAL:
2683         pstrcpy(buf, buf_size, ts->name);
2684         break;
2685     case TEMP_TB:
2686         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2687         break;
2688     case TEMP_EBB:
2689         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2690         break;
2691     case TEMP_CONST:
2692         switch (ts->type) {
2693         case TCG_TYPE_I32:
2694             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2695             break;
2696 #if TCG_TARGET_REG_BITS > 32
2697         case TCG_TYPE_I64:
2698             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2699             break;
2700 #endif
2701         case TCG_TYPE_V64:
2702         case TCG_TYPE_V128:
2703         case TCG_TYPE_V256:
2704             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2705                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2706             break;
2707         default:
2708             g_assert_not_reached();
2709         }
2710         break;
2711     }
2712     return buf;
2713 }
2714 
2715 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2716                              int buf_size, TCGArg arg)
2717 {
2718     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2719 }
2720 
2721 static const char * const cond_name[] =
2722 {
2723     [TCG_COND_NEVER] = "never",
2724     [TCG_COND_ALWAYS] = "always",
2725     [TCG_COND_EQ] = "eq",
2726     [TCG_COND_NE] = "ne",
2727     [TCG_COND_LT] = "lt",
2728     [TCG_COND_GE] = "ge",
2729     [TCG_COND_LE] = "le",
2730     [TCG_COND_GT] = "gt",
2731     [TCG_COND_LTU] = "ltu",
2732     [TCG_COND_GEU] = "geu",
2733     [TCG_COND_LEU] = "leu",
2734     [TCG_COND_GTU] = "gtu",
2735     [TCG_COND_TSTEQ] = "tsteq",
2736     [TCG_COND_TSTNE] = "tstne",
2737 };
2738 
2739 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2740 {
2741     [MO_UB]   = "ub",
2742     [MO_SB]   = "sb",
2743     [MO_LEUW] = "leuw",
2744     [MO_LESW] = "lesw",
2745     [MO_LEUL] = "leul",
2746     [MO_LESL] = "lesl",
2747     [MO_LEUQ] = "leq",
2748     [MO_BEUW] = "beuw",
2749     [MO_BESW] = "besw",
2750     [MO_BEUL] = "beul",
2751     [MO_BESL] = "besl",
2752     [MO_BEUQ] = "beq",
2753     [MO_128 + MO_BE] = "beo",
2754     [MO_128 + MO_LE] = "leo",
2755 };
2756 
2757 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2758     [MO_UNALN >> MO_ASHIFT]    = "un+",
2759     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2760     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2761     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2762     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2763     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2764     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2765     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2766 };
2767 
2768 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2769     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2770     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2771     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2772     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2773     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2774     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2775 };
2776 
2777 static const char bswap_flag_name[][6] = {
2778     [TCG_BSWAP_IZ] = "iz",
2779     [TCG_BSWAP_OZ] = "oz",
2780     [TCG_BSWAP_OS] = "os",
2781     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2782     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2783 };
2784 
2785 #ifdef CONFIG_PLUGIN
2786 static const char * const plugin_from_name[] = {
2787     "from-tb",
2788     "from-insn",
2789     "after-insn",
2790     "after-tb",
2791 };
2792 #endif
2793 
2794 static inline bool tcg_regset_single(TCGRegSet d)
2795 {
2796     return (d & (d - 1)) == 0;
2797 }
2798 
2799 static inline TCGReg tcg_regset_first(TCGRegSet d)
2800 {
2801     if (TCG_TARGET_NB_REGS <= 32) {
2802         return ctz32(d);
2803     } else {
2804         return ctz64(d);
2805     }
2806 }
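
/*
 * Standard bit tricks: a set with at most one register satisfies
 * d & (d - 1) == 0 (clearing the lowest set bit leaves nothing),
 * and count-trailing-zeros names that register, e.g.
 * tcg_regset_first(0x14) == 2.
 */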
2807 
2808 /* Return only the number of characters output -- no error return. */
2809 #define ne_fprintf(...) \
2810     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2811 
2812 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2813 {
2814     char buf[128];
2815     TCGOp *op;
2816 
2817     QTAILQ_FOREACH(op, &s->ops, link) {
2818         int i, k, nb_oargs, nb_iargs, nb_cargs;
2819         const TCGOpDef *def;
2820         TCGOpcode c;
2821         int col = 0;
2822 
2823         c = op->opc;
2824         def = &tcg_op_defs[c];
2825 
2826         if (c == INDEX_op_insn_start) {
2827             nb_oargs = 0;
2828             col += ne_fprintf(f, "\n ----");
2829 
2830             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2831                 col += ne_fprintf(f, " %016" PRIx64,
2832                                   tcg_get_insn_start_param(op, i));
2833             }
2834         } else if (c == INDEX_op_call) {
2835             const TCGHelperInfo *info = tcg_call_info(op);
2836             void *func = tcg_call_func(op);
2837 
2838             /* variable number of arguments */
2839             nb_oargs = TCGOP_CALLO(op);
2840             nb_iargs = TCGOP_CALLI(op);
2841             nb_cargs = def->nb_cargs;
2842 
2843             col += ne_fprintf(f, " %s ", def->name);
2844 
2845             /*
2846              * Print the function name from TCGHelperInfo, if available.
2847              * Note that plugins have a template function for the info,
2848              * but the actual function pointer comes from the plugin.
2849              */
2850             if (func == info->func) {
2851                 col += ne_fprintf(f, "%s", info->name);
2852             } else {
2853                 col += ne_fprintf(f, "plugin(%p)", func);
2854             }
2855 
2856             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2857             for (i = 0; i < nb_oargs; i++) {
2858                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2859                                                             op->args[i]));
2860             }
2861             for (i = 0; i < nb_iargs; i++) {
2862                 TCGArg arg = op->args[nb_oargs + i];
2863                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2864                 col += ne_fprintf(f, ",%s", t);
2865             }
2866         } else {
2867             if (def->flags & TCG_OPF_INT) {
2868                 col += ne_fprintf(f, " %s_i%d ",
2869                                   def->name,
2870                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2871             } else if (def->flags & TCG_OPF_VECTOR) {
2872                 col += ne_fprintf(f, "%s v%d,e%d,",
2873                                   def->name,
2874                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2875                                   8 << TCGOP_VECE(op));
2876             } else {
2877                 col += ne_fprintf(f, " %s ", def->name);
2878             }
2879 
2880             nb_oargs = def->nb_oargs;
2881             nb_iargs = def->nb_iargs;
2882             nb_cargs = def->nb_cargs;
2883 
2884             k = 0;
2885             for (i = 0; i < nb_oargs; i++) {
2886                 const char *sep =  k ? "," : "";
2887                 col += ne_fprintf(f, "%s%s", sep,
2888                                   tcg_get_arg_str(s, buf, sizeof(buf),
2889                                                   op->args[k++]));
2890             }
2891             for (i = 0; i < nb_iargs; i++) {
2892                 const char *sep =  k ? "," : "";
2893                 col += ne_fprintf(f, "%s%s", sep,
2894                                   tcg_get_arg_str(s, buf, sizeof(buf),
2895                                                   op->args[k++]));
2896             }
2897             switch (c) {
2898             case INDEX_op_brcond:
2899             case INDEX_op_setcond:
2900             case INDEX_op_negsetcond:
2901             case INDEX_op_movcond:
2902             case INDEX_op_brcond2_i32:
2903             case INDEX_op_setcond2_i32:
2904             case INDEX_op_cmp_vec:
2905             case INDEX_op_cmpsel_vec:
2906                 if (op->args[k] < ARRAY_SIZE(cond_name)
2907                     && cond_name[op->args[k]]) {
2908                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2909                 } else {
2910                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2911                 }
2912                 i = 1;
2913                 break;
2914             case INDEX_op_qemu_ld_i32:
2915             case INDEX_op_qemu_st_i32:
2916             case INDEX_op_qemu_st8_i32:
2917             case INDEX_op_qemu_ld_i64:
2918             case INDEX_op_qemu_st_i64:
2919             case INDEX_op_qemu_ld_i128:
2920             case INDEX_op_qemu_st_i128:
2921                 {
2922                     const char *s_al, *s_op, *s_at;
2923                     MemOpIdx oi = op->args[k++];
2924                     MemOp mop = get_memop(oi);
2925                     unsigned ix = get_mmuidx(oi);
2926 
2927                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2928                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2929                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2930                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2931 
2932                     /* If all fields are accounted for, print symbolically. */
2933                     if (!mop && s_al && s_op && s_at) {
2934                         col += ne_fprintf(f, ",%s%s%s,%u",
2935                                           s_at, s_al, s_op, ix);
2936                     } else {
2937                         mop = get_memop(oi);
2938                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2939                     }
2940                     i = 1;
2941                 }
2942                 break;
2943             case INDEX_op_bswap16:
2944             case INDEX_op_bswap32_i32:
2945             case INDEX_op_bswap32_i64:
2946             case INDEX_op_bswap64_i64:
2947                 {
2948                     TCGArg flags = op->args[k];
2949                     const char *name = NULL;
2950 
2951                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2952                         name = bswap_flag_name[flags];
2953                     }
2954                     if (name) {
2955                         col += ne_fprintf(f, ",%s", name);
2956                     } else {
2957                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2958                     }
2959                     i = k = 1;
2960                 }
2961                 break;
2962 #ifdef CONFIG_PLUGIN
2963             case INDEX_op_plugin_cb:
2964                 {
2965                     TCGArg from = op->args[k++];
2966                     const char *name = NULL;
2967 
2968                     if (from < ARRAY_SIZE(plugin_from_name)) {
2969                         name = plugin_from_name[from];
2970                     }
2971                     if (name) {
2972                         col += ne_fprintf(f, "%s", name);
2973                     } else {
2974                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2975                     }
2976                     i = 1;
2977                 }
2978                 break;
2979 #endif
2980             default:
2981                 i = 0;
2982                 break;
2983             }
2984             switch (c) {
2985             case INDEX_op_set_label:
2986             case INDEX_op_br:
2987             case INDEX_op_brcond:
2988             case INDEX_op_brcond2_i32:
2989                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2990                                   arg_label(op->args[k])->id);
2991                 i++, k++;
2992                 break;
2993             case INDEX_op_mb:
2994                 {
2995                     TCGBar membar = op->args[k];
2996                     const char *b_op, *m_op;
2997 
2998                     switch (membar & TCG_BAR_SC) {
2999                     case 0:
3000                         b_op = "none";
3001                         break;
3002                     case TCG_BAR_LDAQ:
3003                         b_op = "acq";
3004                         break;
3005                     case TCG_BAR_STRL:
3006                         b_op = "rel";
3007                         break;
3008                     case TCG_BAR_SC:
3009                         b_op = "seq";
3010                         break;
3011                     default:
3012                         g_assert_not_reached();
3013                     }
3014 
3015                     switch (membar & TCG_MO_ALL) {
3016                     case 0:
3017                         m_op = "none";
3018                         break;
3019                     case TCG_MO_LD_LD:
3020                         m_op = "rr";
3021                         break;
3022                     case TCG_MO_LD_ST:
3023                         m_op = "rw";
3024                         break;
3025                     case TCG_MO_ST_LD:
3026                         m_op = "wr";
3027                         break;
3028                     case TCG_MO_ST_ST:
3029                         m_op = "ww";
3030                         break;
3031                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3032                         m_op = "rr+rw";
3033                         break;
3034                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3035                         m_op = "rr+wr";
3036                         break;
3037                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3038                         m_op = "rr+ww";
3039                         break;
3040                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3041                         m_op = "rw+wr";
3042                         break;
3043                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3044                         m_op = "rw+ww";
3045                         break;
3046                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3047                         m_op = "wr+ww";
3048                         break;
3049                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3050                         m_op = "rr+rw+wr";
3051                         break;
3052                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3053                         m_op = "rr+rw+ww";
3054                         break;
3055                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3056                         m_op = "rr+wr+ww";
3057                         break;
3058                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3059                         m_op = "rw+wr+ww";
3060                         break;
3061                     case TCG_MO_ALL:
3062                         m_op = "all";
3063                         break;
3064                     default:
3065                         g_assert_not_reached();
3066                     }
3067 
3068                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3069                     i++, k++;
3070                 }
3071                 break;
3072             default:
3073                 break;
3074             }
3075             for (; i < nb_cargs; i++, k++) {
3076                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3077                                   op->args[k]);
3078             }
3079         }
3080 
3081         if (have_prefs || op->life) {
3082             for (; col < 40; ++col) {
3083                 putc(' ', f);
3084             }
3085         }
3086 
3087         if (op->life) {
3088             unsigned life = op->life;
3089 
3090             if (life & (SYNC_ARG * 3)) {
3091                 ne_fprintf(f, "  sync:");
3092                 for (i = 0; i < 2; ++i) {
3093                     if (life & (SYNC_ARG << i)) {
3094                         ne_fprintf(f, " %d", i);
3095                     }
3096                 }
3097             }
3098             life /= DEAD_ARG;
3099             if (life) {
3100                 ne_fprintf(f, "  dead:");
3101                 for (i = 0; life; ++i, life >>= 1) {
3102                     if (life & 1) {
3103                         ne_fprintf(f, " %d", i);
3104                     }
3105                 }
3106             }
3107         }
3108 
3109         if (have_prefs) {
3110             for (i = 0; i < nb_oargs; ++i) {
3111                 TCGRegSet set = output_pref(op, i);
3112 
3113                 if (i == 0) {
3114                     ne_fprintf(f, "  pref=");
3115                 } else {
3116                     ne_fprintf(f, ",");
3117                 }
3118                 if (set == 0) {
3119                     ne_fprintf(f, "none");
3120                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3121                     ne_fprintf(f, "all");
3122 #ifdef CONFIG_DEBUG_TCG
3123                 } else if (tcg_regset_single(set)) {
3124                     TCGReg reg = tcg_regset_first(set);
3125                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3126 #endif
3127                 } else if (TCG_TARGET_NB_REGS <= 32) {
3128                     ne_fprintf(f, "0x%x", (uint32_t)set);
3129                 } else {
3130                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3131                 }
3132             }
3133         }
3134 
3135         putc('\n', f);
3136     }
3137 }
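
/*
 * Sample of the resulting dump (shape only; names and numbers are
 * illustrative and vary by target and optimization state):
 *
 *  ---- 0000000000400510 0000000000000000
 *  mov_i32 tmp0,loc1                        dead: 1
 *  add_i32 tmp0,tmp0,$0x1                   pref=all
 *  brcond_i32 tmp0,$0x0,eq,$L1              dead: 0 1
 */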
3138 
3139 /* We give more priority to constraints with fewer registers. */
3140 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3141 {
3142     int n;
3143 
3144     arg_ct += k;
3145     n = ctpop64(arg_ct->regs);
3146 
3147     /*
3148      * Sort constraints of a single register first, which includes output
3149      * aliases (which must exactly match the input already allocated).
3150      */
3151     if (n == 1 || arg_ct->oalias) {
3152         return INT_MAX;
3153     }
3154 
3155     /*
3156      * Sort register pairs next, first then second immediately after.
3157      * Arbitrarily sort multiple pairs by the index of the first reg;
3158      * there shouldn't be many pairs.
3159      */
3160     switch (arg_ct->pair) {
3161     case 1:
3162     case 3:
3163         return (k + 1) * 2;
3164     case 2:
3165         return (arg_ct->pair_index + 1) * 2 - 1;
3166     }
3167 
3168     /* Finally, sort by decreasing register count. */
3169     assert(n > 1);
3170     return -n;
3171 }
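
/*
 * E.g. a single-register or output-alias constraint sorts first
 * (INT_MAX), the halves of a register pair sort next with the first
 * half just above its second, and a plain "r" constraint spanning 16
 * registers sorts last with priority -16.
 */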
3172 
3173 /* sort from highest priority to lowest */
3174 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3175 {
3176     int i, j;
3177 
3178     for (i = 0; i < n; i++) {
3179         a[start + i].sort_index = start + i;
3180     }
3181     if (n <= 1) {
3182         return;
3183     }
3184     for (i = 0; i < n - 1; i++) {
3185         for (j = i + 1; j < n; j++) {
3186             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3187             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3188             if (p1 < p2) {
3189                 int tmp = a[start + i].sort_index;
3190                 a[start + i].sort_index = a[start + j].sort_index;
3191                 a[start + j].sort_index = tmp;
3192             }
3193         }
3194     }
3195 }
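
/*
 * Note that only sort_index is permuted; args_ct itself stays in
 * argument order, so args_ct[i] still lines up with op->args[i].
 * Consumers that want priority order walk the constraints via
 * sort_index rather than by position.
 */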
3196 
3197 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3198 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3199 
3200 static void process_constraint_sets(void)
3201 {
3202     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3203         const TCGConstraintSet *tdefs = &constraint_sets[c];
3204         TCGArgConstraint *args_ct = all_cts[c];
3205         int nb_oargs = tdefs->nb_oargs;
3206         int nb_iargs = tdefs->nb_iargs;
3207         int nb_args = nb_oargs + nb_iargs;
3208         bool saw_alias_pair = false;
3209 
3210         for (int i = 0; i < nb_args; i++) {
3211             const char *ct_str = tdefs->args_ct_str[i];
3212             bool input_p = i >= nb_oargs;
3213             int o;
3214 
3215             switch (*ct_str) {
3216             case '0' ... '9':
3217                 o = *ct_str - '0';
3218                 tcg_debug_assert(input_p);
3219                 tcg_debug_assert(o < nb_oargs);
3220                 tcg_debug_assert(args_ct[o].regs != 0);
3221                 tcg_debug_assert(!args_ct[o].oalias);
3222                 args_ct[i] = args_ct[o];
3223                 /* The output sets oalias.  */
3224                 args_ct[o].oalias = 1;
3225                 args_ct[o].alias_index = i;
3226                 /* The input sets ialias. */
3227                 args_ct[i].ialias = 1;
3228                 args_ct[i].alias_index = o;
3229                 if (args_ct[i].pair) {
3230                     saw_alias_pair = true;
3231                 }
3232                 tcg_debug_assert(ct_str[1] == '\0');
3233                 continue;
3234 
3235             case '&':
3236                 tcg_debug_assert(!input_p);
3237                 args_ct[i].newreg = true;
3238                 ct_str++;
3239                 break;
3240 
3241             case 'p': /* plus */
3242                 /* Allocate to the register after the previous. */
3243                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3244                 o = i - 1;
3245                 tcg_debug_assert(!args_ct[o].pair);
3246                 tcg_debug_assert(!args_ct[o].ct);
3247                 args_ct[i] = (TCGArgConstraint){
3248                     .pair = 2,
3249                     .pair_index = o,
3250                     .regs = args_ct[o].regs << 1,
3251                     .newreg = args_ct[o].newreg,
3252                 };
3253                 args_ct[o].pair = 1;
3254                 args_ct[o].pair_index = i;
3255                 tcg_debug_assert(ct_str[1] == '\0');
3256                 continue;
3257 
3258             case 'm': /* minus */
3259                 /* Allocate to the register before the previous. */
3260                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3261                 o = i - 1;
3262                 tcg_debug_assert(!args_ct[o].pair);
3263                 tcg_debug_assert(!args_ct[o].ct);
3264                 args_ct[i] = (TCGArgConstraint){
3265                     .pair = 1,
3266                     .pair_index = o,
3267                     .regs = args_ct[o].regs >> 1,
3268                     .newreg = args_ct[o].newreg,
3269                 };
3270                 args_ct[o].pair = 2;
3271                 args_ct[o].pair_index = i;
3272                 tcg_debug_assert(ct_str[1] == '\0');
3273                 continue;
3274             }
3275 
3276             do {
3277                 switch (*ct_str) {
3278                 case 'i':
3279                     args_ct[i].ct |= TCG_CT_CONST;
3280                     break;
3281 #ifdef TCG_REG_ZERO
3282                 case 'z':
3283                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3284                     break;
3285 #endif
3286 
3287                 /* Include all of the target-specific constraints. */
3288 
3289 #undef CONST
3290 #define CONST(CASE, MASK) \
3291     case CASE: args_ct[i].ct |= MASK; break;
3292 #define REGS(CASE, MASK) \
3293     case CASE: args_ct[i].regs |= MASK; break;
3294 
3295 #include "tcg-target-con-str.h"
3296 
3297 #undef REGS
3298 #undef CONST
3299                 default:
3300                 case '0' ... '9':
3301                 case '&':
3302                 case 'p':
3303                 case 'm':
3304                     /* Typo in TCGConstraintSet constraint. */
3305                     g_assert_not_reached();
3306                 }
3307             } while (*++ct_str != '\0');
3308         }
3309 
3310         /*
3311          * Fix up output pairs that are aliased with inputs.
3312          * When we created the alias, we copied pair from the output.
3313          * There are three cases:
3314          *    (1a) Pairs of inputs alias pairs of outputs.
3315          *    (1b) One input aliases the first of a pair of outputs.
3316          *    (2)  One input aliases the second of a pair of outputs.
3317          *
3318          * Case 1a is handled by making sure that the pair_index'es are
3319          * properly updated so that they appear the same as a pair of inputs.
3320          *
3321          * Case 1b is handled by setting the pair_index of the input to
3322          * itself, simply so it doesn't point to an unrelated argument.
3323          * Since we don't encounter the "second" during the input allocation
3324          * phase, nothing happens with the second half of the input pair.
3325          *
3326          * Case 2 is handled by setting the second input to pair=3, the
3327          * first output to pair=3, and the pair_index'es to match.
3328          */
3329         if (saw_alias_pair) {
3330             for (int i = nb_oargs; i < nb_args; i++) {
3331                 int o, o2, i2;
3332 
3333                 /*
3334                  * Since [0-9pm] must be alone in the constraint string,
3335                  * the only way they can both be set is if the pair comes
3336                  * from the output alias.
3337                  */
3338                 if (!args_ct[i].ialias) {
3339                     continue;
3340                 }
3341                 switch (args_ct[i].pair) {
3342                 case 0:
3343                     break;
3344                 case 1:
3345                     o = args_ct[i].alias_index;
3346                     o2 = args_ct[o].pair_index;
3347                     tcg_debug_assert(args_ct[o].pair == 1);
3348                     tcg_debug_assert(args_ct[o2].pair == 2);
3349                     if (args_ct[o2].oalias) {
3350                         /* Case 1a */
3351                         i2 = args_ct[o2].alias_index;
3352                         tcg_debug_assert(args_ct[i2].pair == 2);
3353                         args_ct[i2].pair_index = i;
3354                         args_ct[i].pair_index = i2;
3355                     } else {
3356                         /* Case 1b */
3357                         args_ct[i].pair_index = i;
3358                     }
3359                     break;
3360                 case 2:
3361                     o = args_ct[i].alias_index;
3362                     o2 = args_ct[o].pair_index;
3363                     tcg_debug_assert(args_ct[o].pair == 2);
3364                     tcg_debug_assert(args_ct[o2].pair == 1);
3365                     if (args_ct[o2].oalias) {
3366                         /* Case 1a */
3367                         i2 = args_ct[o2].alias_index;
3368                         tcg_debug_assert(args_ct[i2].pair == 1);
3369                         args_ct[i2].pair_index = i;
3370                         args_ct[i].pair_index = i2;
3371                     } else {
3372                         /* Case 2 */
3373                         args_ct[i].pair = 3;
3374                         args_ct[o2].pair = 3;
3375                         args_ct[i].pair_index = o2;
3376                         args_ct[o2].pair_index = i;
3377                     }
3378                     break;
3379                 default:
3380                     g_assert_not_reached();
3381                 }
3382             }
3383         }
3384 
3385         /* sort the constraints (XXX: this is just a heuristic) */
3386         sort_constraints(args_ct, 0, nb_oargs);
3387         sort_constraints(args_ct, nb_oargs, nb_iargs);
3388     }
3389 }
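
/*
 * Worked example (hypothetical constraint set, assuming the target
 * defines 'r' as all registers): nb_oargs = 1, nb_iargs = 2,
 * args_ct_str = { "r", "0", "ri" }.  Parsing yields:
 *   arg 0 (output): regs = all, oalias = 1, alias_index = 1
 *   arg 1 (input):  copy of arg 0, ialias = 1, alias_index = 0
 *   arg 2 (input):  regs = all, ct |= TCG_CT_CONST
 * so the output must share a register with input 1, and input 2 may
 * be either a register or an immediate.
 */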
3390 
3391 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3392 {
3393     TCGOpcode opc = op->opc;
3394     TCGType type = TCGOP_TYPE(op);
3395     unsigned flags = TCGOP_FLAGS(op);
3396     const TCGOpDef *def = &tcg_op_defs[opc];
3397     const TCGOutOp *outop = all_outop[opc];
3398     TCGConstraintSetIndex con_set;
3399 
3400     if (def->flags & TCG_OPF_NOT_PRESENT) {
3401         return empty_cts;
3402     }
3403 
3404     if (outop) {
3405         con_set = outop->static_constraint;
3406         if (con_set == C_Dynamic) {
3407             con_set = outop->dynamic_constraint(type, flags);
3408         }
3409     } else {
3410         con_set = tcg_target_op_def(opc, type, flags);
3411     }
3412     tcg_debug_assert(con_set >= 0);
3413     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3414 
3415     /* The constraint arguments must match TCGOpcode arguments. */
3416     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3417     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3418 
3419     return all_cts[con_set];
3420 }
3421 
3422 static void remove_label_use(TCGOp *op, int idx)
3423 {
3424     TCGLabel *label = arg_label(op->args[idx]);
3425     TCGLabelUse *use;
3426 
3427     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3428         if (use->op == op) {
3429             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3430             return;
3431         }
3432     }
3433     g_assert_not_reached();
3434 }
3435 
3436 void tcg_op_remove(TCGContext *s, TCGOp *op)
3437 {
3438     switch (op->opc) {
3439     case INDEX_op_br:
3440         remove_label_use(op, 0);
3441         break;
3442     case INDEX_op_brcond:
3443         remove_label_use(op, 3);
3444         break;
3445     case INDEX_op_brcond2_i32:
3446         remove_label_use(op, 5);
3447         break;
3448     default:
3449         break;
3450     }
3451 
3452     QTAILQ_REMOVE(&s->ops, op, link);
3453     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3454     s->nb_ops--;
3455 }
3456 
3457 void tcg_remove_ops_after(TCGOp *op)
3458 {
3459     TCGContext *s = tcg_ctx;
3460 
3461     while (true) {
3462         TCGOp *last = tcg_last_op();
3463         if (last == op) {
3464             return;
3465         }
3466         tcg_op_remove(s, last);
3467     }
3468 }
3469 
3470 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3471 {
3472     TCGContext *s = tcg_ctx;
3473     TCGOp *op = NULL;
3474 
3475     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3476         QTAILQ_FOREACH(op, &s->free_ops, link) {
3477             if (nargs <= op->nargs) {
3478                 QTAILQ_REMOVE(&s->free_ops, op, link);
3479                 nargs = op->nargs;
3480                 goto found;
3481             }
3482         }
3483     }
3484 
3485     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3486     nargs = MAX(4, nargs);
3487     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3488 
3489  found:
3490     memset(op, 0, offsetof(TCGOp, link));
3491     op->opc = opc;
3492     op->nargs = nargs;
3493 
3494     /* Check for bitfield overflow. */
3495     tcg_debug_assert(op->nargs == nargs);
3496 
3497     s->nb_ops++;
3498     return op;
3499 }
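
/*
 * A sketch of the reuse policy: freed ops stay on s->free_ops and are
 * reused when their nargs is large enough, so allocating a 3-arg op
 * may return a previously freed 4-arg op with nargs still 4.  Fresh
 * allocations round nargs up to 4 to keep such reuse likely.
 */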
3500 
3501 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3502 {
3503     TCGOp *op = tcg_op_alloc(opc, nargs);
3504 
3505     if (tcg_ctx->emit_before_op) {
3506         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3507     } else {
3508         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3509     }
3510     return op;
3511 }
3512 
3513 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3514                             TCGOpcode opc, TCGType type, unsigned nargs)
3515 {
3516     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3517 
3518     TCGOP_TYPE(new_op) = type;
3519     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3520     return new_op;
3521 }
3522 
3523 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3524                            TCGOpcode opc, TCGType type, unsigned nargs)
3525 {
3526     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3527 
3528     TCGOP_TYPE(new_op) = type;
3529     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3530     return new_op;
3531 }
3532 
3533 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3534 {
3535     TCGLabelUse *u;
3536 
3537     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3538         TCGOp *op = u->op;
3539         switch (op->opc) {
3540         case INDEX_op_br:
3541             op->args[0] = label_arg(to);
3542             break;
3543         case INDEX_op_brcond:
3544             op->args[3] = label_arg(to);
3545             break;
3546         case INDEX_op_brcond2_i32:
3547             op->args[5] = label_arg(to);
3548             break;
3549         default:
3550             g_assert_not_reached();
3551         }
3552     }
3553 
3554     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3555 }
3556 
3557 /* Reachability analysis: remove unreachable code.  */
3558 static void __attribute__((noinline))
3559 reachable_code_pass(TCGContext *s)
3560 {
3561     TCGOp *op, *op_next, *op_prev;
3562     bool dead = false;
3563 
3564     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3565         bool remove = dead;
3566         TCGLabel *label;
3567 
3568         switch (op->opc) {
3569         case INDEX_op_set_label:
3570             label = arg_label(op->args[0]);
3571 
3572             /*
3573              * Note that the first op in the TB is always a load,
3574              * so there is always something before a label.
3575              */
3576             op_prev = QTAILQ_PREV(op, link);
3577 
3578             /*
3579              * If we find two sequential labels, move all branches to
3580              * reference the second label and remove the first label.
3581              * Do this before branch to next optimization, so that the
3582              * middle label is out of the way.
3583              */
3584             if (op_prev->opc == INDEX_op_set_label) {
3585                 move_label_uses(label, arg_label(op_prev->args[0]));
3586                 tcg_op_remove(s, op_prev);
3587                 op_prev = QTAILQ_PREV(op, link);
3588             }
3589 
3590             /*
3591              * Optimization can fold conditional branches to unconditional.
3592              * If we find a label which is preceded by an unconditional
3593              * branch to next, remove the branch.  We couldn't do this when
3594              * processing the branch because any dead code between the branch
3595              * and label had not yet been removed.
3596              */
3597             if (op_prev->opc == INDEX_op_br &&
3598                 label == arg_label(op_prev->args[0])) {
3599                 tcg_op_remove(s, op_prev);
3600                 /* Fall through means insns become live again.  */
3601                 dead = false;
3602             }
3603 
3604             if (QSIMPLEQ_EMPTY(&label->branches)) {
3605                 /*
3606                  * While there is an occasional backward branch, virtually
3607                  * all branches generated by the translators are forward.
3608              * Which means that generally we will have already removed
3609              * all references to the label that will ever exist, and
3610              * there is little to be gained by iterating.
3611                  */
3612                 remove = true;
3613             } else {
3614                 /* Once we see a label, insns become live again.  */
3615                 dead = false;
3616                 remove = false;
3617             }
3618             break;
3619 
3620         case INDEX_op_br:
3621         case INDEX_op_exit_tb:
3622         case INDEX_op_goto_ptr:
3623             /* Unconditional branches; everything following is dead.  */
3624             dead = true;
3625             break;
3626 
3627         case INDEX_op_call:
3628             /* Notice noreturn helper calls, raising exceptions.  */
3629             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3630                 dead = true;
3631             }
3632             break;
3633 
3634         case INDEX_op_insn_start:
3635             /* Never remove -- we need to keep these for unwind.  */
3636             remove = false;
3637             break;
3638 
3639         default:
3640             break;
3641         }
3642 
3643         if (remove) {
3644             tcg_op_remove(s, op);
3645         }
3646     }
3647 }
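
/*
 * Example transformation (illustrative IR): after optimization folds a
 * conditional branch, we might have
 *     br $L1
 *     mov t0, t1        <- dead, removed while scanning
 *     set_label $L1     <- the br was L1's only use; both are removed
 * The br-to-next is removed when set_label is reached, which empties
 * the label's use list, so the label itself is then removed too.
 */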
3648 
3649 #define TS_DEAD  1
3650 #define TS_MEM   2
3651 
3652 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3653 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
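
/*
 * Layout sketch of op->life, assuming the usual header definitions
 * (SYNC_ARG == 1, DEAD_ARG == 4): bits 0-1 request a sync of outputs
 * 0 and 1; from bit 2 up, DEAD_ARG << i marks argument i as dead.
 * E.g. a 1-output, 2-input op whose second input dies and whose
 * output must be synced has life == (SYNC_ARG << 0) | (DEAD_ARG << 2),
 * dumped as "sync: 0  dead: 2".
 */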
3654 
3655 /* For liveness_pass_1, the register preferences for a given temp.  */
3656 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3657 {
3658     return ts->state_ptr;
3659 }
3660 
3661 /* For liveness_pass_1, reset the preferences for a given temp to the
3662  * maximal regset for its type.
3663  */
3664 static inline void la_reset_pref(TCGTemp *ts)
3665 {
3666     *la_temp_pref(ts)
3667         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3668 }
3669 
3670 /* liveness analysis: end of function: all temps are dead, and globals
3671    should be in memory. */
3672 static void la_func_end(TCGContext *s, int ng, int nt)
3673 {
3674     int i;
3675 
3676     for (i = 0; i < ng; ++i) {
3677         s->temps[i].state = TS_DEAD | TS_MEM;
3678         la_reset_pref(&s->temps[i]);
3679     }
3680     for (i = ng; i < nt; ++i) {
3681         s->temps[i].state = TS_DEAD;
3682         la_reset_pref(&s->temps[i]);
3683     }
3684 }
3685 
3686 /* liveness analysis: end of basic block: all temps are dead, globals
3687    and local temps should be in memory. */
3688 static void la_bb_end(TCGContext *s, int ng, int nt)
3689 {
3690     int i;
3691 
3692     for (i = 0; i < nt; ++i) {
3693         TCGTemp *ts = &s->temps[i];
3694         int state;
3695 
3696         switch (ts->kind) {
3697         case TEMP_FIXED:
3698         case TEMP_GLOBAL:
3699         case TEMP_TB:
3700             state = TS_DEAD | TS_MEM;
3701             break;
3702         case TEMP_EBB:
3703         case TEMP_CONST:
3704             state = TS_DEAD;
3705             break;
3706         default:
3707             g_assert_not_reached();
3708         }
3709         ts->state = state;
3710         la_reset_pref(ts);
3711     }
3712 }
3713 
3714 /* liveness analysis: sync globals back to memory.  */
3715 static void la_global_sync(TCGContext *s, int ng)
3716 {
3717     int i;
3718 
3719     for (i = 0; i < ng; ++i) {
3720         int state = s->temps[i].state;
3721         s->temps[i].state = state | TS_MEM;
3722         if (state == TS_DEAD) {
3723             /* If the global was previously dead, reset prefs.  */
3724             la_reset_pref(&s->temps[i]);
3725         }
3726     }
3727 }
3728 
3729 /*
3730  * liveness analysis: conditional branch: all temps are dead unless
3731  * explicitly live-across-conditional-branch, globals and local temps
3732  * should be synced.
3733  */
3734 static void la_bb_sync(TCGContext *s, int ng, int nt)
3735 {
3736     la_global_sync(s, ng);
3737 
3738     for (int i = ng; i < nt; ++i) {
3739         TCGTemp *ts = &s->temps[i];
3740         int state;
3741 
3742         switch (ts->kind) {
3743         case TEMP_TB:
3744             state = ts->state;
3745             ts->state = state | TS_MEM;
3746             if (state != TS_DEAD) {
3747                 continue;
3748             }
3749             break;
3750         case TEMP_EBB:
3751         case TEMP_CONST:
3752             continue;
3753         default:
3754             g_assert_not_reached();
3755         }
3756         la_reset_pref(&s->temps[i]);
3757     }
3758 }
3759 
3760 /* liveness analysis: sync globals back to memory and kill.  */
3761 static void la_global_kill(TCGContext *s, int ng)
3762 {
3763     int i;
3764 
3765     for (i = 0; i < ng; i++) {
3766         s->temps[i].state = TS_DEAD | TS_MEM;
3767         la_reset_pref(&s->temps[i]);
3768     }
3769 }
3770 
3771 /* liveness analysis: note live temps crossing calls.  */
3772 static void la_cross_call(TCGContext *s, int nt)
3773 {
3774     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3775     int i;
3776 
3777     for (i = 0; i < nt; i++) {
3778         TCGTemp *ts = &s->temps[i];
3779         if (!(ts->state & TS_DEAD)) {
3780             TCGRegSet *pset = la_temp_pref(ts);
3781             TCGRegSet set = *pset;
3782 
3783             set &= mask;
3784             /* If the combination is not possible, restart.  */
3785             if (set == 0) {
3786                 set = tcg_target_available_regs[ts->type] & mask;
3787             }
3788             *pset = set;
3789         }
3790     }
3791 }
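
/*
 * Example (illustrative): if a live temp's preference set contains
 * only call-clobbered registers, the intersection with the call-saved
 * mask is empty, so the preference restarts from every call-saved
 * register available for its type rather than being left empty.
 */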
3792 
3793 /*
3794  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3795  * to TEMP_EBB, if possible.
3796  */
3797 static void __attribute__((noinline))
3798 liveness_pass_0(TCGContext *s)
3799 {
3800     void * const multiple_ebb = (void *)(uintptr_t)-1;
3801     int nb_temps = s->nb_temps;
3802     TCGOp *op, *ebb;
3803 
3804     for (int i = s->nb_globals; i < nb_temps; ++i) {
3805         s->temps[i].state_ptr = NULL;
3806     }
3807 
3808     /*
3809      * Represent each EBB by the op at which it begins.  In the case of
3810      * the first EBB, this is the first op, otherwise it is a label.
3811      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3812      * within a single EBB, else MULTIPLE_EBB.
3813      */
3814     ebb = QTAILQ_FIRST(&s->ops);
3815     QTAILQ_FOREACH(op, &s->ops, link) {
3816         const TCGOpDef *def;
3817         int nb_oargs, nb_iargs;
3818 
3819         switch (op->opc) {
3820         case INDEX_op_set_label:
3821             ebb = op;
3822             continue;
3823         case INDEX_op_discard:
3824             continue;
3825         case INDEX_op_call:
3826             nb_oargs = TCGOP_CALLO(op);
3827             nb_iargs = TCGOP_CALLI(op);
3828             break;
3829         default:
3830             def = &tcg_op_defs[op->opc];
3831             nb_oargs = def->nb_oargs;
3832             nb_iargs = def->nb_iargs;
3833             break;
3834         }
3835 
3836         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3837             TCGTemp *ts = arg_temp(op->args[i]);
3838 
3839             if (ts->kind != TEMP_TB) {
3840                 continue;
3841             }
3842             if (ts->state_ptr == NULL) {
3843                 ts->state_ptr = ebb;
3844             } else if (ts->state_ptr != ebb) {
3845                 ts->state_ptr = multiple_ebb;
3846             }
3847         }
3848     }
3849 
3850     /*
3851      * For TEMP_TB that turned out not to be used beyond one EBB,
3852      * reduce the liveness to TEMP_EBB.
3853      */
3854     for (int i = s->nb_globals; i < nb_temps; ++i) {
3855         TCGTemp *ts = &s->temps[i];
3856         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3857             ts->kind = TEMP_EBB;
3858         }
3859     }
3860 }
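
/*
 * Example: a TEMP_TB used only between two labels (one EBB) ends with
 * state_ptr == that EBB's first op and is demoted to TEMP_EBB here.
 * Later passes may then treat it as simply dead at the EBB boundary
 * instead of syncing it back to its memory slot.
 */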
3861 
3862 /* Liveness analysis: update the opc_arg_life array to tell whether a
3863    given input argument is dead. Instructions updating dead
3864    temporaries are removed. */
3865 static void __attribute__((noinline))
3866 liveness_pass_1(TCGContext *s)
3867 {
3868     int nb_globals = s->nb_globals;
3869     int nb_temps = s->nb_temps;
3870     TCGOp *op, *op_prev;
3871     TCGRegSet *prefs;
3872     int i;
3873 
3874     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3875     for (i = 0; i < nb_temps; ++i) {
3876         s->temps[i].state_ptr = prefs + i;
3877     }
3878 
3879     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3880     la_func_end(s, nb_globals, nb_temps);
3881 
3882     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3883         int nb_iargs, nb_oargs;
3884         TCGOpcode opc_new, opc_new2;
3885         TCGLifeData arg_life = 0;
3886         TCGTemp *ts;
3887         TCGOpcode opc = op->opc;
3888         const TCGOpDef *def = &tcg_op_defs[opc];
3889         const TCGArgConstraint *args_ct;
3890 
3891         switch (opc) {
3892         case INDEX_op_call:
3893             {
3894                 const TCGHelperInfo *info = tcg_call_info(op);
3895                 int call_flags = tcg_call_flags(op);
3896 
3897                 nb_oargs = TCGOP_CALLO(op);
3898                 nb_iargs = TCGOP_CALLI(op);
3899 
3900                 /* pure functions can be removed if their result is unused */
3901                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3902                     for (i = 0; i < nb_oargs; i++) {
3903                         ts = arg_temp(op->args[i]);
3904                         if (ts->state != TS_DEAD) {
3905                             goto do_not_remove_call;
3906                         }
3907                     }
3908                     goto do_remove;
3909                 }
3910             do_not_remove_call:
3911 
3912                 /* Output args are dead.  */
3913                 for (i = 0; i < nb_oargs; i++) {
3914                     ts = arg_temp(op->args[i]);
3915                     if (ts->state & TS_DEAD) {
3916                         arg_life |= DEAD_ARG << i;
3917                     }
3918                     if (ts->state & TS_MEM) {
3919                         arg_life |= SYNC_ARG << i;
3920                     }
3921                     ts->state = TS_DEAD;
3922                     la_reset_pref(ts);
3923                 }
3924 
3925                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3926                 memset(op->output_pref, 0, sizeof(op->output_pref));
3927 
3928                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3929                                     TCG_CALL_NO_READ_GLOBALS))) {
3930                     la_global_kill(s, nb_globals);
3931                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3932                     la_global_sync(s, nb_globals);
3933                 }
3934 
3935                 /* Record arguments that die in this helper.  */
3936                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3937                     ts = arg_temp(op->args[i]);
3938                     if (ts->state & TS_DEAD) {
3939                         arg_life |= DEAD_ARG << i;
3940                     }
3941                 }
3942 
3943                 /* For all live registers, remove call-clobbered prefs.  */
3944                 la_cross_call(s, nb_temps);
3945 
3946                 /*
3947                  * Input arguments are live for preceding opcodes.
3948                  *
3949                  * For those arguments that die, and will be allocated in
3950                  * registers, clear the register set for that arg, to be
3951                  * filled in below.  For args that will be on the stack,
3952                  * reset to any available reg.  Process arguments in reverse
3953                  * order so that if a temp is used more than once, the stack
3954                  * reset to max happens before the register reset to 0.
3955                  */
3956                 for (i = nb_iargs - 1; i >= 0; i--) {
3957                     const TCGCallArgumentLoc *loc = &info->in[i];
3958                     ts = arg_temp(op->args[nb_oargs + i]);
3959 
3960                     if (ts->state & TS_DEAD) {
3961                         switch (loc->kind) {
3962                         case TCG_CALL_ARG_NORMAL:
3963                         case TCG_CALL_ARG_EXTEND_U:
3964                         case TCG_CALL_ARG_EXTEND_S:
3965                             if (arg_slot_reg_p(loc->arg_slot)) {
3966                                 *la_temp_pref(ts) = 0;
3967                                 break;
3968                             }
3969                             /* fall through */
3970                         default:
3971                             *la_temp_pref(ts) =
3972                                 tcg_target_available_regs[ts->type];
3973                             break;
3974                         }
3975                         ts->state &= ~TS_DEAD;
3976                     }
3977                 }
3978 
3979                 /*
3980                  * For each input argument, add its input register to prefs.
3981                  * If a temp is used once, this produces a single set bit;
3982                  * if a temp is used multiple times, this produces a set.
3983                  */
3984                 for (i = 0; i < nb_iargs; i++) {
3985                     const TCGCallArgumentLoc *loc = &info->in[i];
3986                     ts = arg_temp(op->args[nb_oargs + i]);
3987 
3988                     switch (loc->kind) {
3989                     case TCG_CALL_ARG_NORMAL:
3990                     case TCG_CALL_ARG_EXTEND_U:
3991                     case TCG_CALL_ARG_EXTEND_S:
3992                         if (arg_slot_reg_p(loc->arg_slot)) {
3993                             tcg_regset_set_reg(*la_temp_pref(ts),
3994                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3995                         }
3996                         break;
3997                     default:
3998                         break;
3999                     }
4000                 }
4001             }
4002             break;
4003         case INDEX_op_insn_start:
4004             break;
4005         case INDEX_op_discard:
4006             /* mark the temporary as dead */
4007             ts = arg_temp(op->args[0]);
4008             ts->state = TS_DEAD;
4009             la_reset_pref(ts);
4010             break;
4011 
4012         case INDEX_op_add2_i32:
4013         case INDEX_op_add2_i64:
4014             opc_new = INDEX_op_add;
4015             goto do_addsub2;
4016         case INDEX_op_sub2_i32:
4017         case INDEX_op_sub2_i64:
4018             opc_new = INDEX_op_sub;
4019         do_addsub2:
4020             nb_iargs = 4;
4021             nb_oargs = 2;
4022             /* Test if the high part of the operation is dead, but not
4023                the low part.  The result can be optimized to a simple
4024                add or sub.  This happens often for an x86_64 guest when
4025                the CPU mode is set to 32 bit.  */
4026             if (arg_temp(op->args[1])->state == TS_DEAD) {
4027                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4028                     goto do_remove;
4029                 }
4030                 /* Replace the opcode and adjust the args in place,
4031                    leaving 3 unused args at the end.  */
4032                 op->opc = opc = opc_new;
4033                 op->args[1] = op->args[2];
4034                 op->args[2] = op->args[4];
4035                 /* Fall through and mark the single-word operation live.  */
4036                 nb_iargs = 2;
4037                 nb_oargs = 1;
4038             }
4039             goto do_not_remove;
4040 
4041         case INDEX_op_muls2:
4042             opc_new = INDEX_op_mul;
4043             opc_new2 = INDEX_op_mulsh;
4044             goto do_mul2;
4045         case INDEX_op_mulu2:
4046             opc_new = INDEX_op_mul;
4047             opc_new2 = INDEX_op_muluh;
4048         do_mul2:
4049             nb_iargs = 2;
4050             nb_oargs = 2;
4051             if (arg_temp(op->args[1])->state == TS_DEAD) {
4052                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4053                     /* Both parts of the operation are dead.  */
4054                     goto do_remove;
4055                 }
4056                 /* The high part of the operation is dead; generate the low. */
4057                 op->opc = opc = opc_new;
4058                 op->args[1] = op->args[2];
4059                 op->args[2] = op->args[3];
4060             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4061                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4062                 /* The low part of the operation is dead; generate the high. */
4063                 op->opc = opc = opc_new2;
4064                 op->args[0] = op->args[1];
4065                 op->args[1] = op->args[2];
4066                 op->args[2] = op->args[3];
4067             } else {
4068                 goto do_not_remove;
4069             }
4070             /* Mark the single-word operation live.  */
4071             nb_oargs = 1;
4072             goto do_not_remove;
4073 
4074         default:
4075             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4076             nb_iargs = def->nb_iargs;
4077             nb_oargs = def->nb_oargs;
4078 
4079             /* Test if the operation can be removed because all
4080                its outputs are dead. We assume that nb_oargs == 0
4081                implies side effects */
4082             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4083                 for (i = 0; i < nb_oargs; i++) {
4084                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4085                         goto do_not_remove;
4086                     }
4087                 }
4088                 goto do_remove;
4089             }
4090             goto do_not_remove;
4091 
4092         do_remove:
4093             tcg_op_remove(s, op);
4094             break;
4095 
4096         do_not_remove:
4097             for (i = 0; i < nb_oargs; i++) {
4098                 ts = arg_temp(op->args[i]);
4099 
4100                 /* Remember the preference of the uses that followed.  */
4101                 if (i < ARRAY_SIZE(op->output_pref)) {
4102                     op->output_pref[i] = *la_temp_pref(ts);
4103                 }
4104 
4105                 /* Output args are dead.  */
4106                 if (ts->state & TS_DEAD) {
4107                     arg_life |= DEAD_ARG << i;
4108                 }
4109                 if (ts->state & TS_MEM) {
4110                     arg_life |= SYNC_ARG << i;
4111                 }
4112                 ts->state = TS_DEAD;
4113                 la_reset_pref(ts);
4114             }
4115 
4116             /* If end of basic block, update.  */
4117             if (def->flags & TCG_OPF_BB_EXIT) {
4118                 la_func_end(s, nb_globals, nb_temps);
4119             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4120                 la_bb_sync(s, nb_globals, nb_temps);
4121             } else if (def->flags & TCG_OPF_BB_END) {
4122                 la_bb_end(s, nb_globals, nb_temps);
4123             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4124                 la_global_sync(s, nb_globals);
4125                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4126                     la_cross_call(s, nb_temps);
4127                 }
4128             }
4129 
4130             /* Record arguments that die in this opcode.  */
4131             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4132                 ts = arg_temp(op->args[i]);
4133                 if (ts->state & TS_DEAD) {
4134                     arg_life |= DEAD_ARG << i;
4135                 }
4136             }
4137 
4138             /* Input arguments are live for preceding opcodes.  */
4139             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4140                 ts = arg_temp(op->args[i]);
4141                 if (ts->state & TS_DEAD) {
4142                     /* For operands that were dead, initially allow
4143                        all regs for the type.  */
4144                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4145                     ts->state &= ~TS_DEAD;
4146                 }
4147             }
4148 
4149             /* Incorporate constraints for this operand.  */
4150             switch (opc) {
4151             case INDEX_op_mov:
4152                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4153                    have proper constraints.  That said, special case
4154                    moves to propagate preferences backward.  */
4155                 if (IS_DEAD_ARG(1)) {
4156                     *la_temp_pref(arg_temp(op->args[0]))
4157                         = *la_temp_pref(arg_temp(op->args[1]));
4158                 }
4159                 break;
4160 
4161             default:
4162                 args_ct = opcode_args_ct(op);
4163                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4164                     const TCGArgConstraint *ct = &args_ct[i];
4165                     TCGRegSet set, *pset;
4166 
4167                     ts = arg_temp(op->args[i]);
4168                     pset = la_temp_pref(ts);
4169                     set = *pset;
4170 
4171                     set &= ct->regs;
4172                     if (ct->ialias) {
4173                         set &= output_pref(op, ct->alias_index);
4174                     }
4175                     /* If the combination is not possible, restart.  */
4176                     if (set == 0) {
4177                         set = ct->regs;
4178                     }
4179                     *pset = set;
4180                 }
4181                 break;
4182             }
4183             break;
4184         }
4185         op->life = arg_life;
4186     }
4187 }
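
/*
 * Worked example (hypothetical temps): for "add t0, t1, t2" where t1
 * is not used by any later op, the backward walk finds t1 in state
 * TS_DEAD and records DEAD_ARG << 1 in op->life; t1 then becomes live
 * for the preceding ops.  If instead t0 itself is unused afterwards
 * and the op has no side effects, the whole op is removed.
 */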
4188 
4189 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4190 static bool __attribute__((noinline))
4191 liveness_pass_2(TCGContext *s)
4192 {
4193     int nb_globals = s->nb_globals;
4194     int nb_temps, i;
4195     bool changes = false;
4196     TCGOp *op, *op_next;
4197 
4198     /* Create a temporary for each indirect global.  */
4199     for (i = 0; i < nb_globals; ++i) {
4200         TCGTemp *its = &s->temps[i];
4201         if (its->indirect_reg) {
4202             TCGTemp *dts = tcg_temp_alloc(s);
4203             dts->type = its->type;
4204             dts->base_type = its->base_type;
4205             dts->temp_subindex = its->temp_subindex;
4206             dts->kind = TEMP_EBB;
4207             its->state_ptr = dts;
4208         } else {
4209             its->state_ptr = NULL;
4210         }
4211         /* All globals begin dead.  */
4212         its->state = TS_DEAD;
4213     }
4214     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4215         TCGTemp *its = &s->temps[i];
4216         its->state_ptr = NULL;
4217         its->state = TS_DEAD;
4218     }
4219 
4220     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4221         TCGOpcode opc = op->opc;
4222         const TCGOpDef *def = &tcg_op_defs[opc];
4223         TCGLifeData arg_life = op->life;
4224         int nb_iargs, nb_oargs, call_flags;
4225         TCGTemp *arg_ts, *dir_ts;
4226 
4227         if (opc == INDEX_op_call) {
4228             nb_oargs = TCGOP_CALLO(op);
4229             nb_iargs = TCGOP_CALLI(op);
4230             call_flags = tcg_call_flags(op);
4231         } else {
4232             nb_iargs = def->nb_iargs;
4233             nb_oargs = def->nb_oargs;
4234 
4235             /* Set flags similar to how calls require.  */
4236             if (def->flags & TCG_OPF_COND_BRANCH) {
4237                 /* Like reading globals: sync_globals */
4238                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4239             } else if (def->flags & TCG_OPF_BB_END) {
4240                 /* Like writing globals: save_globals */
4241                 call_flags = 0;
4242             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4243                 /* Like reading globals: sync_globals */
4244                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4245             } else {
4246                 /* No effect on globals.  */
4247                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4248                               TCG_CALL_NO_WRITE_GLOBALS);
4249             }
4250         }
4251 
4252         /* Make sure that input arguments are available.  */
4253         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4254             arg_ts = arg_temp(op->args[i]);
4255             dir_ts = arg_ts->state_ptr;
4256             if (dir_ts && arg_ts->state == TS_DEAD) {
4257                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4258                                   ? INDEX_op_ld_i32
4259                                   : INDEX_op_ld_i64);
4260                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4261                                                   arg_ts->type, 3);
4262 
4263                 lop->args[0] = temp_arg(dir_ts);
4264                 lop->args[1] = temp_arg(arg_ts->mem_base);
4265                 lop->args[2] = arg_ts->mem_offset;
4266 
4267                 /* Loaded, but synced with memory.  */
4268                 arg_ts->state = TS_MEM;
4269             }
4270         }
4271 
4272         /* Perform input replacement, and mark inputs that became dead.
4273            No action is required except keeping temp_state up to date
4274            so that we reload when needed.  */
4275         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4276             arg_ts = arg_temp(op->args[i]);
4277             dir_ts = arg_ts->state_ptr;
4278             if (dir_ts) {
4279                 op->args[i] = temp_arg(dir_ts);
4280                 changes = true;
4281                 if (IS_DEAD_ARG(i)) {
4282                     arg_ts->state = TS_DEAD;
4283                 }
4284             }
4285         }
4286 
4287         /* Liveness analysis should ensure that the following are
4288            all correct, for call sites and basic block end points.  */
4289         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4290             /* Nothing to do */
4291         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4292             for (i = 0; i < nb_globals; ++i) {
4293                 /* Liveness should see that globals are synced back,
4294                    that is, either TS_DEAD or TS_MEM.  */
4295                 arg_ts = &s->temps[i];
4296                 tcg_debug_assert(arg_ts->state_ptr == 0
4297                                  || arg_ts->state != 0);
4298             }
4299         } else {
4300             for (i = 0; i < nb_globals; ++i) {
4301                 /* Liveness should see that globals are saved back,
4302                    that is, TS_DEAD, waiting to be reloaded.  */
4303                 arg_ts = &s->temps[i];
4304                 tcg_debug_assert(arg_ts->state_ptr == 0
4305                                  || arg_ts->state == TS_DEAD);
4306             }
4307         }
4308 
4309         /* Outputs become available.  */
4310         if (opc == INDEX_op_mov) {
4311             arg_ts = arg_temp(op->args[0]);
4312             dir_ts = arg_ts->state_ptr;
4313             if (dir_ts) {
4314                 op->args[0] = temp_arg(dir_ts);
4315                 changes = true;
4316 
4317                 /* The output is now live and modified.  */
4318                 arg_ts->state = 0;
4319 
4320                 if (NEED_SYNC_ARG(0)) {
4321                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4322                                       ? INDEX_op_st_i32
4323                                       : INDEX_op_st_i64);
4324                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4325                                                      arg_ts->type, 3);
4326                     TCGTemp *out_ts = dir_ts;
4327 
4328                     if (IS_DEAD_ARG(0)) {
4329                         out_ts = arg_temp(op->args[1]);
4330                         arg_ts->state = TS_DEAD;
4331                         tcg_op_remove(s, op);
4332                     } else {
4333                         arg_ts->state = TS_MEM;
4334                     }
4335 
4336                     sop->args[0] = temp_arg(out_ts);
4337                     sop->args[1] = temp_arg(arg_ts->mem_base);
4338                     sop->args[2] = arg_ts->mem_offset;
4339                 } else {
4340                     tcg_debug_assert(!IS_DEAD_ARG(0));
4341                 }
4342             }
4343         } else {
4344             for (i = 0; i < nb_oargs; i++) {
4345                 arg_ts = arg_temp(op->args[i]);
4346                 dir_ts = arg_ts->state_ptr;
4347                 if (!dir_ts) {
4348                     continue;
4349                 }
4350                 op->args[i] = temp_arg(dir_ts);
4351                 changes = true;
4352 
4353                 /* The output is now live and modified.  */
4354                 arg_ts->state = 0;
4355 
4356                 /* Sync outputs upon their last write.  */
4357                 if (NEED_SYNC_ARG(i)) {
4358                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4359                                       ? INDEX_op_st_i32
4360                                       : INDEX_op_st_i64);
4361                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4362                                                      arg_ts->type, 3);
4363 
4364                     sop->args[0] = temp_arg(dir_ts);
4365                     sop->args[1] = temp_arg(arg_ts->mem_base);
4366                     sop->args[2] = arg_ts->mem_offset;
4367 
4368                     arg_ts->state = TS_MEM;
4369                 }
4370                 /* Drop outputs that are dead.  */
4371                 if (IS_DEAD_ARG(i)) {
4372                     arg_ts->state = TS_DEAD;
4373                 }
4374             }
4375         }
4376     }
4377 
4378     return changes;
4379 }
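
/*
 * Example rewrite (illustrative): for an indirect global g with direct
 * temp d, an op "add t0, g, t1" that finds g in state TS_DEAD becomes
 *     ld d, g.mem_base, g.mem_offset
 *     add t0, d, t1
 * and a write to g gets a matching st inserted after it whenever
 * liveness flagged that output with NEED_SYNC_ARG.
 */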
4380 
4381 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4382 {
4383     intptr_t off;
4384     int size, align;
4385 
4386     /* When allocating an object, look at the full type. */
4387     size = tcg_type_size(ts->base_type);
4388     switch (ts->base_type) {
4389     case TCG_TYPE_I32:
4390         align = 4;
4391         break;
4392     case TCG_TYPE_I64:
4393     case TCG_TYPE_V64:
4394         align = 8;
4395         break;
4396     case TCG_TYPE_I128:
4397     case TCG_TYPE_V128:
4398     case TCG_TYPE_V256:
4399         /*
4400          * Note that we do not require aligned storage for V256,
4401          * and that we provide alignment for I128 to match V128,
4402          * even if that's above what the host ABI requires.
4403          */
4404         align = 16;
4405         break;
4406     default:
4407         g_assert_not_reached();
4408     }
4409 
4410     /*
4411      * Assume the stack is sufficiently aligned.
4412      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4413      * and do not require 16 byte vector alignment.  This seems slightly
4414      * easier than fully parameterizing the above switch statement.
4415      */
4416     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4417     off = ROUND_UP(s->current_frame_offset, align);
4418 
4419     /* If we've exhausted the stack frame, restart with a smaller TB. */
4420     if (off + size > s->frame_end) {
4421         tcg_raise_tb_overflow(s);
4422     }
4423     s->current_frame_offset = off + size;
4424 #if defined(__sparc__)
4425     off += TCG_TARGET_STACK_BIAS;
4426 #endif
4427 
4428     /* If the object was subdivided, assign memory to all the parts. */
4429     if (ts->base_type != ts->type) {
4430         int part_size = tcg_type_size(ts->type);
4431         int part_count = size / part_size;
4432 
4433         /*
4434          * Each part is allocated sequentially in tcg_temp_new_internal.
4435          * Jump back to the first part by subtracting the current index.
4436          */
4437         ts -= ts->temp_subindex;
4438         for (int i = 0; i < part_count; ++i) {
4439             ts[i].mem_offset = off + i * part_size;
4440             ts[i].mem_base = s->frame_temp;
4441             ts[i].mem_allocated = 1;
4442         }
4443     } else {
4444         ts->mem_offset = off;
4445         ts->mem_base = s->frame_temp;
4446         ts->mem_allocated = 1;
4447     }
4448 }
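
/*
 * Example: an I128 temp on a 64-bit host has base_type TCG_TYPE_I128
 * (size 16, align 16, capped by TCG_TARGET_STACK_ALIGN) but type
 * TCG_TYPE_I64, so size / part_size == 2 and the two subindexed parts
 * receive frame offsets off and off + 8 from one 16-byte reservation.
 */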
4449 
4450 /* Assign @reg to @ts, and update reg_to_temp[]. */
4451 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4452 {
4453     if (ts->val_type == TEMP_VAL_REG) {
4454         TCGReg old = ts->reg;
4455         tcg_debug_assert(s->reg_to_temp[old] == ts);
4456         if (old == reg) {
4457             return;
4458         }
4459         s->reg_to_temp[old] = NULL;
4460     }
4461     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4462     s->reg_to_temp[reg] = ts;
4463     ts->val_type = TEMP_VAL_REG;
4464     ts->reg = reg;
4465 }
4466 
4467 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4468 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4469 {
4470     tcg_debug_assert(type != TEMP_VAL_REG);
4471     if (ts->val_type == TEMP_VAL_REG) {
4472         TCGReg reg = ts->reg;
4473         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4474         s->reg_to_temp[reg] = NULL;
4475     }
4476     ts->val_type = type;
4477 }
4478 
4479 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4480 
4481 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4482    mark it free; otherwise mark it dead.  */
4483 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4484 {
4485     TCGTempVal new_type;
4486 
4487     switch (ts->kind) {
4488     case TEMP_FIXED:
4489         return;
4490     case TEMP_GLOBAL:
4491     case TEMP_TB:
4492         new_type = TEMP_VAL_MEM;
4493         break;
4494     case TEMP_EBB:
4495         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4496         break;
4497     case TEMP_CONST:
4498         new_type = TEMP_VAL_CONST;
4499         break;
4500     default:
4501         g_assert_not_reached();
4502     }
4503     set_temp_val_nonreg(s, ts, new_type);
4504 }
4505 
4506 /* Mark a temporary as dead.  */
4507 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4508 {
4509     temp_free_or_dead(s, ts, 1);
4510 }
4511 
4512 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4513    register needs to be allocated to store a constant.  If 'free_or_dead'
4514    is non-zero, subsequently release the temporary; if it is positive, the
4515    temp is dead; if it is negative, the temp is free.  */
4516 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4517                       TCGRegSet preferred_regs, int free_or_dead)
4518 {
4519     if (!temp_readonly(ts) && !ts->mem_coherent) {
4520         if (!ts->mem_allocated) {
4521             temp_allocate_frame(s, ts);
4522         }
4523         switch (ts->val_type) {
4524         case TEMP_VAL_CONST:
4525             /* If we're going to free the temp immediately, then we won't
4526                require it later in a register, so attempt to store the
4527                constant to memory directly.  */
4528             if (free_or_dead
4529                 && tcg_out_sti(s, ts->type, ts->val,
4530                                ts->mem_base->reg, ts->mem_offset)) {
4531                 break;
4532             }
4533             temp_load(s, ts, tcg_target_available_regs[ts->type],
4534                       allocated_regs, preferred_regs);
4535             /* fallthrough */
4536 
4537         case TEMP_VAL_REG:
4538             tcg_out_st(s, ts->type, ts->reg,
4539                        ts->mem_base->reg, ts->mem_offset);
4540             break;
4541 
4542         case TEMP_VAL_MEM:
4543             break;
4544 
4545         case TEMP_VAL_DEAD:
4546         default:
4547             g_assert_not_reached();
4548         }
4549         ts->mem_coherent = 1;
4550     }
4551     if (free_or_dead) {
4552         temp_free_or_dead(s, ts, free_or_dead);
4553     }
4554 }
4555 
4556 /* free register 'reg' by spilling the corresponding temporary if necessary */
4557 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4558 {
4559     TCGTemp *ts = s->reg_to_temp[reg];
4560     if (ts != NULL) {
4561         temp_sync(s, ts, allocated_regs, 0, -1);
4562     }
4563 }
4564 
4565 /**
4566  * tcg_reg_alloc:
4567  * @required_regs: Set of registers in which we must allocate.
4568  * @allocated_regs: Set of registers which must be avoided.
4569  * @preferred_regs: Set of registers we should prefer.
4570  * @rev: True if we search the registers in "indirect" order.
4571  *
4572  * The allocated register must be in @required_regs & ~@allocated_regs,
4573  * but if we can put it in @preferred_regs we may save a move later.
4574  */
4575 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4576                             TCGRegSet allocated_regs,
4577                             TCGRegSet preferred_regs, bool rev)
4578 {
4579     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4580     TCGRegSet reg_ct[2];
4581     const int *order;
4582 
4583     reg_ct[1] = required_regs & ~allocated_regs;
4584     tcg_debug_assert(reg_ct[1] != 0);
4585     reg_ct[0] = reg_ct[1] & preferred_regs;
4586 
4587     /* Skip the preferred_regs option if it cannot be satisfied,
4588        or if the preference made no difference.  */
4589     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4590 
4591     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4592 
4593     /* Try free registers, preferences first.  */
4594     for (j = f; j < 2; j++) {
4595         TCGRegSet set = reg_ct[j];
4596 
4597         if (tcg_regset_single(set)) {
4598             /* One register in the set.  */
4599             TCGReg reg = tcg_regset_first(set);
4600             if (s->reg_to_temp[reg] == NULL) {
4601                 return reg;
4602             }
4603         } else {
4604             for (i = 0; i < n; i++) {
4605                 TCGReg reg = order[i];
4606                 if (s->reg_to_temp[reg] == NULL &&
4607                     tcg_regset_test_reg(set, reg)) {
4608                     return reg;
4609                 }
4610             }
4611         }
4612     }
4613 
4614     /* We must spill something.  */
4615     for (j = f; j < 2; j++) {
4616         TCGRegSet set = reg_ct[j];
4617 
4618         if (tcg_regset_single(set)) {
4619             /* One register in the set.  */
4620             TCGReg reg = tcg_regset_first(set);
4621             tcg_reg_free(s, reg, allocated_regs);
4622             return reg;
4623         } else {
4624             for (i = 0; i < n; i++) {
4625                 TCGReg reg = order[i];
4626                 if (tcg_regset_test_reg(set, reg)) {
4627                     tcg_reg_free(s, reg, allocated_regs);
4628                     return reg;
4629                 }
4630             }
4631         }
4632     }
4633 
4634     g_assert_not_reached();
4635 }
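
/*
 * Usage sketch: reg_ct[0] is the preferred subset of reg_ct[1], so the
 * two loops try { free preferred, free required, spill preferred,
 * spill required } in that order, skipping the preferred passes when
 * f == 1 (preference unsatisfiable or redundant).
 */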
4636 
4637 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4638                                  TCGRegSet allocated_regs,
4639                                  TCGRegSet preferred_regs, bool rev)
4640 {
4641     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4642     TCGRegSet reg_ct[2];
4643     const int *order;
4644 
4645     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4646     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4647     tcg_debug_assert(reg_ct[1] != 0);
4648     reg_ct[0] = reg_ct[1] & preferred_regs;
4649 
4650     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4651 
4652     /*
4653      * Skip the preferred_regs option if it cannot be satisfied,
4654      * or if the preference made no difference.
4655      */
4656     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4657 
4658     /*
4659      * Minimize the number of flushes by looking for 2 free registers first,
4660      * then a single flush, then two flushes.
4661      */
4662     for (fmin = 2; fmin >= 0; fmin--) {
4663         for (j = k; j < 2; j++) {
4664             TCGRegSet set = reg_ct[j];
4665 
4666             for (i = 0; i < n; i++) {
4667                 TCGReg reg = order[i];
4668 
4669                 if (tcg_regset_test_reg(set, reg)) {
4670                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4671                     if (f >= fmin) {
4672                         tcg_reg_free(s, reg, allocated_regs);
4673                         tcg_reg_free(s, reg + 1, allocated_regs);
4674                         return reg;
4675                     }
4676                 }
4677             }
4678         }
4679     }
4680     g_assert_not_reached();
4681 }
4682 
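/*
 * Illustrative sketch, not part of the build: if R4 is free but R5
 * holds a temp, while R6 and R7 are both free, the fmin == 2 pass
 * skips the R4/R5 pair and returns R6 with no spill.  Only when no
 * fully free pair exists does fmin == 1 accept R4 and spill R5, and
 * fmin == 0 finally spills both halves of some allowed pair.
 */
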
4683 /* Make sure the temporary is in a register.  If needed, allocate the register
4684    from DESIRED while avoiding ALLOCATED.  */
4685 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4686                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4687 {
4688     TCGReg reg;
4689 
4690     switch (ts->val_type) {
4691     case TEMP_VAL_REG:
4692         return;
4693     case TEMP_VAL_CONST:
4694         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4695                             preferred_regs, ts->indirect_base);
4696         if (ts->type <= TCG_TYPE_I64) {
4697             tcg_out_movi(s, ts->type, reg, ts->val);
4698         } else {
4699             uint64_t val = ts->val;
4700             MemOp vece = MO_64;
4701 
4702             /*
4703              * Find the minimal vector element that matches the constant.
4704              * The targets will, in general, have to do this search anyway,
4705              * so do it generically here.
4706              */
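            /*
             * Example (illustrative): val == 0x4242424242424242 matches
             * dup_const(MO_8, val) and is emitted as a byte splat,
             * whereas 0xdeadbeefdeadbeef first matches at MO_32.
             */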
4707             if (val == dup_const(MO_8, val)) {
4708                 vece = MO_8;
4709             } else if (val == dup_const(MO_16, val)) {
4710                 vece = MO_16;
4711             } else if (val == dup_const(MO_32, val)) {
4712                 vece = MO_32;
4713             }
4714 
4715             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4716         }
4717         ts->mem_coherent = 0;
4718         break;
4719     case TEMP_VAL_MEM:
4720         if (!ts->mem_allocated) {
4721             temp_allocate_frame(s, ts);
4722         }
4723         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4724                             preferred_regs, ts->indirect_base);
4725         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4726         ts->mem_coherent = 1;
4727         break;
4728     case TEMP_VAL_DEAD:
4729     default:
4730         g_assert_not_reached();
4731     }
4732     set_temp_val_reg(s, ts, reg);
4733 }
4734 
4735 /* Save a temporary to memory. 'allocated_regs' is used in case a
4736    temporary register needs to be allocated to store a constant.  */
4737 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4738 {
4739     /* The liveness analysis already ensures that globals are back
4740        in memory. Keep a tcg_debug_assert for safety. */
4741     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4742 }
4743 
4744 /* save globals to their canonical location and assume they can be
4745    modified by the following code. 'allocated_regs' is used in case a
4746    temporary register needs to be allocated to store a constant. */
4747 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4748 {
4749     int i, n;
4750 
4751     for (i = 0, n = s->nb_globals; i < n; i++) {
4752         temp_save(s, &s->temps[i], allocated_regs);
4753     }
4754 }
4755 
4756 /* sync globals to their canonical location and assume they can be
4757    read by the following code. 'allocated_regs' is used in case a
4758    temporary register needs to be allocated to store a constant. */
4759 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4760 {
4761     int i, n;
4762 
4763     for (i = 0, n = s->nb_globals; i < n; i++) {
4764         TCGTemp *ts = &s->temps[i];
4765         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4766                          || ts->kind == TEMP_FIXED
4767                          || ts->mem_coherent);
4768     }
4769 }
4770 
4771 /* at the end of a basic block, we assume all temporaries are dead and
4772    all globals are stored at their canonical location. */
4773 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4774 {
4775     int i;
4776 
4777     for (i = s->nb_globals; i < s->nb_temps; i++) {
4778         TCGTemp *ts = &s->temps[i];
4779 
4780         switch (ts->kind) {
4781         case TEMP_TB:
4782             temp_save(s, ts, allocated_regs);
4783             break;
4784         case TEMP_EBB:
4785             /* The liveness analysis already ensures that temps are dead.
4786                Keep a tcg_debug_assert for safety. */
4787             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4788             break;
4789         case TEMP_CONST:
4790             /* Similarly, we should have freed any allocated register. */
4791             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4792             break;
4793         default:
4794             g_assert_not_reached();
4795         }
4796     }
4797 
4798     save_globals(s, allocated_regs);
4799 }
4800 
4801 /*
4802  * At a conditional branch, we assume all temporaries are dead unless
4803  * explicitly live-across-conditional-branch; all globals and local
4804  * temps are synced to their location.
4805  */
4806 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4807 {
4808     sync_globals(s, allocated_regs);
4809 
4810     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4811         TCGTemp *ts = &s->temps[i];
4812         /*
4813          * The liveness analysis already ensures that temps are dead.
4814          * Keep tcg_debug_asserts for safety.
4815          */
4816         switch (ts->kind) {
4817         case TEMP_TB:
4818             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4819             break;
4820         case TEMP_EBB:
4821         case TEMP_CONST:
4822             break;
4823         default:
4824             g_assert_not_reached();
4825         }
4826     }
4827 }
4828 
4829 /*
4830  * Specialized code generation for INDEX_op_mov_* with a constant.
4831  */
4832 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4833                                   tcg_target_ulong val, TCGLifeData arg_life,
4834                                   TCGRegSet preferred_regs)
4835 {
4836     /* ENV should not be modified.  */
4837     tcg_debug_assert(!temp_readonly(ots));
4838 
4839     /* The movi is not explicitly generated here.  */
4840     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4841     ots->val = val;
4842     ots->mem_coherent = 0;
4843     if (NEED_SYNC_ARG(0)) {
4844         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4845     } else if (IS_DEAD_ARG(0)) {
4846         temp_dead(s, ots);
4847     }
4848 }
4849 
4850 /*
4851  * Specialized code generation for INDEX_op_mov_*.
4852  */
4853 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4854 {
4855     const TCGLifeData arg_life = op->life;
4856     TCGRegSet allocated_regs, preferred_regs;
4857     TCGTemp *ts, *ots;
4858     TCGType otype, itype;
4859     TCGReg oreg, ireg;
4860 
4861     allocated_regs = s->reserved_regs;
4862     preferred_regs = output_pref(op, 0);
4863     ots = arg_temp(op->args[0]);
4864     ts = arg_temp(op->args[1]);
4865 
4866     /* ENV should not be modified.  */
4867     tcg_debug_assert(!temp_readonly(ots));
4868 
4869     /* Note that otype != itype for no-op truncation.  */
4870     otype = ots->type;
4871     itype = ts->type;
4872 
4873     if (ts->val_type == TEMP_VAL_CONST) {
4874         /* propagate constant or generate sti */
4875         tcg_target_ulong val = ts->val;
4876         if (IS_DEAD_ARG(1)) {
4877             temp_dead(s, ts);
4878         }
4879         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4880         return;
4881     }
4882 
4883     /* If the source value is in memory, we're going to be forced
4884        to have it in a register in order to perform the copy.  Copy
4885        the SOURCE value into its own register first, so that we
4886        don't have to reload SOURCE the next time it is used. */
4887     if (ts->val_type == TEMP_VAL_MEM) {
4888         temp_load(s, ts, tcg_target_available_regs[itype],
4889                   allocated_regs, preferred_regs);
4890     }
4891     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4892     ireg = ts->reg;
4893 
4894     if (IS_DEAD_ARG(0)) {
4895         /* mov to a non-saved dead register makes no sense (even with
4896            liveness analysis disabled). */
4897         tcg_debug_assert(NEED_SYNC_ARG(0));
4898         if (!ots->mem_allocated) {
4899             temp_allocate_frame(s, ots);
4900         }
4901         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4902         if (IS_DEAD_ARG(1)) {
4903             temp_dead(s, ts);
4904         }
4905         temp_dead(s, ots);
4906         return;
4907     }
4908 
4909     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4910         /*
4911          * The mov can be suppressed.  Kill input first, so that it
4912          * is unlinked from reg_to_temp, then set the output to the
4913          * reg that we saved from the input.
4914          */
4915         temp_dead(s, ts);
4916         oreg = ireg;
4917     } else {
4918         if (ots->val_type == TEMP_VAL_REG) {
4919             oreg = ots->reg;
4920         } else {
4921             /* Make sure to not spill the input register during allocation. */
4922             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4923                                  allocated_regs | ((TCGRegSet)1 << ireg),
4924                                  preferred_regs, ots->indirect_base);
4925         }
4926         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4927             /*
4928              * Cross register class move not supported.
4929              * Store the source register into the destination slot
4930              * and leave the destination temp as TEMP_VAL_MEM.
4931              */
4932             assert(!temp_readonly(ots));
4933             if (!ots->mem_allocated) {
4934                 temp_allocate_frame(s, ots);
4935             }
4936             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4937             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4938             ots->mem_coherent = 1;
4939             return;
4940         }
4941     }
4942     set_temp_val_reg(s, ots, oreg);
4943     ots->mem_coherent = 0;
4944 
4945     if (NEED_SYNC_ARG(0)) {
4946         temp_sync(s, ots, allocated_regs, 0, 0);
4947     }
4948 }
4949 
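/*
 * Illustrative sketch, not part of the build: for IR such as
 *
 *     mov_i64 t2, t1          (t1 dead afterward, t1 resident in R5)
 *
 * the path above emits no host instruction: t1 is killed and t2 simply
 * inherits R5.  A host move is only emitted when the input stays live,
 * is TEMP_FIXED, or must change register class.
 */
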
4950 /*
4951  * Specialized code generation for INDEX_op_dup_vec.
4952  */
4953 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4954 {
4955     const TCGLifeData arg_life = op->life;
4956     TCGRegSet dup_out_regs, dup_in_regs;
4957     const TCGArgConstraint *dup_args_ct;
4958     TCGTemp *its, *ots;
4959     TCGType itype, vtype;
4960     unsigned vece;
4961     int lowpart_ofs;
4962     bool ok;
4963 
4964     ots = arg_temp(op->args[0]);
4965     its = arg_temp(op->args[1]);
4966 
4967     /* ENV should not be modified.  */
4968     tcg_debug_assert(!temp_readonly(ots));
4969 
4970     itype = its->type;
4971     vece = TCGOP_VECE(op);
4972     vtype = TCGOP_TYPE(op);
4973 
4974     if (its->val_type == TEMP_VAL_CONST) {
4975         /* Propagate constant via movi -> dupi.  */
4976         tcg_target_ulong val = its->val;
4977         if (IS_DEAD_ARG(1)) {
4978             temp_dead(s, its);
4979         }
4980         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4981         return;
4982     }
4983 
4984     dup_args_ct = opcode_args_ct(op);
4985     dup_out_regs = dup_args_ct[0].regs;
4986     dup_in_regs = dup_args_ct[1].regs;
4987 
4988     /* Allocate the output register now.  */
4989     if (ots->val_type != TEMP_VAL_REG) {
4990         TCGRegSet allocated_regs = s->reserved_regs;
4991         TCGReg oreg;
4992 
4993         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4994             /* Make sure to not spill the input register. */
4995             tcg_regset_set_reg(allocated_regs, its->reg);
4996         }
4997         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4998                              output_pref(op, 0), ots->indirect_base);
4999         set_temp_val_reg(s, ots, oreg);
5000     }
5001 
5002     switch (its->val_type) {
5003     case TEMP_VAL_REG:
5004         /*
5005          * The dup constraints must be broad, covering all possible VECE.
5006          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5007          * to fail, indicating that extra moves are required for that case.
5008          */
5009         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5010             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5011                 goto done;
5012             }
5013             /* Try again from memory or a vector input register.  */
5014         }
5015         if (!its->mem_coherent) {
5016             /*
5017              * The input register is not synced, and so an extra store
5018              * would be required to use memory.  Attempt an integer-vector
5019              * register move first.  We do not have a TCGRegSet for this.
5020              */
5021             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5022                 break;
5023             }
5024             /* Sync the temp back to its slot and load from there.  */
5025             temp_sync(s, its, s->reserved_regs, 0, 0);
5026         }
5027         /* fall through */
5028 
5029     case TEMP_VAL_MEM:
5030         lowpart_ofs = 0;
5031         if (HOST_BIG_ENDIAN) {
5032             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5033         }
5034         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5035                              its->mem_offset + lowpart_ofs)) {
5036             goto done;
5037         }
5038         /* Load the input into the destination vector register. */
5039         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5040         break;
5041 
5042     default:
5043         g_assert_not_reached();
5044     }
5045 
5046     /* We now have a vector input register, so dup must succeed. */
5047     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5048     tcg_debug_assert(ok);
5049 
5050  done:
5051     ots->mem_coherent = 0;
5052     if (IS_DEAD_ARG(1)) {
5053         temp_dead(s, its);
5054     }
5055     if (NEED_SYNC_ARG(0)) {
5056         temp_sync(s, ots, s->reserved_regs, 0, 0);
5057     }
5058     if (IS_DEAD_ARG(0)) {
5059         temp_dead(s, ots);
5060     }
5061 }
5062 
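/*
 * Informal summary of the fallback chain above for dup_vec with a
 * register input:
 *
 *     tcg_out_dup_vec(from input reg)            else
 *     tcg_out_mov(int reg -> vec reg), then dup  else
 *     temp_sync + tcg_out_dupm_vec(from memory)  else
 *     tcg_out_ld + tcg_out_dup_vec (must succeed)
 *
 * Each step may fail only where a later step can still recover.
 */
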
5063 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5064 {
5065     const TCGLifeData arg_life = op->life;
5066     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5067     TCGRegSet i_allocated_regs;
5068     TCGRegSet o_allocated_regs;
5069     int i, k, nb_iargs, nb_oargs;
5070     TCGReg reg;
5071     TCGArg arg;
5072     const TCGArgConstraint *args_ct;
5073     const TCGArgConstraint *arg_ct;
5074     TCGTemp *ts;
5075     TCGArg new_args[TCG_MAX_OP_ARGS];
5076     int const_args[TCG_MAX_OP_ARGS];
5077     TCGCond op_cond;
5078 
5079     nb_oargs = def->nb_oargs;
5080     nb_iargs = def->nb_iargs;
5081 
5082     /* copy constants */
5083     memcpy(new_args + nb_oargs + nb_iargs,
5084            op->args + nb_oargs + nb_iargs,
5085            sizeof(TCGArg) * def->nb_cargs);
5086 
5087     i_allocated_regs = s->reserved_regs;
5088     o_allocated_regs = s->reserved_regs;
5089 
5090     switch (op->opc) {
5091     case INDEX_op_brcond:
5092         op_cond = op->args[2];
5093         break;
5094     case INDEX_op_setcond:
5095     case INDEX_op_negsetcond:
5096     case INDEX_op_cmp_vec:
5097         op_cond = op->args[3];
5098         break;
5099     case INDEX_op_brcond2_i32:
5100         op_cond = op->args[4];
5101         break;
5102     case INDEX_op_movcond:
5103     case INDEX_op_setcond2_i32:
5104     case INDEX_op_cmpsel_vec:
5105         op_cond = op->args[5];
5106         break;
5107     default:
5108         /* No condition within opcode. */
5109         op_cond = TCG_COND_ALWAYS;
5110         break;
5111     }
5112 
5113     args_ct = opcode_args_ct(op);
5114 
5115     /* satisfy input constraints */
5116     for (k = 0; k < nb_iargs; k++) {
5117         TCGRegSet i_preferred_regs, i_required_regs;
5118         bool allocate_new_reg, copyto_new_reg;
5119         TCGTemp *ts2;
5120         int i1, i2;
5121 
5122         i = args_ct[nb_oargs + k].sort_index;
5123         arg = op->args[i];
5124         arg_ct = &args_ct[i];
5125         ts = arg_temp(arg);
5126 
5127         if (ts->val_type == TEMP_VAL_CONST) {
5128 #ifdef TCG_REG_ZERO
5129             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5130                 /* Hardware zero register: indicate register via non-const. */
5131                 const_args[i] = 0;
5132                 new_args[i] = TCG_REG_ZERO;
5133                 continue;
5134             }
5135 #endif
5136 
5137             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5138                                        op_cond, TCGOP_VECE(op))) {
5139                 /* constant is OK for instruction */
5140                 const_args[i] = 1;
5141                 new_args[i] = ts->val;
5142                 continue;
5143             }
5144         }
5145 
5146         reg = ts->reg;
5147         i_preferred_regs = 0;
5148         i_required_regs = arg_ct->regs;
5149         allocate_new_reg = false;
5150         copyto_new_reg = false;
5151 
5152         switch (arg_ct->pair) {
5153         case 0: /* not paired */
5154             if (arg_ct->ialias) {
5155                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5156 
5157                 /*
5158                  * If the input is readonly, then it cannot also be an
5159                  * output and aliased to itself.  If the input is not
5160                  * dead after the instruction, we must allocate a new
5161                  * register and move it.
5162                  */
5163                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5164                     || args_ct[arg_ct->alias_index].newreg) {
5165                     allocate_new_reg = true;
5166                 } else if (ts->val_type == TEMP_VAL_REG) {
5167                     /*
5168                      * Check if the current register has already been
5169                      * allocated for another input.
5170                      */
5171                     allocate_new_reg =
5172                         tcg_regset_test_reg(i_allocated_regs, reg);
5173                 }
5174             }
5175             if (!allocate_new_reg) {
5176                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5177                           i_preferred_regs);
5178                 reg = ts->reg;
5179                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5180             }
5181             if (allocate_new_reg) {
5182                 /*
5183                  * Allocate a new register matching the constraint
5184                  * and move the temporary register into it.
5185                  */
5186                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5187                           i_allocated_regs, 0);
5188                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5189                                     i_preferred_regs, ts->indirect_base);
5190                 copyto_new_reg = true;
5191             }
5192             break;
5193 
5194         case 1:
5195             /* First of an input pair; if i1 == i2, the second is an output. */
5196             i1 = i;
5197             i2 = arg_ct->pair_index;
5198             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5199 
5200             /*
5201              * It is easier to default to allocating a new pair
5202              * and to identify a few cases where it's not required.
5203              */
5204             if (arg_ct->ialias) {
5205                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5206                 if (IS_DEAD_ARG(i1) &&
5207                     IS_DEAD_ARG(i2) &&
5208                     !temp_readonly(ts) &&
5209                     ts->val_type == TEMP_VAL_REG &&
5210                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5211                     tcg_regset_test_reg(i_required_regs, reg) &&
5212                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5213                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5214                     (ts2
5215                      ? ts2->val_type == TEMP_VAL_REG &&
5216                        ts2->reg == reg + 1 &&
5217                        !temp_readonly(ts2)
5218                      : s->reg_to_temp[reg + 1] == NULL)) {
5219                     break;
5220                 }
5221             } else {
5222                 /* Without aliasing, the pair must also be an input. */
5223                 tcg_debug_assert(ts2);
5224                 if (ts->val_type == TEMP_VAL_REG &&
5225                     ts2->val_type == TEMP_VAL_REG &&
5226                     ts2->reg == reg + 1 &&
5227                     tcg_regset_test_reg(i_required_regs, reg)) {
5228                     break;
5229                 }
5230             }
5231             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5232                                      0, ts->indirect_base);
5233             goto do_pair;
5234 
5235         case 2: /* pair second */
5236             reg = new_args[arg_ct->pair_index] + 1;
5237             goto do_pair;
5238 
5239         case 3: /* ialias with second output, no first input */
5240             tcg_debug_assert(arg_ct->ialias);
5241             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5242 
5243             if (IS_DEAD_ARG(i) &&
5244                 !temp_readonly(ts) &&
5245                 ts->val_type == TEMP_VAL_REG &&
5246                 reg > 0 &&
5247                 s->reg_to_temp[reg - 1] == NULL &&
5248                 tcg_regset_test_reg(i_required_regs, reg) &&
5249                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5250                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5251                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5252                 break;
5253             }
5254             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5255                                      i_allocated_regs, 0,
5256                                      ts->indirect_base);
5257             tcg_regset_set_reg(i_allocated_regs, reg);
5258             reg += 1;
5259             goto do_pair;
5260 
5261         do_pair:
5262             /*
5263              * If an aliased input is not dead after the instruction,
5264              * we must allocate a new register and move it.
5265              */
5266             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5267                 TCGRegSet t_allocated_regs = i_allocated_regs;
5268 
5269                 /*
5270                  * Because of the alias, and the continued life, make sure
5271                  * that the temp is somewhere *other* than the reg pair,
5272                  * and we get a copy in reg.
5273                  */
5274                 tcg_regset_set_reg(t_allocated_regs, reg);
5275                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5276                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5277                     /* If ts was already in reg, copy it somewhere else. */
5278                     TCGReg nr;
5279                     bool ok;
5280 
5281                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5282                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5283                                        t_allocated_regs, 0, ts->indirect_base);
5284                     ok = tcg_out_mov(s, ts->type, nr, reg);
5285                     tcg_debug_assert(ok);
5286 
5287                     set_temp_val_reg(s, ts, nr);
5288                 } else {
5289                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5290                               t_allocated_regs, 0);
5291                     copyto_new_reg = true;
5292                 }
5293             } else {
5294                 /* Preferably allocate to reg, otherwise copy. */
5295                 i_required_regs = (TCGRegSet)1 << reg;
5296                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5297                           i_preferred_regs);
5298                 copyto_new_reg = ts->reg != reg;
5299             }
5300             break;
5301 
5302         default:
5303             g_assert_not_reached();
5304         }
5305 
5306         if (copyto_new_reg) {
5307             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5308                 /*
5309                  * Cross register class move not supported.  Sync the
5310                  * temp back to its slot and load from there.
5311                  */
5312                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5313                 tcg_out_ld(s, ts->type, reg,
5314                            ts->mem_base->reg, ts->mem_offset);
5315             }
5316         }
5317         new_args[i] = reg;
5318         const_args[i] = 0;
5319         tcg_regset_set_reg(i_allocated_regs, reg);
5320     }
5321 
5322     /* mark dead temporaries and free the associated registers */
5323     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5324         if (IS_DEAD_ARG(i)) {
5325             temp_dead(s, arg_temp(op->args[i]));
5326         }
5327     }
5328 
5329     if (def->flags & TCG_OPF_COND_BRANCH) {
5330         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5331     } else if (def->flags & TCG_OPF_BB_END) {
5332         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5333     } else {
5334         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5335             /* XXX: permit generic clobber register list? */
5336             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5337                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5338                     tcg_reg_free(s, i, i_allocated_regs);
5339                 }
5340             }
5341         }
5342         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5343             /* sync globals if the op has side effects and might trigger
5344                an exception. */
5345             sync_globals(s, i_allocated_regs);
5346         }
5347 
5348         /* satisfy the output constraints */
5349         for (k = 0; k < nb_oargs; k++) {
5350             i = args_ct[k].sort_index;
5351             arg = op->args[i];
5352             arg_ct = &args_ct[i];
5353             ts = arg_temp(arg);
5354 
5355             /* ENV should not be modified.  */
5356             tcg_debug_assert(!temp_readonly(ts));
5357 
5358             switch (arg_ct->pair) {
5359             case 0: /* not paired */
5360                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5361                     reg = new_args[arg_ct->alias_index];
5362                 } else if (arg_ct->newreg) {
5363                     reg = tcg_reg_alloc(s, arg_ct->regs,
5364                                         i_allocated_regs | o_allocated_regs,
5365                                         output_pref(op, k), ts->indirect_base);
5366                 } else {
5367                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5368                                         output_pref(op, k), ts->indirect_base);
5369                 }
5370                 break;
5371 
5372             case 1: /* first of pair */
5373                 if (arg_ct->oalias) {
5374                     reg = new_args[arg_ct->alias_index];
5375                 } else if (arg_ct->newreg) {
5376                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5377                                              i_allocated_regs | o_allocated_regs,
5378                                              output_pref(op, k),
5379                                              ts->indirect_base);
5380                 } else {
5381                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5382                                              output_pref(op, k),
5383                                              ts->indirect_base);
5384                 }
5385                 break;
5386 
5387             case 2: /* second of pair */
5388                 if (arg_ct->oalias) {
5389                     reg = new_args[arg_ct->alias_index];
5390                 } else {
5391                     reg = new_args[arg_ct->pair_index] + 1;
5392                 }
5393                 break;
5394 
5395             case 3: /* first of pair, aliasing with a second input */
5396                 tcg_debug_assert(!arg_ct->newreg);
5397                 reg = new_args[arg_ct->pair_index] - 1;
5398                 break;
5399 
5400             default:
5401                 g_assert_not_reached();
5402             }
5403             tcg_regset_set_reg(o_allocated_regs, reg);
5404             set_temp_val_reg(s, ts, reg);
5405             ts->mem_coherent = 0;
5406             new_args[i] = reg;
5407         }
5408     }
5409 
5410     /* emit instruction */
5411     TCGType type = TCGOP_TYPE(op);
5412     switch (op->opc) {
5413     case INDEX_op_ext_i32_i64:
5414         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5415         break;
5416     case INDEX_op_extu_i32_i64:
5417         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5418         break;
5419     case INDEX_op_extrl_i64_i32:
5420         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5421         break;
5422 
5423     case INDEX_op_add:
5424     case INDEX_op_and:
5425     case INDEX_op_andc:
5426     case INDEX_op_clz:
5427     case INDEX_op_ctz:
5428     case INDEX_op_divs:
5429     case INDEX_op_divu:
5430     case INDEX_op_eqv:
5431     case INDEX_op_mul:
5432     case INDEX_op_mulsh:
5433     case INDEX_op_muluh:
5434     case INDEX_op_nand:
5435     case INDEX_op_nor:
5436     case INDEX_op_or:
5437     case INDEX_op_orc:
5438     case INDEX_op_rems:
5439     case INDEX_op_remu:
5440     case INDEX_op_rotl:
5441     case INDEX_op_rotr:
5442     case INDEX_op_sar:
5443     case INDEX_op_shl:
5444     case INDEX_op_shr:
5445     case INDEX_op_xor:
5446         {
5447             const TCGOutOpBinary *out =
5448                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5449 
5450             /* Constants should never appear in the first source operand. */
5451             tcg_debug_assert(!const_args[1]);
5452             if (const_args[2]) {
5453                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     case INDEX_op_sub:
5461         {
5462             const TCGOutOpSubtract *out = &outop_sub;
5463 
5464             /*
5465              * Constants should never appear in the second source operand.
5466              * These are folded to an add with a negative constant.
5467              */
5468             tcg_debug_assert(!const_args[2]);
5469             if (const_args[1]) {
5470                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5471             } else {
5472                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5473             }
5474         }
5475         break;
5476 
5477     case INDEX_op_ctpop:
5478     case INDEX_op_neg:
5479     case INDEX_op_not:
5480         {
5481             const TCGOutOpUnary *out =
5482                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5483 
5484             /* Constants should have been folded. */
5485             tcg_debug_assert(!const_args[1]);
5486             out->out_rr(s, type, new_args[0], new_args[1]);
5487         }
5488         break;
5489 
5490     case INDEX_op_bswap16:
5491         {
5492             const TCGOutOpBswap *out =
5493                 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5494 
5495             tcg_debug_assert(!const_args[1]);
5496             out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5497         }
5498         break;
5499 
5500     case INDEX_op_divs2:
5501     case INDEX_op_divu2:
5502         {
5503             const TCGOutOpDivRem *out =
5504                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5505 
5506             /* Only used by x86 and s390x, which use matching constraints. */
5507             tcg_debug_assert(new_args[0] == new_args[2]);
5508             tcg_debug_assert(new_args[1] == new_args[3]);
5509             tcg_debug_assert(!const_args[4]);
5510             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5511         }
5512         break;
5513 
5514     case INDEX_op_muls2:
5515     case INDEX_op_mulu2:
5516         {
5517             const TCGOutOpMul2 *out =
5518                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5519 
5520             tcg_debug_assert(!const_args[2]);
5521             tcg_debug_assert(!const_args[3]);
5522             out->out_rrrr(s, type, new_args[0], new_args[1],
5523                           new_args[2], new_args[3]);
5524         }
5525         break;
5526 
5527     case INDEX_op_brcond:
5528         {
5529             const TCGOutOpBrcond *out = &outop_brcond;
5530             TCGCond cond = new_args[2];
5531             TCGLabel *label = arg_label(new_args[3]);
5532 
5533             tcg_debug_assert(!const_args[0]);
5534             if (const_args[1]) {
5535                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5536             } else {
5537                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5538             }
5539         }
5540         break;
5541 
5542     case INDEX_op_movcond:
5543         {
5544             const TCGOutOpMovcond *out = &outop_movcond;
5545             TCGCond cond = new_args[5];
5546 
5547             tcg_debug_assert(!const_args[1]);
5548             out->out(s, type, cond, new_args[0],
5549                      new_args[1], new_args[2], const_args[2],
5550                      new_args[3], const_args[3],
5551                      new_args[4], const_args[4]);
5552         }
5553         break;
5554 
5555     case INDEX_op_setcond:
5556     case INDEX_op_negsetcond:
5557         {
5558             const TCGOutOpSetcond *out =
5559                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5560             TCGCond cond = new_args[3];
5561 
5562             tcg_debug_assert(!const_args[1]);
5563             if (const_args[2]) {
5564                 out->out_rri(s, type, cond,
5565                              new_args[0], new_args[1], new_args[2]);
5566             } else {
5567                 out->out_rrr(s, type, cond,
5568                              new_args[0], new_args[1], new_args[2]);
5569             }
5570         }
5571         break;
5572 
5573 #if TCG_TARGET_REG_BITS == 32
5574     case INDEX_op_brcond2_i32:
5575         {
5576             const TCGOutOpBrcond2 *out = &outop_brcond2;
5577             TCGCond cond = new_args[4];
5578             TCGLabel *label = arg_label(new_args[5]);
5579 
5580             tcg_debug_assert(!const_args[0]);
5581             tcg_debug_assert(!const_args[1]);
5582             out->out(s, cond, new_args[0], new_args[1],
5583                      new_args[2], const_args[2],
5584                      new_args[3], const_args[3], label);
5585         }
5586         break;
5587     case INDEX_op_setcond2_i32:
5588         {
5589             const TCGOutOpSetcond2 *out = &outop_setcond2;
5590             TCGCond cond = new_args[5];
5591 
5592             tcg_debug_assert(!const_args[1]);
5593             tcg_debug_assert(!const_args[2]);
5594             out->out(s, cond, new_args[0], new_args[1], new_args[2],
5595                      new_args[3], const_args[3], new_args[4], const_args[4]);
5596         }
5597         break;
5598 #else
5599     case INDEX_op_brcond2_i32:
5600     case INDEX_op_setcond2_i32:
5601         g_assert_not_reached();
5602 #endif
5603 
5604     default:
5605         if (def->flags & TCG_OPF_VECTOR) {
5606             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5607                            TCGOP_VECE(op), new_args, const_args);
5608         } else {
5609             tcg_out_op(s, op->opc, type, new_args, const_args);
5610         }
5611         break;
5612     }
5613 
5614     /* move the outputs in the correct register if needed */
5615     for (i = 0; i < nb_oargs; i++) {
5616         ts = arg_temp(op->args[i]);
5617 
5618         /* ENV should not be modified.  */
5619         tcg_debug_assert(!temp_readonly(ts));
5620 
5621         if (NEED_SYNC_ARG(i)) {
5622             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5623         } else if (IS_DEAD_ARG(i)) {
5624             temp_dead(s, ts);
5625         }
5626     }
5627 }
5628 
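/*
 * Illustrative sketch, not part of the build: for a hypothetical
 * "add t0, t1, $5" whose backend accepts an immediate second operand,
 * tcg_target_const_match() succeeds, const_args[2] is set, and the
 * emit switch calls out->out_rri(s, type, new_args[0], new_args[1], 5).
 * Had the constant been rejected, it would have been loaded into a
 * register satisfying arg_ct->regs and out_rrr used instead.
 */
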
5629 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5630 {
5631     const TCGLifeData arg_life = op->life;
5632     TCGTemp *ots, *itsl, *itsh;
5633     TCGType vtype = TCGOP_TYPE(op);
5634 
5635     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5636     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5637     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5638 
5639     ots = arg_temp(op->args[0]);
5640     itsl = arg_temp(op->args[1]);
5641     itsh = arg_temp(op->args[2]);
5642 
5643     /* ENV should not be modified.  */
5644     tcg_debug_assert(!temp_readonly(ots));
5645 
5646     /* Allocate the output register now.  */
5647     if (ots->val_type != TEMP_VAL_REG) {
5648         TCGRegSet allocated_regs = s->reserved_regs;
5649         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5650         TCGReg oreg;
5651 
5652         /* Make sure to not spill the input registers. */
5653         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5654             tcg_regset_set_reg(allocated_regs, itsl->reg);
5655         }
5656         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5657             tcg_regset_set_reg(allocated_regs, itsh->reg);
5658         }
5659 
5660         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5661                              output_pref(op, 0), ots->indirect_base);
5662         set_temp_val_reg(s, ots, oreg);
5663     }
5664 
5665     /* Promote dup2 of immediates to dupi_vec. */
5666     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5667         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5668         MemOp vece = MO_64;
5669 
5670         if (val == dup_const(MO_8, val)) {
5671             vece = MO_8;
5672         } else if (val == dup_const(MO_16, val)) {
5673             vece = MO_16;
5674         } else if (val == dup_const(MO_32, val)) {
5675             vece = MO_32;
5676         }
5677 
5678         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5679         goto done;
5680     }
5681 
5682     /* If the two inputs form one 64-bit value, try dupm_vec. */
5683     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5684         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5685         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5686         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5687 
5688         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5689         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5690 
5691         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5692                              its->mem_base->reg, its->mem_offset)) {
5693             goto done;
5694         }
5695     }
5696 
5697     /* Fall back to generic expansion. */
5698     return false;
5699 
5700  done:
5701     ots->mem_coherent = 0;
5702     if (IS_DEAD_ARG(1)) {
5703         temp_dead(s, itsl);
5704     }
5705     if (IS_DEAD_ARG(2)) {
5706         temp_dead(s, itsh);
5707     }
5708     if (NEED_SYNC_ARG(0)) {
5709         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5710     } else if (IS_DEAD_ARG(0)) {
5711         temp_dead(s, ots);
5712     }
5713     return true;
5714 }
5715 
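/*
 * Illustrative example, not part of the build: on a 32-bit host,
 * dup2_vec with constant inputs lo == hi == 0x00010001 yields
 * deposit64(lo, 32, 32, hi) == 0x0001000100010001, which the search
 * above reduces to a MO_16 splat for tcg_out_dupi_vec().
 */
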
5716 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5717                          TCGRegSet allocated_regs)
5718 {
5719     if (ts->val_type == TEMP_VAL_REG) {
5720         if (ts->reg != reg) {
5721             tcg_reg_free(s, reg, allocated_regs);
5722             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5723                 /*
5724                  * Cross register class move not supported.  Sync the
5725                  * temp back to its slot and load from there.
5726                  */
5727                 temp_sync(s, ts, allocated_regs, 0, 0);
5728                 tcg_out_ld(s, ts->type, reg,
5729                            ts->mem_base->reg, ts->mem_offset);
5730             }
5731         }
5732     } else {
5733         TCGRegSet arg_set = 0;
5734 
5735         tcg_reg_free(s, reg, allocated_regs);
5736         tcg_regset_set_reg(arg_set, reg);
5737         temp_load(s, ts, arg_set, allocated_regs, 0);
5738     }
5739 }
5740 
5741 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5742                          TCGRegSet allocated_regs)
5743 {
5744     /*
5745      * When the destination is on the stack, load up the temp and store.
5746      * If there are many call-saved registers, the temp might live to
5747      * see another use; otherwise it'll be discarded.
5748      */
5749     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5750     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5751                arg_slot_stk_ofs(arg_slot));
5752 }
5753 
5754 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5755                             TCGTemp *ts, TCGRegSet *allocated_regs)
5756 {
5757     if (arg_slot_reg_p(l->arg_slot)) {
5758         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5759         load_arg_reg(s, reg, ts, *allocated_regs);
5760         tcg_regset_set_reg(*allocated_regs, reg);
5761     } else {
5762         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5763     }
5764 }
5765 
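/*
 * Illustrative sketch, not part of the build: with a hypothetical ABI
 * passing four arguments in registers, arg_slot 2 satisfies
 * arg_slot_reg_p() and is filled via load_arg_reg() from
 * tcg_target_call_iarg_regs[2], while arg_slot 6 is written to
 * TCG_REG_CALL_STACK + arg_slot_stk_ofs(6) via load_arg_stk().
 */
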
5766 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5767                          intptr_t ref_off, TCGRegSet *allocated_regs)
5768 {
5769     TCGReg reg;
5770 
5771     if (arg_slot_reg_p(arg_slot)) {
5772         reg = tcg_target_call_iarg_regs[arg_slot];
5773         tcg_reg_free(s, reg, *allocated_regs);
5774         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5775         tcg_regset_set_reg(*allocated_regs, reg);
5776     } else {
5777         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5778                             *allocated_regs, 0, false);
5779         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5780         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5781                    arg_slot_stk_ofs(arg_slot));
5782     }
5783 }
5784 
5785 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5786 {
5787     const int nb_oargs = TCGOP_CALLO(op);
5788     const int nb_iargs = TCGOP_CALLI(op);
5789     const TCGLifeData arg_life = op->life;
5790     const TCGHelperInfo *info = tcg_call_info(op);
5791     TCGRegSet allocated_regs = s->reserved_regs;
5792     int i;
5793 
5794     /*
5795      * Move inputs into place in reverse order,
5796      * so that we place stacked arguments first.
5797      */
5798     for (i = nb_iargs - 1; i >= 0; --i) {
5799         const TCGCallArgumentLoc *loc = &info->in[i];
5800         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5801 
5802         switch (loc->kind) {
5803         case TCG_CALL_ARG_NORMAL:
5804         case TCG_CALL_ARG_EXTEND_U:
5805         case TCG_CALL_ARG_EXTEND_S:
5806             load_arg_normal(s, loc, ts, &allocated_regs);
5807             break;
5808         case TCG_CALL_ARG_BY_REF:
5809             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5810             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5811                          arg_slot_stk_ofs(loc->ref_slot),
5812                          &allocated_regs);
5813             break;
5814         case TCG_CALL_ARG_BY_REF_N:
5815             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5816             break;
5817         default:
5818             g_assert_not_reached();
5819         }
5820     }
5821 
5822     /* Mark dead temporaries and free the associated registers.  */
5823     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5824         if (IS_DEAD_ARG(i)) {
5825             temp_dead(s, arg_temp(op->args[i]));
5826         }
5827     }
5828 
5829     /* Clobber call registers.  */
5830     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5831         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5832             tcg_reg_free(s, i, allocated_regs);
5833         }
5834     }
5835 
5836     /*
5837      * Save globals if they might be written by the helper,
5838      * sync them if they might be read.
5839      */
5840     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5841         /* Nothing to do */
5842     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5843         sync_globals(s, allocated_regs);
5844     } else {
5845         save_globals(s, allocated_regs);
5846     }
5847 
5848     /*
5849      * If the ABI passes a pointer to the returned struct as the first
5850      * argument, load that now.  Pass a pointer to the output home slot.
5851      */
5852     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5853         TCGTemp *ts = arg_temp(op->args[0]);
5854 
5855         if (!ts->mem_allocated) {
5856             temp_allocate_frame(s, ts);
5857         }
5858         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5859     }
5860 
5861     tcg_out_call(s, tcg_call_func(op), info);
5862 
5863     /* Assign output registers and emit moves if needed.  */
5864     switch (info->out_kind) {
5865     case TCG_CALL_RET_NORMAL:
5866         for (i = 0; i < nb_oargs; i++) {
5867             TCGTemp *ts = arg_temp(op->args[i]);
5868             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5869 
5870             /* ENV should not be modified.  */
5871             tcg_debug_assert(!temp_readonly(ts));
5872 
5873             set_temp_val_reg(s, ts, reg);
5874             ts->mem_coherent = 0;
5875         }
5876         break;
5877 
5878     case TCG_CALL_RET_BY_VEC:
5879         {
5880             TCGTemp *ts = arg_temp(op->args[0]);
5881 
5882             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5883             tcg_debug_assert(ts->temp_subindex == 0);
5884             if (!ts->mem_allocated) {
5885                 temp_allocate_frame(s, ts);
5886             }
5887             tcg_out_st(s, TCG_TYPE_V128,
5888                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5889                        ts->mem_base->reg, ts->mem_offset);
5890         }
5891         /* fall through to mark all parts in memory */
5892 
5893     case TCG_CALL_RET_BY_REF:
5894         /* The callee has performed a write through the reference. */
5895         for (i = 0; i < nb_oargs; i++) {
5896             TCGTemp *ts = arg_temp(op->args[i]);
5897             ts->val_type = TEMP_VAL_MEM;
5898         }
5899         break;
5900 
5901     default:
5902         g_assert_not_reached();
5903     }
5904 
5905     /* Flush or discard output registers as needed. */
5906     for (i = 0; i < nb_oargs; i++) {
5907         TCGTemp *ts = arg_temp(op->args[i]);
5908         if (NEED_SYNC_ARG(i)) {
5909             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5910         } else if (IS_DEAD_ARG(i)) {
5911             temp_dead(s, ts);
5912         }
5913     }
5914 }
5915 
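/*
 * Informal note on the ordering above: because inputs are processed in
 * reverse slot order, a hypothetical call with ten inputs first stores
 * the stacked slots 9..6 while most argument registers are still free,
 * and only then fills the register slots 5..0; the remaining
 * call-clobbered registers are flushed just before the call itself.
 */
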
5916 /**
5917  * atom_and_align_for_opc:
5918  * @s: tcg context
5919  * @opc: memory operation code
5920  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5921  * @allow_two_ops: true if we are prepared to issue two operations
5922  *
5923  * Return the alignment and atomicity to use for the inline fast path
5924  * for the given memory operation.  The alignment may be larger than
5925  * that specified in @opc, and the correct alignment will be diagnosed
5926  * by the slow path helper.
5927  *
5928  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5929  * and issue two loads or stores for subalignment.
5930  */
5931 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5932                                            MemOp host_atom, bool allow_two_ops)
5933 {
5934     MemOp align = memop_alignment_bits(opc);
5935     MemOp size = opc & MO_SIZE;
5936     MemOp half = size ? size - 1 : 0;
5937     MemOp atom = opc & MO_ATOM_MASK;
5938     MemOp atmax;
5939 
5940     switch (atom) {
5941     case MO_ATOM_NONE:
5942         /* The operation requires no specific atomicity. */
5943         atmax = MO_8;
5944         break;
5945 
5946     case MO_ATOM_IFALIGN:
5947         atmax = size;
5948         break;
5949 
5950     case MO_ATOM_IFALIGN_PAIR:
5951         atmax = half;
5952         break;
5953 
5954     case MO_ATOM_WITHIN16:
5955         atmax = size;
5956         if (size == MO_128) {
5957             /* Misalignment implies !within16, and therefore no atomicity. */
5958         } else if (host_atom != MO_ATOM_WITHIN16) {
5959             /* The host does not implement within16, so require alignment. */
5960             align = MAX(align, size);
5961         }
5962         break;
5963 
5964     case MO_ATOM_WITHIN16_PAIR:
5965         atmax = size;
5966         /*
5967          * Misalignment implies !within16, and therefore half atomicity.
5968          * Any host prepared for two operations can implement this with
5969          * half alignment.
5970          */
5971         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5972             align = MAX(align, half);
5973         }
5974         break;
5975 
5976     case MO_ATOM_SUBALIGN:
5977         atmax = size;
5978         if (host_atom != MO_ATOM_SUBALIGN) {
5979             /* If unaligned but not odd, there are subobjects up to half. */
5980             if (allow_two_ops) {
5981                 align = MAX(align, half);
5982             } else {
5983                 align = MAX(align, size);
5984             }
5985         }
5986         break;
5987 
5988     default:
5989         g_assert_not_reached();
5990     }
5991 
5992     return (TCGAtomAlign){ .atom = atmax, .align = align };
5993 }
5994 
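/*
 * Worked example (informal): for opc == (MO_64 | MO_ATOM_WITHIN16) on
 * a host that is only atomic for aligned operations (host_atom ==
 * MO_ATOM_IFALIGN), the WITHIN16 case raises align to MO_64, so the
 * inline fast path handles only naturally aligned 8-byte accesses and
 * the slow path helper diagnoses the rest.
 */
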
5995 /*
5996  * Similarly for qemu_ld/st slow path helpers.
5997  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5998  * using only the provided backend tcg_out_* functions.
5999  */
6000 
6001 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6002 {
6003     int ofs = arg_slot_stk_ofs(slot);
6004 
6005     /*
6006      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6007      * require extension to uint64_t, adjust the address for uint32_t.
6008      */
6009     if (HOST_BIG_ENDIAN &&
6010         TCG_TARGET_REG_BITS == 64 &&
6011         type == TCG_TYPE_I32) {
6012         ofs += 4;
6013     }
6014     return ofs;
6015 }
6016 
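/*
 * Illustrative example (informal): on a big-endian host with 64-bit
 * registers, an I32 argument bound for slot 0 is stored at
 * arg_slot_stk_ofs(0) + 4, i.e. into the half of the 64-bit slot from
 * which the helper will read its uint32_t.
 */
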
6017 static void tcg_out_helper_load_slots(TCGContext *s,
6018                                       unsigned nmov, TCGMovExtend *mov,
6019                                       const TCGLdstHelperParam *parm)
6020 {
6021     unsigned i;
6022     TCGReg dst3;
6023 
6024     /*
6025      * Start from the end, storing to the stack first.
6026      * This frees those registers, so we need not consider overlap.
6027      */
6028     for (i = nmov; i-- > 0; ) {
6029         unsigned slot = mov[i].dst;
6030 
6031         if (arg_slot_reg_p(slot)) {
6032             goto found_reg;
6033         }
6034 
6035         TCGReg src = mov[i].src;
6036         TCGType dst_type = mov[i].dst_type;
6037         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6038 
6039         /* The argument is going onto the stack; extend into scratch. */
6040         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6041             tcg_debug_assert(parm->ntmp != 0);
6042             mov[i].dst = src = parm->tmp[0];
6043             tcg_out_movext1(s, &mov[i]);
6044         }
6045 
6046         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6047                    tcg_out_helper_stk_ofs(dst_type, slot));
6048     }
6049     return;
6050 
6051  found_reg:
6052     /*
6053      * The remaining arguments are in registers.
6054      * Convert slot numbers to argument registers.
6055      */
6056     nmov = i + 1;
6057     for (i = 0; i < nmov; ++i) {
6058         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6059     }
6060 
6061     switch (nmov) {
6062     case 4:
6063         /* The backend must have provided enough temps for the worst case. */
6064         tcg_debug_assert(parm->ntmp >= 2);
6065 
6066         dst3 = mov[3].dst;
6067         for (unsigned j = 0; j < 3; ++j) {
6068             if (dst3 == mov[j].src) {
6069                 /*
6070                  * Conflict. Copy the source to a temporary, perform the
6071                  * remaining moves, then the extension from our scratch
6072                  * on the way out.
6073                  */
6074                 TCGReg scratch = parm->tmp[1];
6075 
6076                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6077                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6078                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6079                 return;
6080             }
6081         }
6082 
6083         /* No conflicts: perform this move and continue. */
6084         tcg_out_movext1(s, &mov[3]);
6085         /* fall through */
6086 
6087     case 3:
6088         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6089                         parm->ntmp ? parm->tmp[0] : -1);
6090         break;
6091     case 2:
6092         tcg_out_movext2(s, mov, mov + 1,
6093                         parm->ntmp ? parm->tmp[0] : -1);
6094         break;
6095     case 1:
6096         tcg_out_movext1(s, mov);
6097         break;
6098     default:
6099         g_assert_not_reached();
6100     }
6101 }
6102 
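/*
 * Illustrative conflict case (informal), with hypothetical registers:
 * suppose mov[0..3] target argument registers A0..A3 and A3 is also
 * mov[1].src.  Emitting mov[3] first would clobber mov[1]'s source, so
 * the code above parks mov[3].src in parm->tmp[1], resolves mov[0..2]
 * with tcg_out_movext3(), and extends from the scratch into A3 last.
 */
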
6103 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6104                                     TCGType type, tcg_target_long imm,
6105                                     const TCGLdstHelperParam *parm)
6106 {
6107     if (arg_slot_reg_p(slot)) {
6108         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6109     } else {
6110         int ofs = tcg_out_helper_stk_ofs(type, slot);
6111         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6112             tcg_debug_assert(parm->ntmp != 0);
6113             tcg_out_movi(s, type, parm->tmp[0], imm);
6114             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6115         }
6116     }
6117 }
6118 
6119 static void tcg_out_helper_load_common_args(TCGContext *s,
6120                                             const TCGLabelQemuLdst *ldst,
6121                                             const TCGLdstHelperParam *parm,
6122                                             const TCGHelperInfo *info,
6123                                             unsigned next_arg)
6124 {
6125     TCGMovExtend ptr_mov = {
6126         .dst_type = TCG_TYPE_PTR,
6127         .src_type = TCG_TYPE_PTR,
6128         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6129     };
6130     const TCGCallArgumentLoc *loc = &info->in[0];
6131     TCGType type;
6132     unsigned slot;
6133     tcg_target_ulong imm;
6134 
6135     /*
6136      * Handle env, which is always first.
6137      */
6138     ptr_mov.dst = loc->arg_slot;
6139     ptr_mov.src = TCG_AREG0;
6140     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6141 
6142     /*
6143      * Handle oi.
6144      */
6145     imm = ldst->oi;
6146     loc = &info->in[next_arg];
6147     type = TCG_TYPE_I32;
6148     switch (loc->kind) {
6149     case TCG_CALL_ARG_NORMAL:
6150         break;
6151     case TCG_CALL_ARG_EXTEND_U:
6152     case TCG_CALL_ARG_EXTEND_S:
6153         /* No extension required for MemOpIdx. */
6154         tcg_debug_assert(imm <= INT32_MAX);
6155         type = TCG_TYPE_REG;
6156         break;
6157     default:
6158         g_assert_not_reached();
6159     }
6160     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6161     next_arg++;
6162 
6163     /*
6164      * Handle ra.
6165      */
6166     loc = &info->in[next_arg];
6167     slot = loc->arg_slot;
6168     if (parm->ra_gen) {
6169         int arg_reg = -1;
6170         TCGReg ra_reg;
6171 
6172         if (arg_slot_reg_p(slot)) {
6173             arg_reg = tcg_target_call_iarg_regs[slot];
6174         }
6175         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6176 
6177         ptr_mov.dst = slot;
6178         ptr_mov.src = ra_reg;
6179         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6180     } else {
6181         imm = (uintptr_t)ldst->raddr;
6182         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6183     }
6184 }
6185 
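     /*
      * Fill MOV[] with the move/extend operations that place the value
      * in LO (and HI, for a register pair) into the argument slot(s)
      * at LOC.  Return the number of entries used: one if the value
      * fits in a single host register, two if it must be passed as a
      * pair (I64 on a 32-bit host, or I128 on a 64-bit host).
      */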
6186 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6187                                        const TCGCallArgumentLoc *loc,
6188                                        TCGType dst_type, TCGType src_type,
6189                                        TCGReg lo, TCGReg hi)
6190 {
6191     MemOp reg_mo;
6192 
6193     if (dst_type <= TCG_TYPE_REG) {
6194         MemOp src_ext;
6195 
6196         switch (loc->kind) {
6197         case TCG_CALL_ARG_NORMAL:
6198             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6199             break;
6200         case TCG_CALL_ARG_EXTEND_U:
6201             dst_type = TCG_TYPE_REG;
6202             src_ext = MO_UL;
6203             break;
6204         case TCG_CALL_ARG_EXTEND_S:
6205             dst_type = TCG_TYPE_REG;
6206             src_ext = MO_SL;
6207             break;
6208         default:
6209             g_assert_not_reached();
6210         }
6211 
6212         mov[0].dst = loc->arg_slot;
6213         mov[0].dst_type = dst_type;
6214         mov[0].src = lo;
6215         mov[0].src_type = src_type;
6216         mov[0].src_ext = src_ext;
6217         return 1;
6218     }
6219 
6220     if (TCG_TARGET_REG_BITS == 32) {
6221         assert(dst_type == TCG_TYPE_I64);
6222         reg_mo = MO_32;
6223     } else {
6224         assert(dst_type == TCG_TYPE_I128);
6225         reg_mo = MO_64;
6226     }
6227 
6228     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6229     mov[0].src = lo;
6230     mov[0].dst_type = TCG_TYPE_REG;
6231     mov[0].src_type = TCG_TYPE_REG;
6232     mov[0].src_ext = reg_mo;
6233 
6234     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6235     mov[1].src = hi;
6236     mov[1].dst_type = TCG_TYPE_REG;
6237     mov[1].src_type = TCG_TYPE_REG;
6238     mov[1].src_ext = reg_mo;
6239 
6240     return 2;
6241 }
6242 
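     /*
      * Emit code loading the arguments of a qemu_ld helper,
      * (env, addr, oi, retaddr), with the helper chosen by access size.
      */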
6243 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6244                                    const TCGLdstHelperParam *parm)
6245 {
6246     const TCGHelperInfo *info;
6247     const TCGCallArgumentLoc *loc;
6248     TCGMovExtend mov[2];
6249     unsigned next_arg, nmov;
6250     MemOp mop = get_memop(ldst->oi);
6251 
6252     switch (mop & MO_SIZE) {
6253     case MO_8:
6254     case MO_16:
6255     case MO_32:
6256         info = &info_helper_ld32_mmu;
6257         break;
6258     case MO_64:
6259         info = &info_helper_ld64_mmu;
6260         break;
6261     case MO_128:
6262         info = &info_helper_ld128_mmu;
6263         break;
6264     default:
6265         g_assert_not_reached();
6266     }
6267 
6268     /* Defer env argument. */
6269     next_arg = 1;
6270 
6271     loc = &info->in[next_arg];
6272     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6273         /*
6274          * 32-bit host with 32-bit guest: zero-extend the guest address
6275          * to 64-bits for the helper by storing the low part, then
6276          * load a zero for the high part.
6277          */
6278         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6279                                TCG_TYPE_I32, TCG_TYPE_I32,
6280                                ldst->addr_reg, -1);
6281         tcg_out_helper_load_slots(s, 1, mov, parm);
6282 
6283         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6284                                 TCG_TYPE_I32, 0, parm);
6285         next_arg += 2;
6286     } else {
6287         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6288                                       ldst->addr_reg, -1);
6289         tcg_out_helper_load_slots(s, nmov, mov, parm);
6290         next_arg += nmov;
6291     }
6292 
6293     switch (info->out_kind) {
6294     case TCG_CALL_RET_NORMAL:
6295     case TCG_CALL_RET_BY_VEC:
6296         break;
6297     case TCG_CALL_RET_BY_REF:
6298         /*
6299          * The return reference is in the first argument slot.
6300          * We need memory in which to return: re-use the top of stack.
6301          */
6302         {
6303             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6304 
6305             if (arg_slot_reg_p(0)) {
6306                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6307                                  TCG_REG_CALL_STACK, ofs_slot0);
6308             } else {
6309                 tcg_debug_assert(parm->ntmp != 0);
6310                 tcg_out_addi_ptr(s, parm->tmp[0],
6311                                  TCG_REG_CALL_STACK, ofs_slot0);
6312                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6313                            TCG_REG_CALL_STACK, ofs_slot0);
6314             }
6315         }
6316         break;
6317     default:
6318         g_assert_not_reached();
6319     }
6320 
6321     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6322 }
6323 
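     /*
      * Emit code moving a qemu_ld helper's return value into the data
      * register(s) of the slow path, with any extension implied by the
      * MemOp.  LOAD_SIGN is true if the helper itself has performed
      * the sign extension.
      */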
6324 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6325                                   bool load_sign,
6326                                   const TCGLdstHelperParam *parm)
6327 {
6328     MemOp mop = get_memop(ldst->oi);
6329     TCGMovExtend mov[2];
6330     int ofs_slot0;
6331 
6332     switch (ldst->type) {
6333     case TCG_TYPE_I64:
6334         if (TCG_TARGET_REG_BITS == 32) {
6335             break;
6336         }
6337         /* fall through */
6338 
6339     case TCG_TYPE_I32:
6340         mov[0].dst = ldst->datalo_reg;
6341         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6342         mov[0].dst_type = ldst->type;
6343         mov[0].src_type = TCG_TYPE_REG;
6344 
6345         /*
6346          * If load_sign, then we allowed the helper to perform the
6347          * appropriate sign extension to tcg_target_ulong, and all
6348          * we need now is a plain move.
6349          *
6350          * If the helper does not, then we expect the relevant extension
6351          * instruction to be no more expensive than a move, and
6352          * we thus save the icache etc by only using one of two
6353          * helper functions.
6354          */
6355         if (load_sign || !(mop & MO_SIGN)) {
6356             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6357                 mov[0].src_ext = MO_32;
6358             } else {
6359                 mov[0].src_ext = MO_64;
6360             }
6361         } else {
6362             mov[0].src_ext = mop & MO_SSIZE;
6363         }
6364         tcg_out_movext1(s, mov);
6365         return;
6366 
6367     case TCG_TYPE_I128:
6368         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6369         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6370         switch (TCG_TARGET_CALL_RET_I128) {
6371         case TCG_CALL_RET_NORMAL:
6372             break;
6373         case TCG_CALL_RET_BY_VEC:
6374             tcg_out_st(s, TCG_TYPE_V128,
6375                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6376                        TCG_REG_CALL_STACK, ofs_slot0);
6377             /* fall through */
6378         case TCG_CALL_RET_BY_REF:
6379             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6380                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6381             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6382                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6383             return;
6384         default:
6385             g_assert_not_reached();
6386         }
6387         break;
6388 
6389     default:
6390         g_assert_not_reached();
6391     }
6392 
6393     mov[0].dst = ldst->datalo_reg;
6394     mov[0].src =
6395         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6396     mov[0].dst_type = TCG_TYPE_REG;
6397     mov[0].src_type = TCG_TYPE_REG;
6398     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6399 
6400     mov[1].dst = ldst->datahi_reg;
6401     mov[1].src =
6402         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6403     mov[1].dst_type = TCG_TYPE_REG;
6404     mov[1].src_type = TCG_TYPE_REG;
6405     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6406 
6407     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6408 }
6409 
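     /*
      * Emit code loading the arguments of a qemu_st helper,
      * (env, addr, data, oi, retaddr), with the helper chosen by size.
      */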
6410 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6411                                    const TCGLdstHelperParam *parm)
6412 {
6413     const TCGHelperInfo *info;
6414     const TCGCallArgumentLoc *loc;
6415     TCGMovExtend mov[4];
6416     TCGType data_type;
6417     unsigned next_arg, nmov, n;
6418     MemOp mop = get_memop(ldst->oi);
6419 
6420     switch (mop & MO_SIZE) {
6421     case MO_8:
6422     case MO_16:
6423     case MO_32:
6424         info = &info_helper_st32_mmu;
6425         data_type = TCG_TYPE_I32;
6426         break;
6427     case MO_64:
6428         info = &info_helper_st64_mmu;
6429         data_type = TCG_TYPE_I64;
6430         break;
6431     case MO_128:
6432         info = &info_helper_st128_mmu;
6433         data_type = TCG_TYPE_I128;
6434         break;
6435     default:
6436         g_assert_not_reached();
6437     }
6438 
6439     /* Defer env argument. */
6440     next_arg = 1;
6441     nmov = 0;
6442 
6443     /* Handle addr argument. */
6444     loc = &info->in[next_arg];
6445     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6446     if (TCG_TARGET_REG_BITS == 32) {
6447         /*
6448          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6449          * to 64-bits for the helper by storing the low part.  Later,
6450          * after we have processed the register inputs, we will load a
6451          * zero for the high part.
6452          */
6453         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6454                                TCG_TYPE_I32, TCG_TYPE_I32,
6455                                ldst->addr_reg, -1);
6456         next_arg += 2;
6457         nmov += 1;
6458     } else {
6459         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6460                                    ldst->addr_reg, -1);
6461         next_arg += n;
6462         nmov += n;
6463     }
6464 
6465     /* Handle data argument. */
6466     loc = &info->in[next_arg];
6467     switch (loc->kind) {
6468     case TCG_CALL_ARG_NORMAL:
6469     case TCG_CALL_ARG_EXTEND_U:
6470     case TCG_CALL_ARG_EXTEND_S:
6471         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6472                                    ldst->datalo_reg, ldst->datahi_reg);
6473         next_arg += n;
6474         nmov += n;
6475         tcg_out_helper_load_slots(s, nmov, mov, parm);
6476         break;
6477 
6478     case TCG_CALL_ARG_BY_REF:
6479         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6480         tcg_debug_assert(data_type == TCG_TYPE_I128);
6481         tcg_out_st(s, TCG_TYPE_I64,
6482                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6483                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6484         tcg_out_st(s, TCG_TYPE_I64,
6485                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6486                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6487 
6488         tcg_out_helper_load_slots(s, nmov, mov, parm);
6489 
6490         if (arg_slot_reg_p(loc->arg_slot)) {
6491             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6492                              TCG_REG_CALL_STACK,
6493                              arg_slot_stk_ofs(loc->ref_slot));
6494         } else {
6495             tcg_debug_assert(parm->ntmp != 0);
6496             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6497                              arg_slot_stk_ofs(loc->ref_slot));
6498             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6499                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6500         }
6501         next_arg += 2;
6502         break;
6503 
6504     default:
6505         g_assert_not_reached();
6506     }
6507 
6508     if (TCG_TARGET_REG_BITS == 32) {
6509         /* Zero extend the address by loading a zero for the high part. */
6510         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6511         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6512     }
6513 
6514     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6515 }
6516 
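     /*
      * Translate the ops collected in S into host code for TB.  Return
      * the size in bytes of the generated code, or a negative value if
      * generation must be restarted: -1 if the code buffer overflowed,
      * -2 if the TB grew too large for the 16-bit insn offsets.
      */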
6517 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6518 {
6519     int i, start_words, num_insns;
6520     TCGOp *op;
6521 
6522     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6523                  && qemu_log_in_addr_range(pc_start))) {
6524         FILE *logfile = qemu_log_trylock();
6525         if (logfile) {
6526             fprintf(logfile, "OP:\n");
6527             tcg_dump_ops(s, logfile, false);
6528             fprintf(logfile, "\n");
6529             qemu_log_unlock(logfile);
6530         }
6531     }
6532 
6533 #ifdef CONFIG_DEBUG_TCG
6534     /* Ensure all labels referenced have been emitted.  */
6535     {
6536         TCGLabel *l;
6537         bool error = false;
6538 
6539         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6540             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6541                 qemu_log_mask(CPU_LOG_TB_OP,
6542                               "$L%d referenced but not present.\n", l->id);
6543                 error = true;
6544             }
6545         }
6546         assert(!error);
6547     }
6548 #endif
6549 
6550     /* Do not reuse any EBB that may be allocated within the TB. */
6551     tcg_temp_ebb_reset_freed(s);
6552 
6553     tcg_optimize(s);
6554 
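         /*
          * Remove ops that can never execute, then compute the liveness
          * information consumed by the register allocator.
          */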
6555     reachable_code_pass(s);
6556     liveness_pass_0(s);
6557     liveness_pass_1(s);
6558 
6559     if (s->nb_indirects > 0) {
6560         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6561                      && qemu_log_in_addr_range(pc_start))) {
6562             FILE *logfile = qemu_log_trylock();
6563             if (logfile) {
6564                 fprintf(logfile, "OP before indirect lowering:\n");
6565                 tcg_dump_ops(s, logfile, false);
6566                 fprintf(logfile, "\n");
6567                 qemu_log_unlock(logfile);
6568             }
6569         }
6570 
6571         /* Replace indirect temps with direct temps.  */
6572         if (liveness_pass_2(s)) {
6573             /* If changes were made, re-run liveness.  */
6574             liveness_pass_1(s);
6575         }
6576     }
6577 
6578     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6579                  && qemu_log_in_addr_range(pc_start))) {
6580         FILE *logfile = qemu_log_trylock();
6581         if (logfile) {
6582             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6583             tcg_dump_ops(s, logfile, true);
6584             fprintf(logfile, "\n");
6585             qemu_log_unlock(logfile);
6586         }
6587     }
6588 
6589     /* Initialize goto_tb jump offsets. */
6590     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6591     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6592     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6593     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6594 
6595     tcg_reg_alloc_start(s);
6596 
6597     /*
6598      * Reset the buffer pointers when restarting after overflow.
6599      * TODO: Move this into translate-all.c with the rest of the
6600      * buffer management.  Having only this done here is confusing.
6601      */
6602     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6603     s->code_ptr = s->code_buf;
6604     s->data_gen_ptr = NULL;
6605 
6606     QSIMPLEQ_INIT(&s->ldst_labels);
6607     s->pool_labels = NULL;
6608 
6609     start_words = s->insn_start_words;
6610     s->gen_insn_data =
6611         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6612 
6613     tcg_out_tb_start(s);
6614 
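         /*
          * Main loop: allocate registers and emit host code for each op
          * in order.  num_insns counts insn_start markers so that each
          * guest instruction's operands and end offset land in
          * gen_insn_data and gen_insn_end_off, used later to restore
          * guest state mid-TB.
          */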
6615     num_insns = -1;
6616     QTAILQ_FOREACH(op, &s->ops, link) {
6617         TCGOpcode opc = op->opc;
6618 
6619         switch (opc) {
6620         case INDEX_op_mov:
6621         case INDEX_op_mov_vec:
6622             tcg_reg_alloc_mov(s, op);
6623             break;
6624         case INDEX_op_dup_vec:
6625             tcg_reg_alloc_dup(s, op);
6626             break;
6627         case INDEX_op_insn_start:
6628             if (num_insns >= 0) {
6629                 size_t off = tcg_current_code_size(s);
6630                 s->gen_insn_end_off[num_insns] = off;
6631                 /* Assert that we do not overflow our stored offset.  */
6632                 assert(s->gen_insn_end_off[num_insns] == off);
6633             }
6634             num_insns++;
6635             for (i = 0; i < start_words; ++i) {
6636                 s->gen_insn_data[num_insns * start_words + i] =
6637                     tcg_get_insn_start_param(op, i);
6638             }
6639             break;
6640         case INDEX_op_discard:
6641             temp_dead(s, arg_temp(op->args[0]));
6642             break;
6643         case INDEX_op_set_label:
6644             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6645             tcg_out_label(s, arg_label(op->args[0]));
6646             break;
6647         case INDEX_op_call:
6648             tcg_reg_alloc_call(s, op);
6649             break;
6650         case INDEX_op_exit_tb:
6651             tcg_out_exit_tb(s, op->args[0]);
6652             break;
6653         case INDEX_op_goto_tb:
6654             tcg_out_goto_tb(s, op->args[0]);
6655             break;
6656         case INDEX_op_dup2_vec:
6657             if (tcg_reg_alloc_dup2(s, op)) {
6658                 break;
6659             }
6660             /* fall through */
6661         default:
6662             /* Sanity check that we've not introduced any unhandled opcodes. */
6663             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6664                                               TCGOP_FLAGS(op)));
6665             /* Note: it would be much faster to have specialized
6666                register allocator functions for some common
6667                argument patterns. */
6668             tcg_reg_alloc_op(s, op);
6669             break;
6670         }
6671         /* Test for (pending) buffer overflow.  The assumption is that any
6672            one operation beginning below the high water mark cannot overrun
6673            the buffer completely.  Thus we can test for overflow after
6674            generating code without having to check during generation.  */
6675         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6676             return -1;
6677         }
6678         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6679         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6680             return -2;
6681         }
6682     }
6683     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6684     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6685 
6686     /* Generate TB finalization at the end of block */
6687     i = tcg_out_ldst_finalize(s);
6688     if (i < 0) {
6689         return i;
6690     }
6691     i = tcg_out_pool_finalize(s);
6692     if (i < 0) {
6693         return i;
6694     }
6695     if (!tcg_resolve_relocs(s)) {
6696         return -2;
6697     }
6698 
6699 #ifndef CONFIG_TCG_INTERPRETER
6700     /* flush instruction cache */
6701     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6702                         (uintptr_t)s->code_buf,
6703                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6704 #endif
6705 
6706     return tcg_current_code_size(s);
6707 }
6708 
6709 #ifdef ELF_HOST_MACHINE
6710 /* In order to use this feature, the backend needs to do three things:
6711 
6712    (1) Define ELF_HOST_MACHINE to indicate both what value to
6713        put into the ELF image and to indicate support for the feature.
6714 
6715    (2) Define tcg_register_jit.  This should create a buffer containing
6716        the contents of a .debug_frame section that describes the post-
6717        prologue unwind info for the tcg machine.
6718 
6719    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6720 */
6721 
6722 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6723 typedef enum {
6724     JIT_NOACTION = 0,
6725     JIT_REGISTER_FN,
6726     JIT_UNREGISTER_FN
6727 } jit_actions_t;
6728 
6729 struct jit_code_entry {
6730     struct jit_code_entry *next_entry;
6731     struct jit_code_entry *prev_entry;
6732     const void *symfile_addr;
6733     uint64_t symfile_size;
6734 };
6735 
6736 struct jit_descriptor {
6737     uint32_t version;
6738     uint32_t action_flag;
6739     struct jit_code_entry *relevant_entry;
6740     struct jit_code_entry *first_entry;
6741 };
6742 
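     /* GDB sets a breakpoint on this function; the empty asm keeps the
        call from being optimized away, so the debugger regains control
        whenever new JIT debug info is registered.  */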
6743 void __jit_debug_register_code(void) __attribute__((noinline));
6744 void __jit_debug_register_code(void)
6745 {
6746     asm("");
6747 }
6748 
6749 /* Must statically initialize the version, because GDB may check
6750    the version before we can set it.  */
6751 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6752 
6753 /* End GDB interface.  */
6754 
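     /* Return the offset of STR within STRTAB.  The string must be
        present; callers only look up names baked into img_template
        below.  */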
6755 static int find_string(const char *strtab, const char *str)
6756 {
6757     const char *p = strtab + 1;
6758 
6759     while (1) {
6760         if (strcmp(p, str) == 0) {
6761             return p - strtab;
6762         }
6763         p += strlen(p) + 1;
6764     }
6765 }
6766 
6767 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6768                                  const void *debug_frame,
6769                                  size_t debug_frame_size)
6770 {
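         /*
          * A minimal DWARF .debug_info section: one compile unit DIE
          * and one subprogram DIE covering all of code_gen_buffer,
          * matching the two abbreviations spelled out in the
          * .debug_abbrev bytes (da) below.
          */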
6771     struct __attribute__((packed)) DebugInfo {
6772         uint32_t  len;
6773         uint16_t  version;
6774         uint32_t  abbrev;
6775         uint8_t   ptr_size;
6776         uint8_t   cu_die;
6777         uint16_t  cu_lang;
6778         uintptr_t cu_low_pc;
6779         uintptr_t cu_high_pc;
6780         uint8_t   fn_die;
6781         char      fn_name[16];
6782         uintptr_t fn_low_pc;
6783         uintptr_t fn_high_pc;
6784         uint8_t   cu_eoc;
6785     };
6786 
6787     struct ElfImage {
6788         ElfW(Ehdr) ehdr;
6789         ElfW(Phdr) phdr;
6790         ElfW(Shdr) shdr[7];
6791         ElfW(Sym)  sym[2];
6792         struct DebugInfo di;
6793         uint8_t    da[24];
6794         char       str[80];
6795     };
6796 
6797     struct ElfImage *img;
6798 
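         /*
          * Everything that does not depend on the buffer address is
          * prepared statically in this template; the address-dependent
          * fields are patched in after the copy below.
          */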
6799     static const struct ElfImage img_template = {
6800         .ehdr = {
6801             .e_ident[EI_MAG0] = ELFMAG0,
6802             .e_ident[EI_MAG1] = ELFMAG1,
6803             .e_ident[EI_MAG2] = ELFMAG2,
6804             .e_ident[EI_MAG3] = ELFMAG3,
6805             .e_ident[EI_CLASS] = ELF_CLASS,
6806             .e_ident[EI_DATA] = ELF_DATA,
6807             .e_ident[EI_VERSION] = EV_CURRENT,
6808             .e_type = ET_EXEC,
6809             .e_machine = ELF_HOST_MACHINE,
6810             .e_version = EV_CURRENT,
6811             .e_phoff = offsetof(struct ElfImage, phdr),
6812             .e_shoff = offsetof(struct ElfImage, shdr),
6813             .e_ehsize = sizeof(ElfW(Ehdr)),
6814             .e_phentsize = sizeof(ElfW(Phdr)),
6815             .e_phnum = 1,
6816             .e_shentsize = sizeof(ElfW(Shdr)),
6817             .e_shnum = ARRAY_SIZE(img->shdr),
6818             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6819 #ifdef ELF_HOST_FLAGS
6820             .e_flags = ELF_HOST_FLAGS,
6821 #endif
6822 #ifdef ELF_OSABI
6823             .e_ident[EI_OSABI] = ELF_OSABI,
6824 #endif
6825         },
6826         .phdr = {
6827             .p_type = PT_LOAD,
6828             .p_flags = PF_X,
6829         },
6830         .shdr = {
6831             [0] = { .sh_type = SHT_NULL },
6832             /* Trick: The contents of code_gen_buffer are not present in
6833                this fake ELF file; that got allocated elsewhere.  Therefore
6834                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6835                will not look for contents.  We can record any address.  */
6836             [1] = { /* .text */
6837                 .sh_type = SHT_NOBITS,
6838                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6839             },
6840             [2] = { /* .debug_info */
6841                 .sh_type = SHT_PROGBITS,
6842                 .sh_offset = offsetof(struct ElfImage, di),
6843                 .sh_size = sizeof(struct DebugInfo),
6844             },
6845             [3] = { /* .debug_abbrev */
6846                 .sh_type = SHT_PROGBITS,
6847                 .sh_offset = offsetof(struct ElfImage, da),
6848                 .sh_size = sizeof(img->da),
6849             },
6850             [4] = { /* .debug_frame */
6851                 .sh_type = SHT_PROGBITS,
6852                 .sh_offset = sizeof(struct ElfImage),
6853             },
6854             [5] = { /* .symtab */
6855                 .sh_type = SHT_SYMTAB,
6856                 .sh_offset = offsetof(struct ElfImage, sym),
6857                 .sh_size = sizeof(img->sym),
6858                 .sh_info = 1,
6859                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6860                 .sh_entsize = sizeof(ElfW(Sym)),
6861             },
6862             [6] = { /* .strtab */
6863                 .sh_type = SHT_STRTAB,
6864                 .sh_offset = offsetof(struct ElfImage, str),
6865                 .sh_size = sizeof(img->str),
6866             }
6867         },
6868         .sym = {
6869             [1] = { /* code_gen_buffer */
6870                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6871                 .st_shndx = 1,
6872             }
6873         },
6874         .di = {
6875             .len = sizeof(struct DebugInfo) - 4,
6876             .version = 2,
6877             .ptr_size = sizeof(void *),
6878             .cu_die = 1,
6879             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6880             .fn_die = 2,
6881             .fn_name = "code_gen_buffer"
6882         },
6883         .da = {
6884             1,          /* abbrev number (the cu) */
6885             0x11, 1,    /* DW_TAG_compile_unit, has children */
6886             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6887             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6888             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6889             0, 0,       /* end of abbrev */
6890             2,          /* abbrev number (the fn) */
6891             0x2e, 0,    /* DW_TAG_subprogram, no children */
6892             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6893             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6894             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6895             0, 0,       /* end of abbrev */
6896             0           /* no more abbrev */
6897         },
6898         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6899                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6900     };
6901 
6902     /* We only need a single jit entry; statically allocate it.  */
6903     static struct jit_code_entry one_entry;
6904 
6905     uintptr_t buf = (uintptr_t)buf_ptr;
6906     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6907     DebugFrameHeader *dfh;
6908 
6909     img = g_malloc(img_size);
6910     *img = img_template;
6911 
6912     img->phdr.p_vaddr = buf;
6913     img->phdr.p_paddr = buf;
6914     img->phdr.p_memsz = buf_size;
6915 
6916     img->shdr[1].sh_name = find_string(img->str, ".text");
6917     img->shdr[1].sh_addr = buf;
6918     img->shdr[1].sh_size = buf_size;
6919 
6920     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6921     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6922 
6923     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6924     img->shdr[4].sh_size = debug_frame_size;
6925 
6926     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6927     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6928 
6929     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6930     img->sym[1].st_value = buf;
6931     img->sym[1].st_size = buf_size;
6932 
6933     img->di.cu_low_pc = buf;
6934     img->di.cu_high_pc = buf + buf_size;
6935     img->di.fn_low_pc = buf;
6936     img->di.fn_high_pc = buf + buf_size;
6937 
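         /* The .debug_frame is appended whole after the image; point
            its FDE at the code buffer.  */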
6938     dfh = (DebugFrameHeader *)(img + 1);
6939     memcpy(dfh, debug_frame, debug_frame_size);
6940     dfh->fde.func_start = buf;
6941     dfh->fde.func_len = buf_size;
6942 
6943 #ifdef DEBUG_JIT
6944     /* Enable this block to debug creation of the ELF image file.
6945        One can inspect it with readelf, objdump, or other utilities.  */
6946     {
6947         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6948         FILE *f = fopen(jit, "w+b");
6949         if (f) {
6950             if (fwrite(img, img_size, 1, f) != 1) {
6951                 /* Avoid the unused return value warning for fwrite.  */
6952             }
6953             fclose(f);
6954         }
6955     }
6956 #endif
6957 
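         /* Publish the constructed image to GDB via the JIT interface
            defined above.  */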
6958     one_entry.symfile_addr = img;
6959     one_entry.symfile_size = img_size;
6960 
6961     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6962     __jit_debug_descriptor.relevant_entry = &one_entry;
6963     __jit_debug_descriptor.first_entry = &one_entry;
6964     __jit_debug_register_code();
6965 }
6966 #else
6967 /* No support for the feature.  Provide the entry point expected by exec.c,
6968    and implement the internal function we declared earlier.  */
6969 
6970 static void tcg_register_jit_int(const void *buf, size_t size,
6971                                  const void *debug_frame,
6972                                  size_t debug_frame_size)
6973 {
6974 }
6975 
6976 void tcg_register_jit(const void *buf, size_t buf_size)
6977 {
6978 }
6979 #endif /* ELF_HOST_MACHINE */
6980 
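     /* With no vector support in the backend, no vector op should have
        been created, so this expansion hook must never be called.  */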
6981 #if !TCG_TARGET_MAYBE_vec
6982 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6983 {
6984     g_assert_not_reached();
6985 }
6986 #endif
6987