xref: /openbmc/qemu/tcg/tcg.c (revision 3161f9f4)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "accel/tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "exec/user/guest-base.h"
61 #endif
62 
63 /* Forward declarations for functions declared in tcg-target.c.inc and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
68                         intptr_t value, intptr_t addend);
69 
70 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Fixed-size leading part of a debug-frame CIE.
 * The first member is forced to host-pointer alignment, which therefore
 * also becomes the alignment of the whole structure.
 * NOTE(review): field names appear to follow the DWARF CIE layout
 * (length, id, version, augmentation string, alignment factors,
 * return-address column) -- confirm against the DWARF specification.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
80 
/*
 * Fixed-size leading part of a debug-frame FDE.  Packed, with the first
 * member forced to host-pointer alignment.  func_start and func_len are
 * host-pointer sized.
 * NOTE(review): presumably these describe the address range covered by
 * the generated code buffer -- confirm at the point where they are filled.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
87 
/* A CIE immediately followed by one FDE header, laid out without padding. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
92 
/*
 * Record describing one guest load/store (qemu_ld/qemu_st) whose
 * out-of-line part is still to be emitted.  Records are chained through
 * @next into a QSIMPLEQ.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
105 
106 static void tcg_register_jit_int(const void *buf, size_t size,
107                                  const void *debug_frame,
108                                  size_t debug_frame_size)
109     __attribute__((unused));
110 
111 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
112 static void tcg_out_tb_start(TCGContext *s);
113 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
114                        intptr_t arg2);
115 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
116 static void tcg_out_movi(TCGContext *s, TCGType type,
117                          TCGReg ret, tcg_target_long arg);
118 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
128 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
129 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
130 static void tcg_out_goto_tb(TCGContext *s, int which);
131 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
132                        const TCGArg args[TCG_MAX_OP_ARGS],
133                        const int const_args[TCG_MAX_OP_ARGS]);
134 #if TCG_TARGET_MAYBE_vec
135 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
136                             TCGReg dst, TCGReg src);
137 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, TCGReg base, intptr_t offset);
139 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
140                              TCGReg dst, int64_t arg);
141 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
142                            unsigned vecl, unsigned vece,
143                            const TCGArg args[TCG_MAX_OP_ARGS],
144                            const int const_args[TCG_MAX_OP_ARGS]);
145 #else
/*
 * Stand-ins used when TCG_TARGET_MAYBE_vec is false: they let code that
 * references the vector emitters compile, but none of them may ever be
 * executed -- each one asserts immediately.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
168 #endif
169 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
170                        intptr_t arg2);
171 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
172                         TCGReg base, intptr_t ofs);
173 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
174                          const TCGHelperInfo *info);
175 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
176 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
177 #ifdef TCG_TARGET_NEED_LDST_LABELS
178 static int tcg_out_ldst_finalize(TCGContext *s);
179 #endif
180 
181 #ifndef CONFIG_USER_ONLY
182 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
183 #endif
184 
/*
 * Parameters passed to the tcg_out_{ld,st}_helper_* routines.
 * NOTE(review): the exact contract is defined by those routines, which
 * are outside this view; the notes below are read from the names only.
 */
typedef struct TCGLdstHelperParam {
    /* Callback returning a register; presumably yields the return address. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    /* Presumably the number of valid entries in tmp[] -- confirm. */
    unsigned ntmp;
    /* Up to three values; presumably scratch register numbers -- confirm. */
    int tmp[3];
} TCGLdstHelperParam;
190 
191 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                    const TCGLdstHelperParam *p)
193     __attribute__((unused));
194 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
195                                   bool load_sign, const TCGLdstHelperParam *p)
196     __attribute__((unused));
197 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
198                                    const TCGLdstHelperParam *p)
199     __attribute__((unused));
200 
/*
 * Load helper entry points, indexed by the size-and-sign part of a MemOp
 * (the table has MO_SSIZE + 1 slots).  The sign-extending 32-bit and the
 * 128-bit entries are only populated when the host register width is
 * 64 bits; otherwise those slots stay NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
213 
/*
 * Store helper entry points, indexed by the size part of a MemOp
 * (the table has MO_SIZE + 1 slots).  The 128-bit entry is only
 * populated when the host register width is 64 bits.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
223 
/* Pair of values returned by atom_and_align_for_opc(). */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
228 
229 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
230                                            MemOp host_atom, bool allow_two_ops)
231     __attribute__((unused));
232 
233 #ifdef CONFIG_USER_ONLY
234 bool tcg_use_softmmu;
235 #endif
236 
237 TCGContext tcg_init_ctx;
238 __thread TCGContext *tcg_ctx;
239 
240 TCGContext **tcg_ctxs;
241 unsigned int tcg_cur_ctxs;
242 unsigned int tcg_max_ctxs;
243 TCGv_env tcg_env;
244 const void *tcg_code_gen_epilogue;
245 uintptr_t tcg_splitwx_diff;
246 
247 #ifndef CONFIG_TCG_INTERPRETER
248 tcg_prologue_fn *tcg_qemu_tb_exec;
249 #endif
250 
251 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
252 static TCGRegSet tcg_target_call_clobber_regs;
253 
254 #if TCG_TARGET_INSN_UNIT_SIZE == 1
255 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
256 {
257     *s->code_ptr++ = v;
258 }
259 
260 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
261                                                       uint8_t v)
262 {
263     *p = v;
264 }
265 #endif
266 
267 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
268 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
269 {
270     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
271         *s->code_ptr++ = v;
272     } else {
273         tcg_insn_unit *p = s->code_ptr;
274         memcpy(p, &v, sizeof(v));
275         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
276     }
277 }
278 
279 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
280                                                        uint16_t v)
281 {
282     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
283         *p = v;
284     } else {
285         memcpy(p, &v, sizeof(v));
286     }
287 }
288 #endif
289 
290 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
291 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
292 {
293     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
294         *s->code_ptr++ = v;
295     } else {
296         tcg_insn_unit *p = s->code_ptr;
297         memcpy(p, &v, sizeof(v));
298         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
299     }
300 }
301 
302 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
303                                                        uint32_t v)
304 {
305     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
306         *p = v;
307     } else {
308         memcpy(p, &v, sizeof(v));
309     }
310 }
311 #endif
312 
313 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
314 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
315 {
316     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
317         *s->code_ptr++ = v;
318     } else {
319         tcg_insn_unit *p = s->code_ptr;
320         memcpy(p, &v, sizeof(v));
321         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
322     }
323 }
324 
325 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
326                                                        uint64_t v)
327 {
328     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
329         *p = v;
330     } else {
331         memcpy(p, &v, sizeof(v));
332     }
333 }
334 #endif
335 
336 /* label relocation processing */
337 
338 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
339                           TCGLabel *l, intptr_t addend)
340 {
341     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
342 
343     r->type = type;
344     r->ptr = code_ptr;
345     r->addend = addend;
346     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
347 }
348 
349 static void tcg_out_label(TCGContext *s, TCGLabel *l)
350 {
351     tcg_debug_assert(!l->has_value);
352     l->has_value = 1;
353     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
354 }
355 
356 TCGLabel *gen_new_label(void)
357 {
358     TCGContext *s = tcg_ctx;
359     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
360 
361     memset(l, 0, sizeof(TCGLabel));
362     l->id = s->nb_labels++;
363     QSIMPLEQ_INIT(&l->branches);
364     QSIMPLEQ_INIT(&l->relocs);
365 
366     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
367 
368     return l;
369 }
370 
371 static bool tcg_resolve_relocs(TCGContext *s)
372 {
373     TCGLabel *l;
374 
375     QSIMPLEQ_FOREACH(l, &s->labels, next) {
376         TCGRelocation *r;
377         uintptr_t value = l->u.value;
378 
379         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
380             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
381                 return false;
382             }
383         }
384     }
385     return true;
386 }
387 
/*
 * Record the current size of the generated code as the reset offset of
 * jump slot @which in the TB being generated.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
396 
/*
 * Record the current size of the generated code as the insn offset of
 * jump slot @which in the TB being generated.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
405 
406 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
407 {
408     /*
409      * Return the read-execute version of the pointer, for the benefit
410      * of any pc-relative addressing mode.
411      */
412     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
413 }
414 
415 static int __attribute__((unused))
416 tlb_mask_table_ofs(TCGContext *s, int which)
417 {
418     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
419             sizeof(CPUNegativeOffsetState));
420 }
421 
422 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the jump buffer in s->jmp_trans, delivering the value -2. */
    siglongjmp(s->jmp_trans, -2);
}
428 
429 /*
430  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
431  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
432  *
433  * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
435  * argument stack slot), converting to TCGReg once all arguments that
436  * are destined for the stack are processed.
437  */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination: a TCGReg, or temporarily a slot number */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
445 
446 /**
447  * tcg_out_movext -- move and extend
448  * @s: tcg context
449  * @dst_type: integral type for destination
450  * @dst: destination register
451  * @src_type: integral type for source
452  * @src_ext: extension to apply to source
453  * @src: source register
454  *
455  * Move or extend @src into @dst, depending on @src_ext and the types.
456  */
457 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
458                            TCGType src_type, MemOp src_ext, TCGReg src)
459 {
460     switch (src_ext) {
461     case MO_UB:
462         tcg_out_ext8u(s, dst, src);
463         break;
464     case MO_SB:
465         tcg_out_ext8s(s, dst_type, dst, src);
466         break;
467     case MO_UW:
468         tcg_out_ext16u(s, dst, src);
469         break;
470     case MO_SW:
471         tcg_out_ext16s(s, dst_type, dst, src);
472         break;
473     case MO_UL:
474     case MO_SL:
475         if (dst_type == TCG_TYPE_I32) {
476             if (src_type == TCG_TYPE_I32) {
477                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
478             } else {
479                 tcg_out_extrl_i64_i32(s, dst, src);
480             }
481         } else if (src_type == TCG_TYPE_I32) {
482             if (src_ext & MO_SIGN) {
483                 tcg_out_exts_i32_i64(s, dst, src);
484             } else {
485                 tcg_out_extu_i32_i64(s, dst, src);
486             }
487         } else {
488             if (src_ext & MO_SIGN) {
489                 tcg_out_ext32s(s, dst, src);
490             } else {
491                 tcg_out_ext32u(s, dst, src);
492             }
493         }
494         break;
495     case MO_UQ:
496         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
497         if (dst_type == TCG_TYPE_I32) {
498             tcg_out_extrl_i64_i32(s, dst, src);
499         } else {
500             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
501         }
502         break;
503     default:
504         g_assert_not_reached();
505     }
506 }
507 
508 /* Minor variations on a theme, using a structure. */
509 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
510                                     TCGReg src)
511 {
512     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
513 }
514 
515 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
516 {
517     tcg_out_movext1_new_src(s, i, i->src);
518 }
519 
520 /**
 * tcg_out_movext2 -- move and extend two pairs
522  * @s: tcg context
523  * @i1: first move description
524  * @i2: second move description
525  * @scratch: temporary register, or -1 for none
526  *
527  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
528  * between the sources and destinations.
529  */
530 
531 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
532                             const TCGMovExtend *i2, int scratch)
533 {
534     TCGReg src1 = i1->src;
535     TCGReg src2 = i2->src;
536 
537     if (i1->dst != src2) {
538         tcg_out_movext1(s, i1);
539         tcg_out_movext1(s, i2);
540         return;
541     }
542     if (i2->dst == src1) {
543         TCGType src1_type = i1->src_type;
544         TCGType src2_type = i2->src_type;
545 
546         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
547             /* The data is now in the correct registers, now extend. */
548             src1 = i2->src;
549             src2 = i1->src;
550         } else {
551             tcg_debug_assert(scratch >= 0);
552             tcg_out_mov(s, src1_type, scratch, src1);
553             src1 = scratch;
554         }
555     }
556     tcg_out_movext1_new_src(s, i2, src2);
557     tcg_out_movext1_new_src(s, i1, src1);
558 }
559 
560 /**
 * tcg_out_movext3 -- move and extend three pairs
562  * @s: tcg context
563  * @i1: first move description
564  * @i2: second move description
565  * @i3: third move description
566  * @scratch: temporary register, or -1 for none
567  *
568  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
569  * between the sources and destinations.
570  */
571 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If one move's destination is not the source of either other move,
     * emit that move first and hand the remaining two to tcg_out_movext2.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /*
             * No xchg: save src1 in the scratch, then emit i3 and i2 so
             * each source is read before it is overwritten; i1 last,
             * reading the saved copy.
             */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /*
             * No xchg: save src1 in the scratch, then emit i2 and i3 so
             * each source is read before it is overwritten; i1 last,
             * reading the saved copy.
             */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        /* Every destination aliases some source, yet no 3-cycle matched. */
        g_assert_not_reached();
    }
}
635 
636 #define C_PFX1(P, A)                    P##A
637 #define C_PFX2(P, A, B)                 P##A##_##B
638 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
639 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
640 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
641 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
642 
643 /* Define an enumeration for the various combinations. */
644 
645 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
646 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
647 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
648 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
649 
650 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
651 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
652 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
653 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
654 
655 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
656 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
657 
658 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
659 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
660 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
661 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
662 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
663 
664 typedef enum {
665 #include "tcg-target-con-set.h"
666 } TCGConstraintSetIndex;
667 
668 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
669 
670 #undef C_O0_I1
671 #undef C_O0_I2
672 #undef C_O0_I3
673 #undef C_O0_I4
674 #undef C_O1_I1
675 #undef C_O1_I2
676 #undef C_O1_I3
677 #undef C_O1_I4
678 #undef C_N1_I2
679 #undef C_N2_I1
680 #undef C_O2_I1
681 #undef C_O2_I2
682 #undef C_O2_I3
683 #undef C_O2_I4
684 #undef C_N1_O1_I4
685 
686 /* Put all of the constraint sets into an array, indexed by the enum. */
687 
688 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
689 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
690 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
691 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
692 
693 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
694 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
695 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
696 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
697 
698 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
699 #define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
700 
701 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
702 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
703 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
704 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
705 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
706 
707 static const TCGTargetOpDef constraint_sets[] = {
708 #include "tcg-target-con-set.h"
709 };
710 
711 
712 #undef C_O0_I1
713 #undef C_O0_I2
714 #undef C_O0_I3
715 #undef C_O0_I4
716 #undef C_O1_I1
717 #undef C_O1_I2
718 #undef C_O1_I3
719 #undef C_O1_I4
720 #undef C_N1_I2
721 #undef C_N2_I1
722 #undef C_O2_I1
723 #undef C_O2_I2
724 #undef C_O2_I3
725 #undef C_O2_I4
726 #undef C_N1_O1_I4
727 
728 /* Expand the enumerator to be returned from tcg_target_op_def(). */
729 
730 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
731 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
732 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
733 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
734 
735 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
736 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
737 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
738 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
739 
740 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
741 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
742 
743 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
744 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
745 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
746 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
747 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
748 
749 #include "tcg-target.c.inc"
750 
751 #ifndef CONFIG_TCG_INTERPRETER
752 /* Validate CPUTLBDescFast placement. */
753 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
754                         sizeof(CPUNegativeOffsetState))
755                   < MIN_TLB_MASK_TABLE_OFS);
756 #endif
757 
758 static void alloc_tcg_plugin_context(TCGContext *s)
759 {
760 #ifdef CONFIG_PLUGIN
761     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
762     s->plugin_tb->insns =
763         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
764 #endif
765 }
766 
767 /*
768  * All TCG threads except the parent (i.e. the one that called tcg_context_init
769  * and registered the target's TCG globals) must register with this function
770  * before initiating translation.
771  *
772  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
773  * of tcg_region_init() for the reasoning behind this.
774  *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
782  */
783 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the one initial context. */
    tcg_ctx = &tcg_init_ctx;
}
788 #else
789 void tcg_register_thread(void)
790 {
791     TCGContext *s = g_malloc(sizeof(*s));
792     unsigned int i, n;
793 
794     *s = tcg_init_ctx;
795 
796     /* Relink mem_base.  */
797     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
798         if (tcg_init_ctx.temps[i].mem_base) {
799             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
800             tcg_debug_assert(b >= 0 && b < n);
801             s->temps[i].mem_base = &s->temps[b];
802         }
803     }
804 
805     /* Claim an entry in tcg_ctxs */
806     n = qatomic_fetch_inc(&tcg_cur_ctxs);
807     g_assert(n < tcg_max_ctxs);
808     qatomic_set(&tcg_ctxs[n], s);
809 
810     if (n > 0) {
811         alloc_tcg_plugin_context(s);
812         tcg_region_initial_alloc(s);
813     }
814 
815     tcg_ctx = s;
816 }
817 #endif /* !CONFIG_USER_ONLY */
818 
819 /* pool based memory allocation */
820 void *tcg_malloc_internal(TCGContext *s, int size)
821 {
822     TCGPool *p;
823     int pool_size;
824 
825     if (size > TCG_POOL_CHUNK_SIZE) {
826         /* big malloc: insert a new pool (XXX: could optimize) */
827         p = g_malloc(sizeof(TCGPool) + size);
828         p->size = size;
829         p->next = s->pool_first_large;
830         s->pool_first_large = p;
831         return p->data;
832     } else {
833         p = s->pool_current;
834         if (!p) {
835             p = s->pool_first;
836             if (!p)
837                 goto new_pool;
838         } else {
839             if (!p->next) {
840             new_pool:
841                 pool_size = TCG_POOL_CHUNK_SIZE;
842                 p = g_malloc(sizeof(TCGPool) + pool_size);
843                 p->size = pool_size;
844                 p->next = NULL;
845                 if (s->pool_current) {
846                     s->pool_current->next = p;
847                 } else {
848                     s->pool_first = p;
849                 }
850             } else {
851                 p = p->next;
852             }
853         }
854     }
855     s->pool_current = p;
856     s->pool_cur = p->data + size;
857     s->pool_end = p->data + p->size;
858     return p->data;
859 }
860 
861 void tcg_pool_reset(TCGContext *s)
862 {
863     TCGPool *p, *t;
864     for (p = s->pool_first_large; p; p = t) {
865         t = p->next;
866         g_free(p);
867     }
868     s->pool_first_large = NULL;
869     s->pool_cur = s->pool_end = NULL;
870     s->pool_current = NULL;
871 }
872 
873 /*
874  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
875  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
876  * We only use these for layout in tcg_out_ld_helper_ret and
877  * tcg_out_st_helper_args, and share them between several of
878  * the helpers, with the end result that it's easier to build manually.
879  */
880 
881 #if TCG_TARGET_REG_BITS == 32
882 # define dh_typecode_ttl  dh_typecode_i32
883 #else
884 # define dh_typecode_ttl  dh_typecode_i64
885 #endif
886 
887 static TCGHelperInfo info_helper_ld32_mmu = {
888     .flags = TCG_CALL_NO_WG,
889     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
890               | dh_typemask(env, 1)
891               | dh_typemask(i64, 2)  /* uint64_t addr */
892               | dh_typemask(i32, 3)  /* unsigned oi */
893               | dh_typemask(ptr, 4)  /* uintptr_t ra */
894 };
895 
896 static TCGHelperInfo info_helper_ld64_mmu = {
897     .flags = TCG_CALL_NO_WG,
898     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
899               | dh_typemask(env, 1)
900               | dh_typemask(i64, 2)  /* uint64_t addr */
901               | dh_typemask(i32, 3)  /* unsigned oi */
902               | dh_typemask(ptr, 4)  /* uintptr_t ra */
903 };
904 
905 static TCGHelperInfo info_helper_ld128_mmu = {
906     .flags = TCG_CALL_NO_WG,
907     .typemask = dh_typemask(i128, 0) /* return Int128 */
908               | dh_typemask(env, 1)
909               | dh_typemask(i64, 2)  /* uint64_t addr */
910               | dh_typemask(i32, 3)  /* unsigned oi */
911               | dh_typemask(ptr, 4)  /* uintptr_t ra */
912 };
913 
914 static TCGHelperInfo info_helper_st32_mmu = {
915     .flags = TCG_CALL_NO_WG,
916     .typemask = dh_typemask(void, 0)
917               | dh_typemask(env, 1)
918               | dh_typemask(i64, 2)  /* uint64_t addr */
919               | dh_typemask(i32, 3)  /* uint32_t data */
920               | dh_typemask(i32, 4)  /* unsigned oi */
921               | dh_typemask(ptr, 5)  /* uintptr_t ra */
922 };
923 
924 static TCGHelperInfo info_helper_st64_mmu = {
925     .flags = TCG_CALL_NO_WG,
926     .typemask = dh_typemask(void, 0)
927               | dh_typemask(env, 1)
928               | dh_typemask(i64, 2)  /* uint64_t addr */
929               | dh_typemask(i64, 3)  /* uint64_t data */
930               | dh_typemask(i32, 4)  /* unsigned oi */
931               | dh_typemask(ptr, 5)  /* uintptr_t ra */
932 };
933 
934 static TCGHelperInfo info_helper_st128_mmu = {
935     .flags = TCG_CALL_NO_WG,
936     .typemask = dh_typemask(void, 0)
937               | dh_typemask(env, 1)
938               | dh_typemask(i64, 2)  /* uint64_t addr */
939               | dh_typemask(i128, 3) /* Int128 data */
940               | dh_typemask(i32, 4)  /* unsigned oi */
941               | dh_typemask(ptr, 5)  /* uintptr_t ra */
942 };
943 
944 #ifdef CONFIG_TCG_INTERPRETER
945 static ffi_type *typecode_to_ffi(int argmask)
946 {
947     /*
948      * libffi does not support __int128_t, so we have forced Int128
949      * to use the structure definition instead of the builtin type.
950      */
951     static ffi_type *ffi_type_i128_elements[3] = {
952         &ffi_type_uint64,
953         &ffi_type_uint64,
954         NULL
955     };
956     static ffi_type ffi_type_i128 = {
957         .size = 16,
958         .alignment = __alignof__(Int128),
959         .type = FFI_TYPE_STRUCT,
960         .elements = ffi_type_i128_elements,
961     };
962 
963     switch (argmask) {
964     case dh_typecode_void:
965         return &ffi_type_void;
966     case dh_typecode_i32:
967         return &ffi_type_uint32;
968     case dh_typecode_s32:
969         return &ffi_type_sint32;
970     case dh_typecode_i64:
971         return &ffi_type_uint64;
972     case dh_typecode_s64:
973         return &ffi_type_sint64;
974     case dh_typecode_ptr:
975         return &ffi_type_pointer;
976     case dh_typecode_i128:
977         return &ffi_type_i128;
978     }
979     g_assert_not_reached();
980 }
981 
982 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
983 {
984     unsigned typemask = info->typemask;
985     struct {
986         ffi_cif cif;
987         ffi_type *args[];
988     } *ca;
989     ffi_status status;
990     int nargs;
991 
992     /* Ignoring the return type, find the last non-zero field. */
993     nargs = 32 - clz32(typemask >> 3);
994     nargs = DIV_ROUND_UP(nargs, 3);
995     assert(nargs <= MAX_CALL_IARGS);
996 
997     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
998     ca->cif.rtype = typecode_to_ffi(typemask & 7);
999     ca->cif.nargs = nargs;
1000 
1001     if (nargs != 0) {
1002         ca->cif.arg_types = ca->args;
1003         for (int j = 0; j < nargs; ++j) {
1004             int typecode = extract32(typemask, (j + 1) * 3, 3);
1005             ca->args[j] = typecode_to_ffi(typecode);
1006         }
1007     }
1008 
1009     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1010                           ca->cif.rtype, ca->cif.arg_types);
1011     assert(status == FFI_OK);
1012 
1013     return &ca->cif;
1014 }
1015 
1016 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1017 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1018 #else
1019 #define HELPER_INFO_INIT(I)      (&(I)->init)
1020 #define HELPER_INFO_INIT_VAL(I)  1
1021 #endif /* CONFIG_TCG_INTERPRETER */
1022 
1023 static inline bool arg_slot_reg_p(unsigned arg_slot)
1024 {
1025     /*
1026      * Split the sizeof away from the comparison to avoid Werror from
1027      * "unsigned < 0 is always false", when iarg_regs is empty.
1028      */
1029     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1030     return arg_slot < nreg;
1031 }
1032 
1033 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1034 {
1035     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1036     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1037 
1038     tcg_debug_assert(stk_slot < max);
1039     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1040 }
1041 
/* Running cursors used while laying out the arguments of one helper. */
typedef struct TCGCumulativeArgs TCGCumulativeArgs;

struct TCGCumulativeArgs {
    int arg_idx;                /* index into tcg_gen_callN args[] */
    int info_in_idx;            /* index into TCGHelperInfo in[] */
    int arg_slot;               /* next regs+stack slot */
    int ref_slot;               /* next stack slot for references */
};

/* Advance the argument slot to the next even number if it is odd. */
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    if (cum->arg_slot & 1) {
        cum->arg_slot++;
    }
}
1053 
1054 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1055                          TCGCallArgumentKind kind)
1056 {
1057     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1058 
1059     *loc = (TCGCallArgumentLoc){
1060         .kind = kind,
1061         .arg_idx = cum->arg_idx,
1062         .arg_slot = cum->arg_slot,
1063     };
1064     cum->info_in_idx++;
1065     cum->arg_slot++;
1066 }
1067 
1068 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1069                                 TCGHelperInfo *info, int n)
1070 {
1071     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1072 
1073     for (int i = 0; i < n; ++i) {
1074         /* Layout all using the same arg_idx, adjusting the subindex. */
1075         loc[i] = (TCGCallArgumentLoc){
1076             .kind = TCG_CALL_ARG_NORMAL,
1077             .arg_idx = cum->arg_idx,
1078             .tmp_subindex = i,
1079             .arg_slot = cum->arg_slot + i,
1080         };
1081     }
1082     cum->info_in_idx += n;
1083     cum->arg_slot += n;
1084 }
1085 
1086 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1087 {
1088     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1089     int n = 128 / TCG_TARGET_REG_BITS;
1090 
1091     /* The first subindex carries the pointer. */
1092     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1093 
1094     /*
1095      * The callee is allowed to clobber memory associated with
1096      * structure pass by-reference.  Therefore we must make copies.
1097      * Allocate space from "ref_slot", which will be adjusted to
1098      * follow the parameters on the stack.
1099      */
1100     loc[0].ref_slot = cum->ref_slot;
1101 
1102     /*
1103      * Subsequent words also go into the reference slot, but
1104      * do not accumulate into the regular arguments.
1105      */
1106     for (int i = 1; i < n; ++i) {
1107         loc[i] = (TCGCallArgumentLoc){
1108             .kind = TCG_CALL_ARG_BY_REF_N,
1109             .arg_idx = cum->arg_idx,
1110             .tmp_subindex = i,
1111             .ref_slot = cum->ref_slot + i,
1112         };
1113     }
1114     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1115     cum->ref_slot += n;
1116 }
1117 
1118 static void init_call_layout(TCGHelperInfo *info)
1119 {
1120     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1121     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1122     unsigned typemask = info->typemask;
1123     unsigned typecode;
1124     TCGCumulativeArgs cum = { };
1125 
1126     /*
1127      * Parse and place any function return value.
1128      */
1129     typecode = typemask & 7;
1130     switch (typecode) {
1131     case dh_typecode_void:
1132         info->nr_out = 0;
1133         break;
1134     case dh_typecode_i32:
1135     case dh_typecode_s32:
1136     case dh_typecode_ptr:
1137         info->nr_out = 1;
1138         info->out_kind = TCG_CALL_RET_NORMAL;
1139         break;
1140     case dh_typecode_i64:
1141     case dh_typecode_s64:
1142         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1143         info->out_kind = TCG_CALL_RET_NORMAL;
1144         /* Query the last register now to trigger any assert early. */
1145         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1146         break;
1147     case dh_typecode_i128:
1148         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1149         info->out_kind = TCG_TARGET_CALL_RET_I128;
1150         switch (TCG_TARGET_CALL_RET_I128) {
1151         case TCG_CALL_RET_NORMAL:
1152             /* Query the last register now to trigger any assert early. */
1153             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1154             break;
1155         case TCG_CALL_RET_BY_VEC:
1156             /* Query the single register now to trigger any assert early. */
1157             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1158             break;
1159         case TCG_CALL_RET_BY_REF:
1160             /*
1161              * Allocate the first argument to the output.
1162              * We don't need to store this anywhere, just make it
1163              * unavailable for use in the input loop below.
1164              */
1165             cum.arg_slot = 1;
1166             break;
1167         default:
1168             qemu_build_not_reached();
1169         }
1170         break;
1171     default:
1172         g_assert_not_reached();
1173     }
1174 
1175     /*
1176      * Parse and place function arguments.
1177      */
1178     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1179         TCGCallArgumentKind kind;
1180         TCGType type;
1181 
1182         typecode = typemask & 7;
1183         switch (typecode) {
1184         case dh_typecode_i32:
1185         case dh_typecode_s32:
1186             type = TCG_TYPE_I32;
1187             break;
1188         case dh_typecode_i64:
1189         case dh_typecode_s64:
1190             type = TCG_TYPE_I64;
1191             break;
1192         case dh_typecode_ptr:
1193             type = TCG_TYPE_PTR;
1194             break;
1195         case dh_typecode_i128:
1196             type = TCG_TYPE_I128;
1197             break;
1198         default:
1199             g_assert_not_reached();
1200         }
1201 
1202         switch (type) {
1203         case TCG_TYPE_I32:
1204             switch (TCG_TARGET_CALL_ARG_I32) {
1205             case TCG_CALL_ARG_EVEN:
1206                 layout_arg_even(&cum);
1207                 /* fall through */
1208             case TCG_CALL_ARG_NORMAL:
1209                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1210                 break;
1211             case TCG_CALL_ARG_EXTEND:
1212                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1213                 layout_arg_1(&cum, info, kind);
1214                 break;
1215             default:
1216                 qemu_build_not_reached();
1217             }
1218             break;
1219 
1220         case TCG_TYPE_I64:
1221             switch (TCG_TARGET_CALL_ARG_I64) {
1222             case TCG_CALL_ARG_EVEN:
1223                 layout_arg_even(&cum);
1224                 /* fall through */
1225             case TCG_CALL_ARG_NORMAL:
1226                 if (TCG_TARGET_REG_BITS == 32) {
1227                     layout_arg_normal_n(&cum, info, 2);
1228                 } else {
1229                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1230                 }
1231                 break;
1232             default:
1233                 qemu_build_not_reached();
1234             }
1235             break;
1236 
1237         case TCG_TYPE_I128:
1238             switch (TCG_TARGET_CALL_ARG_I128) {
1239             case TCG_CALL_ARG_EVEN:
1240                 layout_arg_even(&cum);
1241                 /* fall through */
1242             case TCG_CALL_ARG_NORMAL:
1243                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1244                 break;
1245             case TCG_CALL_ARG_BY_REF:
1246                 layout_arg_by_ref(&cum, info);
1247                 break;
1248             default:
1249                 qemu_build_not_reached();
1250             }
1251             break;
1252 
1253         default:
1254             g_assert_not_reached();
1255         }
1256     }
1257     info->nr_in = cum.info_in_idx;
1258 
1259     /* Validate that we didn't overrun the input array. */
1260     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1261     /* Validate the backend has enough argument space. */
1262     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1263 
1264     /*
1265      * Relocate the "ref_slot" area to the end of the parameters.
1266      * Minimizing this stack offset helps code size for x86,
1267      * which has a signed 8-bit offset encoding.
1268      */
1269     if (cum.ref_slot != 0) {
1270         int ref_base = 0;
1271 
1272         if (cum.arg_slot > max_reg_slots) {
1273             int align = __alignof(Int128) / sizeof(tcg_target_long);
1274 
1275             ref_base = cum.arg_slot - max_reg_slots;
1276             if (align > 1) {
1277                 ref_base = ROUND_UP(ref_base, align);
1278             }
1279         }
1280         assert(ref_base + cum.ref_slot <= max_stk_slots);
1281         ref_base += max_reg_slots;
1282 
1283         if (ref_base != 0) {
1284             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1285                 TCGCallArgumentLoc *loc = &info->in[i];
1286                 switch (loc->kind) {
1287                 case TCG_CALL_ARG_BY_REF:
1288                 case TCG_CALL_ARG_BY_REF_N:
1289                     loc->ref_slot += ref_base;
1290                     break;
1291                 default:
1292                     break;
1293                 }
1294             }
1295         }
1296     }
1297 }
1298 
1299 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1300 static void process_op_defs(TCGContext *s);
1301 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1302                                             TCGReg reg, const char *name);
1303 
1304 static void tcg_context_init(unsigned max_cpus)
1305 {
1306     TCGContext *s = &tcg_init_ctx;
1307     int op, total_args, n, i;
1308     TCGOpDef *def;
1309     TCGArgConstraint *args_ct;
1310     TCGTemp *ts;
1311 
1312     memset(s, 0, sizeof(*s));
1313     s->nb_globals = 0;
1314 
1315     /* Count total number of arguments and allocate the corresponding
1316        space */
1317     total_args = 0;
1318     for(op = 0; op < NB_OPS; op++) {
1319         def = &tcg_op_defs[op];
1320         n = def->nb_iargs + def->nb_oargs;
1321         total_args += n;
1322     }
1323 
1324     args_ct = g_new0(TCGArgConstraint, total_args);
1325 
1326     for(op = 0; op < NB_OPS; op++) {
1327         def = &tcg_op_defs[op];
1328         def->args_ct = args_ct;
1329         n = def->nb_iargs + def->nb_oargs;
1330         args_ct += n;
1331     }
1332 
1333     init_call_layout(&info_helper_ld32_mmu);
1334     init_call_layout(&info_helper_ld64_mmu);
1335     init_call_layout(&info_helper_ld128_mmu);
1336     init_call_layout(&info_helper_st32_mmu);
1337     init_call_layout(&info_helper_st64_mmu);
1338     init_call_layout(&info_helper_st128_mmu);
1339 
1340     tcg_target_init(s);
1341     process_op_defs(s);
1342 
1343     /* Reverse the order of the saved registers, assuming they're all at
1344        the start of tcg_target_reg_alloc_order.  */
1345     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1346         int r = tcg_target_reg_alloc_order[n];
1347         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1348             break;
1349         }
1350     }
1351     for (i = 0; i < n; ++i) {
1352         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1353     }
1354     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1355         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1356     }
1357 
1358     alloc_tcg_plugin_context(s);
1359 
1360     tcg_ctx = s;
1361     /*
1362      * In user-mode we simply share the init context among threads, since we
1363      * use a single region. See the documentation tcg_region_init() for the
1364      * reasoning behind this.
1365      * In system-mode we will have at most max_cpus TCG threads.
1366      */
1367 #ifdef CONFIG_USER_ONLY
1368     tcg_ctxs = &tcg_ctx;
1369     tcg_cur_ctxs = 1;
1370     tcg_max_ctxs = 1;
1371 #else
1372     tcg_max_ctxs = max_cpus;
1373     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1374 #endif
1375 
1376     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1377     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1378     tcg_env = temp_tcgv_ptr(ts);
1379 }
1380 
/*
 * TCG start-up entry point: initialize the initial context, then the
 * code regions with the given buffer size and split-wx setting.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    /* Context first, regions second. */
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1386 
1387 /*
1388  * Allocate TBs right before their corresponding translated code, making
1389  * sure that TBs and code are on different cache lines.
1390  */
1391 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1392 {
1393     uintptr_t align = qemu_icache_linesize;
1394     TranslationBlock *tb;
1395     void *next;
1396 
1397  retry:
1398     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1399     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1400 
1401     if (unlikely(next > s->code_gen_highwater)) {
1402         if (tcg_region_alloc(s)) {
1403             return NULL;
1404         }
1405         goto retry;
1406     }
1407     qatomic_set(&s->code_gen_ptr, next);
1408     s->data_gen_ptr = NULL;
1409     return tb;
1410 }
1411 
1412 void tcg_prologue_init(void)
1413 {
1414     TCGContext *s = tcg_ctx;
1415     size_t prologue_size;
1416 
1417     s->code_ptr = s->code_gen_ptr;
1418     s->code_buf = s->code_gen_ptr;
1419     s->data_gen_ptr = NULL;
1420 
1421 #ifndef CONFIG_TCG_INTERPRETER
1422     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1423 #endif
1424 
1425 #ifdef TCG_TARGET_NEED_POOL_LABELS
1426     s->pool_labels = NULL;
1427 #endif
1428 
1429     qemu_thread_jit_write();
1430     /* Generate the prologue.  */
1431     tcg_target_qemu_prologue(s);
1432 
1433 #ifdef TCG_TARGET_NEED_POOL_LABELS
1434     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1435     {
1436         int result = tcg_out_pool_finalize(s);
1437         tcg_debug_assert(result == 0);
1438     }
1439 #endif
1440 
1441     prologue_size = tcg_current_code_size(s);
1442     perf_report_prologue(s->code_gen_ptr, prologue_size);
1443 
1444 #ifndef CONFIG_TCG_INTERPRETER
1445     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1446                         (uintptr_t)s->code_buf, prologue_size);
1447 #endif
1448 
1449     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1450         FILE *logfile = qemu_log_trylock();
1451         if (logfile) {
1452             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1453             if (s->data_gen_ptr) {
1454                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1455                 size_t data_size = prologue_size - code_size;
1456                 size_t i;
1457 
1458                 disas(logfile, s->code_gen_ptr, code_size);
1459 
1460                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1461                     if (sizeof(tcg_target_ulong) == 8) {
1462                         fprintf(logfile,
1463                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1464                                 (uintptr_t)s->data_gen_ptr + i,
1465                                 *(uint64_t *)(s->data_gen_ptr + i));
1466                     } else {
1467                         fprintf(logfile,
1468                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1469                                 (uintptr_t)s->data_gen_ptr + i,
1470                                 *(uint32_t *)(s->data_gen_ptr + i));
1471                     }
1472                 }
1473             } else {
1474                 disas(logfile, s->code_gen_ptr, prologue_size);
1475             }
1476             fprintf(logfile, "\n");
1477             qemu_log_unlock(logfile);
1478         }
1479     }
1480 
1481 #ifndef CONFIG_TCG_INTERPRETER
1482     /*
1483      * Assert that goto_ptr is implemented completely, setting an epilogue.
1484      * For tci, we use NULL as the signal to return from the interpreter,
1485      * so skip this check.
1486      */
1487     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1488 #endif
1489 
1490     tcg_region_prologue_set(s);
1491 }
1492 
1493 void tcg_func_start(TCGContext *s)
1494 {
1495     tcg_pool_reset(s);
1496     s->nb_temps = s->nb_globals;
1497 
1498     /* No temps have been previously allocated for size or locality.  */
1499     memset(s->free_temps, 0, sizeof(s->free_temps));
1500 
1501     /* No constant temps have been previously allocated. */
1502     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1503         if (s->const_table[i]) {
1504             g_hash_table_remove_all(s->const_table[i]);
1505         }
1506     }
1507 
1508     s->nb_ops = 0;
1509     s->nb_labels = 0;
1510     s->current_frame_offset = s->frame_start;
1511 
1512 #ifdef CONFIG_DEBUG_TCG
1513     s->goto_tb_issue_mask = 0;
1514 #endif
1515 
1516     QTAILQ_INIT(&s->ops);
1517     QTAILQ_INIT(&s->free_ops);
1518     QSIMPLEQ_INIT(&s->labels);
1519 
1520     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1521                      s->addr_type == TCG_TYPE_I64);
1522 
1523     tcg_debug_assert(s->insn_start_words > 0);
1524 }
1525 
1526 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1527 {
1528     int n = s->nb_temps++;
1529 
1530     if (n >= TCG_MAX_TEMPS) {
1531         tcg_raise_tb_overflow(s);
1532     }
1533     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1534 }
1535 
1536 static TCGTemp *tcg_global_alloc(TCGContext *s)
1537 {
1538     TCGTemp *ts;
1539 
1540     tcg_debug_assert(s->nb_globals == s->nb_temps);
1541     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1542     s->nb_globals++;
1543     ts = tcg_temp_alloc(s);
1544     ts->kind = TEMP_GLOBAL;
1545 
1546     return ts;
1547 }
1548 
1549 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1550                                             TCGReg reg, const char *name)
1551 {
1552     TCGTemp *ts;
1553 
1554     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1555 
1556     ts = tcg_global_alloc(s);
1557     ts->base_type = type;
1558     ts->type = type;
1559     ts->kind = TEMP_FIXED;
1560     ts->reg = reg;
1561     ts->name = name;
1562     tcg_regset_set_reg(s->reserved_regs, reg);
1563 
1564     return ts;
1565 }
1566 
1567 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1568 {
1569     s->frame_start = start;
1570     s->frame_end = start + size;
1571     s->frame_temp
1572         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1573 }
1574 
1575 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1576                                             const char *name, TCGType type)
1577 {
1578     TCGContext *s = tcg_ctx;
1579     TCGTemp *base_ts = tcgv_ptr_temp(base);
1580     TCGTemp *ts = tcg_global_alloc(s);
1581     int indirect_reg = 0;
1582 
1583     switch (base_ts->kind) {
1584     case TEMP_FIXED:
1585         break;
1586     case TEMP_GLOBAL:
1587         /* We do not support double-indirect registers.  */
1588         tcg_debug_assert(!base_ts->indirect_reg);
1589         base_ts->indirect_base = 1;
1590         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1591                             ? 2 : 1);
1592         indirect_reg = 1;
1593         break;
1594     default:
1595         g_assert_not_reached();
1596     }
1597 
1598     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1599         TCGTemp *ts2 = tcg_global_alloc(s);
1600         char buf[64];
1601 
1602         ts->base_type = TCG_TYPE_I64;
1603         ts->type = TCG_TYPE_I32;
1604         ts->indirect_reg = indirect_reg;
1605         ts->mem_allocated = 1;
1606         ts->mem_base = base_ts;
1607         ts->mem_offset = offset;
1608         pstrcpy(buf, sizeof(buf), name);
1609         pstrcat(buf, sizeof(buf), "_0");
1610         ts->name = strdup(buf);
1611 
1612         tcg_debug_assert(ts2 == ts + 1);
1613         ts2->base_type = TCG_TYPE_I64;
1614         ts2->type = TCG_TYPE_I32;
1615         ts2->indirect_reg = indirect_reg;
1616         ts2->mem_allocated = 1;
1617         ts2->mem_base = base_ts;
1618         ts2->mem_offset = offset + 4;
1619         ts2->temp_subindex = 1;
1620         pstrcpy(buf, sizeof(buf), name);
1621         pstrcat(buf, sizeof(buf), "_1");
1622         ts2->name = strdup(buf);
1623     } else {
1624         ts->base_type = type;
1625         ts->type = type;
1626         ts->indirect_reg = indirect_reg;
1627         ts->mem_allocated = 1;
1628         ts->mem_base = base_ts;
1629         ts->mem_offset = offset;
1630         ts->name = name;
1631     }
1632     return ts;
1633 }
1634 
1635 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1636 {
1637     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1638     return temp_tcgv_i32(ts);
1639 }
1640 
1641 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1642 {
1643     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1644     return temp_tcgv_i64(ts);
1645 }
1646 
1647 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1648 {
1649     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1650     return temp_tcgv_ptr(ts);
1651 }
1652 
1653 static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1654 {
1655     TCGContext *s = tcg_ctx;
1656     TCGTemp *ts;
1657     int n;
1658 
1659     if (kind == TEMP_EBB) {
1660         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1661 
1662         if (idx < TCG_MAX_TEMPS) {
1663             /* There is already an available temp with the right type.  */
1664             clear_bit(idx, s->free_temps[type].l);
1665 
1666             ts = &s->temps[idx];
1667             ts->temp_allocated = 1;
1668             tcg_debug_assert(ts->base_type == type);
1669             tcg_debug_assert(ts->kind == kind);
1670             return ts;
1671         }
1672     } else {
1673         tcg_debug_assert(kind == TEMP_TB);
1674     }
1675 
1676     switch (type) {
1677     case TCG_TYPE_I32:
1678     case TCG_TYPE_V64:
1679     case TCG_TYPE_V128:
1680     case TCG_TYPE_V256:
1681         n = 1;
1682         break;
1683     case TCG_TYPE_I64:
1684         n = 64 / TCG_TARGET_REG_BITS;
1685         break;
1686     case TCG_TYPE_I128:
1687         n = 128 / TCG_TARGET_REG_BITS;
1688         break;
1689     default:
1690         g_assert_not_reached();
1691     }
1692 
1693     ts = tcg_temp_alloc(s);
1694     ts->base_type = type;
1695     ts->temp_allocated = 1;
1696     ts->kind = kind;
1697 
1698     if (n == 1) {
1699         ts->type = type;
1700     } else {
1701         ts->type = TCG_TYPE_REG;
1702 
1703         for (int i = 1; i < n; ++i) {
1704             TCGTemp *ts2 = tcg_temp_alloc(s);
1705 
1706             tcg_debug_assert(ts2 == ts + i);
1707             ts2->base_type = type;
1708             ts2->type = TCG_TYPE_REG;
1709             ts2->temp_allocated = 1;
1710             ts2->temp_subindex = i;
1711             ts2->kind = kind;
1712         }
1713     }
1714     return ts;
1715 }
1716 
1717 TCGv_i32 tcg_temp_new_i32(void)
1718 {
1719     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1720 }
1721 
1722 TCGv_i32 tcg_temp_ebb_new_i32(void)
1723 {
1724     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1725 }
1726 
1727 TCGv_i64 tcg_temp_new_i64(void)
1728 {
1729     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1730 }
1731 
1732 TCGv_i64 tcg_temp_ebb_new_i64(void)
1733 {
1734     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1735 }
1736 
1737 TCGv_ptr tcg_temp_new_ptr(void)
1738 {
1739     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1740 }
1741 
1742 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1743 {
1744     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1745 }
1746 
1747 TCGv_i128 tcg_temp_new_i128(void)
1748 {
1749     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1750 }
1751 
1752 TCGv_i128 tcg_temp_ebb_new_i128(void)
1753 {
1754     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1755 }
1756 
1757 TCGv_vec tcg_temp_new_vec(TCGType type)
1758 {
1759     TCGTemp *t;
1760 
1761 #ifdef CONFIG_DEBUG_TCG
1762     switch (type) {
1763     case TCG_TYPE_V64:
1764         assert(TCG_TARGET_HAS_v64);
1765         break;
1766     case TCG_TYPE_V128:
1767         assert(TCG_TARGET_HAS_v128);
1768         break;
1769     case TCG_TYPE_V256:
1770         assert(TCG_TARGET_HAS_v256);
1771         break;
1772     default:
1773         g_assert_not_reached();
1774     }
1775 #endif
1776 
1777     t = tcg_temp_new_internal(type, TEMP_EBB);
1778     return temp_tcgv_vec(t);
1779 }
1780 
1781 /* Create a new temp of the same type as an existing temp.  */
1782 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1783 {
1784     TCGTemp *t = tcgv_vec_temp(match);
1785 
1786     tcg_debug_assert(t->temp_allocated != 0);
1787 
1788     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1789     return temp_tcgv_vec(t);
1790 }
1791 
1792 void tcg_temp_free_internal(TCGTemp *ts)
1793 {
1794     TCGContext *s = tcg_ctx;
1795 
1796     switch (ts->kind) {
1797     case TEMP_CONST:
1798     case TEMP_TB:
1799         /* Silently ignore free. */
1800         break;
1801     case TEMP_EBB:
1802         tcg_debug_assert(ts->temp_allocated != 0);
1803         ts->temp_allocated = 0;
1804         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1805         break;
1806     default:
1807         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1808         g_assert_not_reached();
1809     }
1810 }
1811 
1812 void tcg_temp_free_i32(TCGv_i32 arg)
1813 {
1814     tcg_temp_free_internal(tcgv_i32_temp(arg));
1815 }
1816 
1817 void tcg_temp_free_i64(TCGv_i64 arg)
1818 {
1819     tcg_temp_free_internal(tcgv_i64_temp(arg));
1820 }
1821 
1822 void tcg_temp_free_i128(TCGv_i128 arg)
1823 {
1824     tcg_temp_free_internal(tcgv_i128_temp(arg));
1825 }
1826 
1827 void tcg_temp_free_ptr(TCGv_ptr arg)
1828 {
1829     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1830 }
1831 
1832 void tcg_temp_free_vec(TCGv_vec arg)
1833 {
1834     tcg_temp_free_internal(tcgv_vec_temp(arg));
1835 }
1836 
1837 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1838 {
1839     TCGContext *s = tcg_ctx;
1840     GHashTable *h = s->const_table[type];
1841     TCGTemp *ts;
1842 
1843     if (h == NULL) {
1844         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1845         s->const_table[type] = h;
1846     }
1847 
1848     ts = g_hash_table_lookup(h, &val);
1849     if (ts == NULL) {
1850         int64_t *val_ptr;
1851 
1852         ts = tcg_temp_alloc(s);
1853 
1854         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1855             TCGTemp *ts2 = tcg_temp_alloc(s);
1856 
1857             tcg_debug_assert(ts2 == ts + 1);
1858 
1859             ts->base_type = TCG_TYPE_I64;
1860             ts->type = TCG_TYPE_I32;
1861             ts->kind = TEMP_CONST;
1862             ts->temp_allocated = 1;
1863 
1864             ts2->base_type = TCG_TYPE_I64;
1865             ts2->type = TCG_TYPE_I32;
1866             ts2->kind = TEMP_CONST;
1867             ts2->temp_allocated = 1;
1868             ts2->temp_subindex = 1;
1869 
1870             /*
1871              * Retain the full value of the 64-bit constant in the low
1872              * part, so that the hash table works.  Actual uses will
1873              * truncate the value to the low part.
1874              */
1875             ts[HOST_BIG_ENDIAN].val = val;
1876             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1877             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1878         } else {
1879             ts->base_type = type;
1880             ts->type = type;
1881             ts->kind = TEMP_CONST;
1882             ts->temp_allocated = 1;
1883             ts->val = val;
1884             val_ptr = &ts->val;
1885         }
1886         g_hash_table_insert(h, val_ptr, ts);
1887     }
1888 
1889     return ts;
1890 }
1891 
1892 TCGv_i32 tcg_constant_i32(int32_t val)
1893 {
1894     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1895 }
1896 
1897 TCGv_i64 tcg_constant_i64(int64_t val)
1898 {
1899     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1900 }
1901 
1902 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1903 {
1904     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1905 }
1906 
1907 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1908 {
1909     val = dup_const(vece, val);
1910     return temp_tcgv_vec(tcg_constant_internal(type, val));
1911 }
1912 
1913 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1914 {
1915     TCGTemp *t = tcgv_vec_temp(match);
1916 
1917     tcg_debug_assert(t->temp_allocated != 0);
1918     return tcg_constant_vec(t->base_type, vece, val);
1919 }
1920 
1921 #ifdef CONFIG_DEBUG_TCG
1922 size_t temp_idx(TCGTemp *ts)
1923 {
1924     ptrdiff_t n = ts - tcg_ctx->temps;
1925     assert(n >= 0 && n < tcg_ctx->nb_temps);
1926     return n;
1927 }
1928 
1929 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1930 {
1931     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1932 
1933     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1934     assert(o % sizeof(TCGTemp) == 0);
1935 
1936     return (void *)tcg_ctx + (uintptr_t)v;
1937 }
1938 #endif /* CONFIG_DEBUG_TCG */
1939 
1940 /* Return true if OP may appear in the opcode stream.
1941    Test the runtime variable that controls each opcode.  */
1942 bool tcg_op_supported(TCGOpcode op)
1943 {
1944     const bool have_vec
1945         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1946 
1947     switch (op) {
1948     case INDEX_op_discard:
1949     case INDEX_op_set_label:
1950     case INDEX_op_call:
1951     case INDEX_op_br:
1952     case INDEX_op_mb:
1953     case INDEX_op_insn_start:
1954     case INDEX_op_exit_tb:
1955     case INDEX_op_goto_tb:
1956     case INDEX_op_goto_ptr:
1957     case INDEX_op_qemu_ld_a32_i32:
1958     case INDEX_op_qemu_ld_a64_i32:
1959     case INDEX_op_qemu_st_a32_i32:
1960     case INDEX_op_qemu_st_a64_i32:
1961     case INDEX_op_qemu_ld_a32_i64:
1962     case INDEX_op_qemu_ld_a64_i64:
1963     case INDEX_op_qemu_st_a32_i64:
1964     case INDEX_op_qemu_st_a64_i64:
1965         return true;
1966 
1967     case INDEX_op_qemu_st8_a32_i32:
1968     case INDEX_op_qemu_st8_a64_i32:
1969         return TCG_TARGET_HAS_qemu_st8_i32;
1970 
1971     case INDEX_op_qemu_ld_a32_i128:
1972     case INDEX_op_qemu_ld_a64_i128:
1973     case INDEX_op_qemu_st_a32_i128:
1974     case INDEX_op_qemu_st_a64_i128:
1975         return TCG_TARGET_HAS_qemu_ldst_i128;
1976 
1977     case INDEX_op_mov_i32:
1978     case INDEX_op_setcond_i32:
1979     case INDEX_op_brcond_i32:
1980     case INDEX_op_movcond_i32:
1981     case INDEX_op_ld8u_i32:
1982     case INDEX_op_ld8s_i32:
1983     case INDEX_op_ld16u_i32:
1984     case INDEX_op_ld16s_i32:
1985     case INDEX_op_ld_i32:
1986     case INDEX_op_st8_i32:
1987     case INDEX_op_st16_i32:
1988     case INDEX_op_st_i32:
1989     case INDEX_op_add_i32:
1990     case INDEX_op_sub_i32:
1991     case INDEX_op_neg_i32:
1992     case INDEX_op_mul_i32:
1993     case INDEX_op_and_i32:
1994     case INDEX_op_or_i32:
1995     case INDEX_op_xor_i32:
1996     case INDEX_op_shl_i32:
1997     case INDEX_op_shr_i32:
1998     case INDEX_op_sar_i32:
1999         return true;
2000 
2001     case INDEX_op_negsetcond_i32:
2002         return TCG_TARGET_HAS_negsetcond_i32;
2003     case INDEX_op_div_i32:
2004     case INDEX_op_divu_i32:
2005         return TCG_TARGET_HAS_div_i32;
2006     case INDEX_op_rem_i32:
2007     case INDEX_op_remu_i32:
2008         return TCG_TARGET_HAS_rem_i32;
2009     case INDEX_op_div2_i32:
2010     case INDEX_op_divu2_i32:
2011         return TCG_TARGET_HAS_div2_i32;
2012     case INDEX_op_rotl_i32:
2013     case INDEX_op_rotr_i32:
2014         return TCG_TARGET_HAS_rot_i32;
2015     case INDEX_op_deposit_i32:
2016         return TCG_TARGET_HAS_deposit_i32;
2017     case INDEX_op_extract_i32:
2018         return TCG_TARGET_HAS_extract_i32;
2019     case INDEX_op_sextract_i32:
2020         return TCG_TARGET_HAS_sextract_i32;
2021     case INDEX_op_extract2_i32:
2022         return TCG_TARGET_HAS_extract2_i32;
2023     case INDEX_op_add2_i32:
2024         return TCG_TARGET_HAS_add2_i32;
2025     case INDEX_op_sub2_i32:
2026         return TCG_TARGET_HAS_sub2_i32;
2027     case INDEX_op_mulu2_i32:
2028         return TCG_TARGET_HAS_mulu2_i32;
2029     case INDEX_op_muls2_i32:
2030         return TCG_TARGET_HAS_muls2_i32;
2031     case INDEX_op_muluh_i32:
2032         return TCG_TARGET_HAS_muluh_i32;
2033     case INDEX_op_mulsh_i32:
2034         return TCG_TARGET_HAS_mulsh_i32;
2035     case INDEX_op_ext8s_i32:
2036         return TCG_TARGET_HAS_ext8s_i32;
2037     case INDEX_op_ext16s_i32:
2038         return TCG_TARGET_HAS_ext16s_i32;
2039     case INDEX_op_ext8u_i32:
2040         return TCG_TARGET_HAS_ext8u_i32;
2041     case INDEX_op_ext16u_i32:
2042         return TCG_TARGET_HAS_ext16u_i32;
2043     case INDEX_op_bswap16_i32:
2044         return TCG_TARGET_HAS_bswap16_i32;
2045     case INDEX_op_bswap32_i32:
2046         return TCG_TARGET_HAS_bswap32_i32;
2047     case INDEX_op_not_i32:
2048         return TCG_TARGET_HAS_not_i32;
2049     case INDEX_op_andc_i32:
2050         return TCG_TARGET_HAS_andc_i32;
2051     case INDEX_op_orc_i32:
2052         return TCG_TARGET_HAS_orc_i32;
2053     case INDEX_op_eqv_i32:
2054         return TCG_TARGET_HAS_eqv_i32;
2055     case INDEX_op_nand_i32:
2056         return TCG_TARGET_HAS_nand_i32;
2057     case INDEX_op_nor_i32:
2058         return TCG_TARGET_HAS_nor_i32;
2059     case INDEX_op_clz_i32:
2060         return TCG_TARGET_HAS_clz_i32;
2061     case INDEX_op_ctz_i32:
2062         return TCG_TARGET_HAS_ctz_i32;
2063     case INDEX_op_ctpop_i32:
2064         return TCG_TARGET_HAS_ctpop_i32;
2065 
2066     case INDEX_op_brcond2_i32:
2067     case INDEX_op_setcond2_i32:
2068         return TCG_TARGET_REG_BITS == 32;
2069 
2070     case INDEX_op_mov_i64:
2071     case INDEX_op_setcond_i64:
2072     case INDEX_op_brcond_i64:
2073     case INDEX_op_movcond_i64:
2074     case INDEX_op_ld8u_i64:
2075     case INDEX_op_ld8s_i64:
2076     case INDEX_op_ld16u_i64:
2077     case INDEX_op_ld16s_i64:
2078     case INDEX_op_ld32u_i64:
2079     case INDEX_op_ld32s_i64:
2080     case INDEX_op_ld_i64:
2081     case INDEX_op_st8_i64:
2082     case INDEX_op_st16_i64:
2083     case INDEX_op_st32_i64:
2084     case INDEX_op_st_i64:
2085     case INDEX_op_add_i64:
2086     case INDEX_op_sub_i64:
2087     case INDEX_op_neg_i64:
2088     case INDEX_op_mul_i64:
2089     case INDEX_op_and_i64:
2090     case INDEX_op_or_i64:
2091     case INDEX_op_xor_i64:
2092     case INDEX_op_shl_i64:
2093     case INDEX_op_shr_i64:
2094     case INDEX_op_sar_i64:
2095     case INDEX_op_ext_i32_i64:
2096     case INDEX_op_extu_i32_i64:
2097         return TCG_TARGET_REG_BITS == 64;
2098 
2099     case INDEX_op_negsetcond_i64:
2100         return TCG_TARGET_HAS_negsetcond_i64;
2101     case INDEX_op_div_i64:
2102     case INDEX_op_divu_i64:
2103         return TCG_TARGET_HAS_div_i64;
2104     case INDEX_op_rem_i64:
2105     case INDEX_op_remu_i64:
2106         return TCG_TARGET_HAS_rem_i64;
2107     case INDEX_op_div2_i64:
2108     case INDEX_op_divu2_i64:
2109         return TCG_TARGET_HAS_div2_i64;
2110     case INDEX_op_rotl_i64:
2111     case INDEX_op_rotr_i64:
2112         return TCG_TARGET_HAS_rot_i64;
2113     case INDEX_op_deposit_i64:
2114         return TCG_TARGET_HAS_deposit_i64;
2115     case INDEX_op_extract_i64:
2116         return TCG_TARGET_HAS_extract_i64;
2117     case INDEX_op_sextract_i64:
2118         return TCG_TARGET_HAS_sextract_i64;
2119     case INDEX_op_extract2_i64:
2120         return TCG_TARGET_HAS_extract2_i64;
2121     case INDEX_op_extrl_i64_i32:
2122     case INDEX_op_extrh_i64_i32:
2123         return TCG_TARGET_HAS_extr_i64_i32;
2124     case INDEX_op_ext8s_i64:
2125         return TCG_TARGET_HAS_ext8s_i64;
2126     case INDEX_op_ext16s_i64:
2127         return TCG_TARGET_HAS_ext16s_i64;
2128     case INDEX_op_ext32s_i64:
2129         return TCG_TARGET_HAS_ext32s_i64;
2130     case INDEX_op_ext8u_i64:
2131         return TCG_TARGET_HAS_ext8u_i64;
2132     case INDEX_op_ext16u_i64:
2133         return TCG_TARGET_HAS_ext16u_i64;
2134     case INDEX_op_ext32u_i64:
2135         return TCG_TARGET_HAS_ext32u_i64;
2136     case INDEX_op_bswap16_i64:
2137         return TCG_TARGET_HAS_bswap16_i64;
2138     case INDEX_op_bswap32_i64:
2139         return TCG_TARGET_HAS_bswap32_i64;
2140     case INDEX_op_bswap64_i64:
2141         return TCG_TARGET_HAS_bswap64_i64;
2142     case INDEX_op_not_i64:
2143         return TCG_TARGET_HAS_not_i64;
2144     case INDEX_op_andc_i64:
2145         return TCG_TARGET_HAS_andc_i64;
2146     case INDEX_op_orc_i64:
2147         return TCG_TARGET_HAS_orc_i64;
2148     case INDEX_op_eqv_i64:
2149         return TCG_TARGET_HAS_eqv_i64;
2150     case INDEX_op_nand_i64:
2151         return TCG_TARGET_HAS_nand_i64;
2152     case INDEX_op_nor_i64:
2153         return TCG_TARGET_HAS_nor_i64;
2154     case INDEX_op_clz_i64:
2155         return TCG_TARGET_HAS_clz_i64;
2156     case INDEX_op_ctz_i64:
2157         return TCG_TARGET_HAS_ctz_i64;
2158     case INDEX_op_ctpop_i64:
2159         return TCG_TARGET_HAS_ctpop_i64;
2160     case INDEX_op_add2_i64:
2161         return TCG_TARGET_HAS_add2_i64;
2162     case INDEX_op_sub2_i64:
2163         return TCG_TARGET_HAS_sub2_i64;
2164     case INDEX_op_mulu2_i64:
2165         return TCG_TARGET_HAS_mulu2_i64;
2166     case INDEX_op_muls2_i64:
2167         return TCG_TARGET_HAS_muls2_i64;
2168     case INDEX_op_muluh_i64:
2169         return TCG_TARGET_HAS_muluh_i64;
2170     case INDEX_op_mulsh_i64:
2171         return TCG_TARGET_HAS_mulsh_i64;
2172 
2173     case INDEX_op_mov_vec:
2174     case INDEX_op_dup_vec:
2175     case INDEX_op_dupm_vec:
2176     case INDEX_op_ld_vec:
2177     case INDEX_op_st_vec:
2178     case INDEX_op_add_vec:
2179     case INDEX_op_sub_vec:
2180     case INDEX_op_and_vec:
2181     case INDEX_op_or_vec:
2182     case INDEX_op_xor_vec:
2183     case INDEX_op_cmp_vec:
2184         return have_vec;
2185     case INDEX_op_dup2_vec:
2186         return have_vec && TCG_TARGET_REG_BITS == 32;
2187     case INDEX_op_not_vec:
2188         return have_vec && TCG_TARGET_HAS_not_vec;
2189     case INDEX_op_neg_vec:
2190         return have_vec && TCG_TARGET_HAS_neg_vec;
2191     case INDEX_op_abs_vec:
2192         return have_vec && TCG_TARGET_HAS_abs_vec;
2193     case INDEX_op_andc_vec:
2194         return have_vec && TCG_TARGET_HAS_andc_vec;
2195     case INDEX_op_orc_vec:
2196         return have_vec && TCG_TARGET_HAS_orc_vec;
2197     case INDEX_op_nand_vec:
2198         return have_vec && TCG_TARGET_HAS_nand_vec;
2199     case INDEX_op_nor_vec:
2200         return have_vec && TCG_TARGET_HAS_nor_vec;
2201     case INDEX_op_eqv_vec:
2202         return have_vec && TCG_TARGET_HAS_eqv_vec;
2203     case INDEX_op_mul_vec:
2204         return have_vec && TCG_TARGET_HAS_mul_vec;
2205     case INDEX_op_shli_vec:
2206     case INDEX_op_shri_vec:
2207     case INDEX_op_sari_vec:
2208         return have_vec && TCG_TARGET_HAS_shi_vec;
2209     case INDEX_op_shls_vec:
2210     case INDEX_op_shrs_vec:
2211     case INDEX_op_sars_vec:
2212         return have_vec && TCG_TARGET_HAS_shs_vec;
2213     case INDEX_op_shlv_vec:
2214     case INDEX_op_shrv_vec:
2215     case INDEX_op_sarv_vec:
2216         return have_vec && TCG_TARGET_HAS_shv_vec;
2217     case INDEX_op_rotli_vec:
2218         return have_vec && TCG_TARGET_HAS_roti_vec;
2219     case INDEX_op_rotls_vec:
2220         return have_vec && TCG_TARGET_HAS_rots_vec;
2221     case INDEX_op_rotlv_vec:
2222     case INDEX_op_rotrv_vec:
2223         return have_vec && TCG_TARGET_HAS_rotv_vec;
2224     case INDEX_op_ssadd_vec:
2225     case INDEX_op_usadd_vec:
2226     case INDEX_op_sssub_vec:
2227     case INDEX_op_ussub_vec:
2228         return have_vec && TCG_TARGET_HAS_sat_vec;
2229     case INDEX_op_smin_vec:
2230     case INDEX_op_umin_vec:
2231     case INDEX_op_smax_vec:
2232     case INDEX_op_umax_vec:
2233         return have_vec && TCG_TARGET_HAS_minmax_vec;
2234     case INDEX_op_bitsel_vec:
2235         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2236     case INDEX_op_cmpsel_vec:
2237         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2238 
2239     default:
2240         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2241         return true;
2242     }
2243 }
2244 
2245 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2246 
2247 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2248 {
2249     TCGv_i64 extend_free[MAX_CALL_IARGS];
2250     int n_extend = 0;
2251     TCGOp *op;
2252     int i, n, pi = 0, total_args;
2253 
2254     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2255         init_call_layout(info);
2256         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2257     }
2258 
2259     total_args = info->nr_out + info->nr_in + 2;
2260     op = tcg_op_alloc(INDEX_op_call, total_args);
2261 
2262 #ifdef CONFIG_PLUGIN
2263     /* Flag helpers that may affect guest state */
2264     if (tcg_ctx->plugin_insn &&
2265         !(info->flags & TCG_CALL_PLUGIN) &&
2266         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2267         tcg_ctx->plugin_insn->calls_helpers = true;
2268     }
2269 #endif
2270 
2271     TCGOP_CALLO(op) = n = info->nr_out;
2272     switch (n) {
2273     case 0:
2274         tcg_debug_assert(ret == NULL);
2275         break;
2276     case 1:
2277         tcg_debug_assert(ret != NULL);
2278         op->args[pi++] = temp_arg(ret);
2279         break;
2280     case 2:
2281     case 4:
2282         tcg_debug_assert(ret != NULL);
2283         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2284         tcg_debug_assert(ret->temp_subindex == 0);
2285         for (i = 0; i < n; ++i) {
2286             op->args[pi++] = temp_arg(ret + i);
2287         }
2288         break;
2289     default:
2290         g_assert_not_reached();
2291     }
2292 
2293     TCGOP_CALLI(op) = n = info->nr_in;
2294     for (i = 0; i < n; i++) {
2295         const TCGCallArgumentLoc *loc = &info->in[i];
2296         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2297 
2298         switch (loc->kind) {
2299         case TCG_CALL_ARG_NORMAL:
2300         case TCG_CALL_ARG_BY_REF:
2301         case TCG_CALL_ARG_BY_REF_N:
2302             op->args[pi++] = temp_arg(ts);
2303             break;
2304 
2305         case TCG_CALL_ARG_EXTEND_U:
2306         case TCG_CALL_ARG_EXTEND_S:
2307             {
2308                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2309                 TCGv_i32 orig = temp_tcgv_i32(ts);
2310 
2311                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2312                     tcg_gen_ext_i32_i64(temp, orig);
2313                 } else {
2314                     tcg_gen_extu_i32_i64(temp, orig);
2315                 }
2316                 op->args[pi++] = tcgv_i64_arg(temp);
2317                 extend_free[n_extend++] = temp;
2318             }
2319             break;
2320 
2321         default:
2322             g_assert_not_reached();
2323         }
2324     }
2325     op->args[pi++] = (uintptr_t)info->func;
2326     op->args[pi++] = (uintptr_t)info;
2327     tcg_debug_assert(pi == total_args);
2328 
2329     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2330 
2331     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2332     for (i = 0; i < n_extend; ++i) {
2333         tcg_temp_free_i64(extend_free[i]);
2334     }
2335 }
2336 
2337 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2338 {
2339     tcg_gen_callN(info, ret, NULL);
2340 }
2341 
2342 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2343 {
2344     tcg_gen_callN(info, ret, &t1);
2345 }
2346 
2347 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2348 {
2349     TCGTemp *args[2] = { t1, t2 };
2350     tcg_gen_callN(info, ret, args);
2351 }
2352 
2353 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2354                    TCGTemp *t2, TCGTemp *t3)
2355 {
2356     TCGTemp *args[3] = { t1, t2, t3 };
2357     tcg_gen_callN(info, ret, args);
2358 }
2359 
2360 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2361                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2362 {
2363     TCGTemp *args[4] = { t1, t2, t3, t4 };
2364     tcg_gen_callN(info, ret, args);
2365 }
2366 
2367 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2368                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2369 {
2370     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2371     tcg_gen_callN(info, ret, args);
2372 }
2373 
2374 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2375                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2376 {
2377     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2378     tcg_gen_callN(info, ret, args);
2379 }
2380 
2381 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2382                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2383                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2384 {
2385     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2386     tcg_gen_callN(info, ret, args);
2387 }
2388 
2389 static void tcg_reg_alloc_start(TCGContext *s)
2390 {
2391     int i, n;
2392 
2393     for (i = 0, n = s->nb_temps; i < n; i++) {
2394         TCGTemp *ts = &s->temps[i];
2395         TCGTempVal val = TEMP_VAL_MEM;
2396 
2397         switch (ts->kind) {
2398         case TEMP_CONST:
2399             val = TEMP_VAL_CONST;
2400             break;
2401         case TEMP_FIXED:
2402             val = TEMP_VAL_REG;
2403             break;
2404         case TEMP_GLOBAL:
2405             break;
2406         case TEMP_EBB:
2407             val = TEMP_VAL_DEAD;
2408             /* fall through */
2409         case TEMP_TB:
2410             ts->mem_allocated = 0;
2411             break;
2412         default:
2413             g_assert_not_reached();
2414         }
2415         ts->val_type = val;
2416     }
2417 
2418     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2419 }
2420 
2421 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2422                                  TCGTemp *ts)
2423 {
2424     int idx = temp_idx(ts);
2425 
2426     switch (ts->kind) {
2427     case TEMP_FIXED:
2428     case TEMP_GLOBAL:
2429         pstrcpy(buf, buf_size, ts->name);
2430         break;
2431     case TEMP_TB:
2432         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2433         break;
2434     case TEMP_EBB:
2435         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2436         break;
2437     case TEMP_CONST:
2438         switch (ts->type) {
2439         case TCG_TYPE_I32:
2440             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2441             break;
2442 #if TCG_TARGET_REG_BITS > 32
2443         case TCG_TYPE_I64:
2444             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2445             break;
2446 #endif
2447         case TCG_TYPE_V64:
2448         case TCG_TYPE_V128:
2449         case TCG_TYPE_V256:
2450             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2451                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2452             break;
2453         default:
2454             g_assert_not_reached();
2455         }
2456         break;
2457     }
2458     return buf;
2459 }
2460 
2461 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2462                              int buf_size, TCGArg arg)
2463 {
2464     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2465 }
2466 
2467 static const char * const cond_name[] =
2468 {
2469     [TCG_COND_NEVER] = "never",
2470     [TCG_COND_ALWAYS] = "always",
2471     [TCG_COND_EQ] = "eq",
2472     [TCG_COND_NE] = "ne",
2473     [TCG_COND_LT] = "lt",
2474     [TCG_COND_GE] = "ge",
2475     [TCG_COND_LE] = "le",
2476     [TCG_COND_GT] = "gt",
2477     [TCG_COND_LTU] = "ltu",
2478     [TCG_COND_GEU] = "geu",
2479     [TCG_COND_LEU] = "leu",
2480     [TCG_COND_GTU] = "gtu"
2481 };
2482 
2483 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2484 {
2485     [MO_UB]   = "ub",
2486     [MO_SB]   = "sb",
2487     [MO_LEUW] = "leuw",
2488     [MO_LESW] = "lesw",
2489     [MO_LEUL] = "leul",
2490     [MO_LESL] = "lesl",
2491     [MO_LEUQ] = "leq",
2492     [MO_BEUW] = "beuw",
2493     [MO_BESW] = "besw",
2494     [MO_BEUL] = "beul",
2495     [MO_BESL] = "besl",
2496     [MO_BEUQ] = "beq",
2497     [MO_128 + MO_BE] = "beo",
2498     [MO_128 + MO_LE] = "leo",
2499 };
2500 
2501 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2502     [MO_UNALN >> MO_ASHIFT]    = "un+",
2503     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2504     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2505     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2506     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2507     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2508     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2509     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2510 };
2511 
2512 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2513     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2514     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2515     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2516     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2517     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2518     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2519 };
2520 
2521 static const char bswap_flag_name[][6] = {
2522     [TCG_BSWAP_IZ] = "iz",
2523     [TCG_BSWAP_OZ] = "oz",
2524     [TCG_BSWAP_OS] = "os",
2525     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2526     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2527 };
2528 
2529 static inline bool tcg_regset_single(TCGRegSet d)
2530 {
2531     return (d & (d - 1)) == 0;
2532 }
2533 
2534 static inline TCGReg tcg_regset_first(TCGRegSet d)
2535 {
2536     if (TCG_TARGET_NB_REGS <= 32) {
2537         return ctz32(d);
2538     } else {
2539         return ctz64(d);
2540     }
2541 }
2542 
/*
 * fprintf() wrapper with no error return: evaluates to the number of
 * characters written, or to 0 when fprintf() reports a negative result.
 */
#define ne_fprintf(...) \
    ({ int n_ = fprintf(__VA_ARGS__); n_ < 0 ? 0 : n_; })
2546 
2547 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2548 {
2549     char buf[128];
2550     TCGOp *op;
2551 
2552     QTAILQ_FOREACH(op, &s->ops, link) {
2553         int i, k, nb_oargs, nb_iargs, nb_cargs;
2554         const TCGOpDef *def;
2555         TCGOpcode c;
2556         int col = 0;
2557 
2558         c = op->opc;
2559         def = &tcg_op_defs[c];
2560 
2561         if (c == INDEX_op_insn_start) {
2562             nb_oargs = 0;
2563             col += ne_fprintf(f, "\n ----");
2564 
2565             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2566                 col += ne_fprintf(f, " %016" PRIx64,
2567                                   tcg_get_insn_start_param(op, i));
2568             }
2569         } else if (c == INDEX_op_call) {
2570             const TCGHelperInfo *info = tcg_call_info(op);
2571             void *func = tcg_call_func(op);
2572 
2573             /* variable number of arguments */
2574             nb_oargs = TCGOP_CALLO(op);
2575             nb_iargs = TCGOP_CALLI(op);
2576             nb_cargs = def->nb_cargs;
2577 
2578             col += ne_fprintf(f, " %s ", def->name);
2579 
2580             /*
2581              * Print the function name from TCGHelperInfo, if available.
2582              * Note that plugins have a template function for the info,
2583              * but the actual function pointer comes from the plugin.
2584              */
2585             if (func == info->func) {
2586                 col += ne_fprintf(f, "%s", info->name);
2587             } else {
2588                 col += ne_fprintf(f, "plugin(%p)", func);
2589             }
2590 
2591             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2592             for (i = 0; i < nb_oargs; i++) {
2593                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2594                                                             op->args[i]));
2595             }
2596             for (i = 0; i < nb_iargs; i++) {
2597                 TCGArg arg = op->args[nb_oargs + i];
2598                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2599                 col += ne_fprintf(f, ",%s", t);
2600             }
2601         } else {
2602             col += ne_fprintf(f, " %s ", def->name);
2603 
2604             nb_oargs = def->nb_oargs;
2605             nb_iargs = def->nb_iargs;
2606             nb_cargs = def->nb_cargs;
2607 
2608             if (def->flags & TCG_OPF_VECTOR) {
2609                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2610                                   8 << TCGOP_VECE(op));
2611             }
2612 
2613             k = 0;
2614             for (i = 0; i < nb_oargs; i++) {
2615                 const char *sep =  k ? "," : "";
2616                 col += ne_fprintf(f, "%s%s", sep,
2617                                   tcg_get_arg_str(s, buf, sizeof(buf),
2618                                                   op->args[k++]));
2619             }
2620             for (i = 0; i < nb_iargs; i++) {
2621                 const char *sep =  k ? "," : "";
2622                 col += ne_fprintf(f, "%s%s", sep,
2623                                   tcg_get_arg_str(s, buf, sizeof(buf),
2624                                                   op->args[k++]));
2625             }
2626             switch (c) {
2627             case INDEX_op_brcond_i32:
2628             case INDEX_op_setcond_i32:
2629             case INDEX_op_negsetcond_i32:
2630             case INDEX_op_movcond_i32:
2631             case INDEX_op_brcond2_i32:
2632             case INDEX_op_setcond2_i32:
2633             case INDEX_op_brcond_i64:
2634             case INDEX_op_setcond_i64:
2635             case INDEX_op_negsetcond_i64:
2636             case INDEX_op_movcond_i64:
2637             case INDEX_op_cmp_vec:
2638             case INDEX_op_cmpsel_vec:
2639                 if (op->args[k] < ARRAY_SIZE(cond_name)
2640                     && cond_name[op->args[k]]) {
2641                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2642                 } else {
2643                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2644                 }
2645                 i = 1;
2646                 break;
2647             case INDEX_op_qemu_ld_a32_i32:
2648             case INDEX_op_qemu_ld_a64_i32:
2649             case INDEX_op_qemu_st_a32_i32:
2650             case INDEX_op_qemu_st_a64_i32:
2651             case INDEX_op_qemu_st8_a32_i32:
2652             case INDEX_op_qemu_st8_a64_i32:
2653             case INDEX_op_qemu_ld_a32_i64:
2654             case INDEX_op_qemu_ld_a64_i64:
2655             case INDEX_op_qemu_st_a32_i64:
2656             case INDEX_op_qemu_st_a64_i64:
2657             case INDEX_op_qemu_ld_a32_i128:
2658             case INDEX_op_qemu_ld_a64_i128:
2659             case INDEX_op_qemu_st_a32_i128:
2660             case INDEX_op_qemu_st_a64_i128:
2661                 {
2662                     const char *s_al, *s_op, *s_at;
2663                     MemOpIdx oi = op->args[k++];
2664                     MemOp mop = get_memop(oi);
2665                     unsigned ix = get_mmuidx(oi);
2666 
2667                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2668                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2669                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2670                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2671 
2672                     /* If all fields are accounted for, print symbolically. */
2673                     if (!mop && s_al && s_op && s_at) {
2674                         col += ne_fprintf(f, ",%s%s%s,%u",
2675                                           s_at, s_al, s_op, ix);
2676                     } else {
2677                         mop = get_memop(oi);
2678                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2679                     }
2680                     i = 1;
2681                 }
2682                 break;
2683             case INDEX_op_bswap16_i32:
2684             case INDEX_op_bswap16_i64:
2685             case INDEX_op_bswap32_i32:
2686             case INDEX_op_bswap32_i64:
2687             case INDEX_op_bswap64_i64:
2688                 {
2689                     TCGArg flags = op->args[k];
2690                     const char *name = NULL;
2691 
2692                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2693                         name = bswap_flag_name[flags];
2694                     }
2695                     if (name) {
2696                         col += ne_fprintf(f, ",%s", name);
2697                     } else {
2698                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2699                     }
2700                     i = k = 1;
2701                 }
2702                 break;
2703             default:
2704                 i = 0;
2705                 break;
2706             }
2707             switch (c) {
2708             case INDEX_op_set_label:
2709             case INDEX_op_br:
2710             case INDEX_op_brcond_i32:
2711             case INDEX_op_brcond_i64:
2712             case INDEX_op_brcond2_i32:
2713                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2714                                   arg_label(op->args[k])->id);
2715                 i++, k++;
2716                 break;
2717             case INDEX_op_mb:
2718                 {
2719                     TCGBar membar = op->args[k];
2720                     const char *b_op, *m_op;
2721 
2722                     switch (membar & TCG_BAR_SC) {
2723                     case 0:
2724                         b_op = "none";
2725                         break;
2726                     case TCG_BAR_LDAQ:
2727                         b_op = "acq";
2728                         break;
2729                     case TCG_BAR_STRL:
2730                         b_op = "rel";
2731                         break;
2732                     case TCG_BAR_SC:
2733                         b_op = "seq";
2734                         break;
2735                     default:
2736                         g_assert_not_reached();
2737                     }
2738 
2739                     switch (membar & TCG_MO_ALL) {
2740                     case 0:
2741                         m_op = "none";
2742                         break;
2743                     case TCG_MO_LD_LD:
2744                         m_op = "rr";
2745                         break;
2746                     case TCG_MO_LD_ST:
2747                         m_op = "rw";
2748                         break;
2749                     case TCG_MO_ST_LD:
2750                         m_op = "wr";
2751                         break;
2752                     case TCG_MO_ST_ST:
2753                         m_op = "ww";
2754                         break;
2755                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2756                         m_op = "rr+rw";
2757                         break;
2758                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2759                         m_op = "rr+wr";
2760                         break;
2761                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2762                         m_op = "rr+ww";
2763                         break;
2764                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2765                         m_op = "rw+wr";
2766                         break;
2767                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2768                         m_op = "rw+ww";
2769                         break;
2770                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2771                         m_op = "wr+ww";
2772                         break;
2773                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2774                         m_op = "rr+rw+wr";
2775                         break;
2776                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2777                         m_op = "rr+rw+ww";
2778                         break;
2779                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2780                         m_op = "rr+wr+ww";
2781                         break;
2782                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2783                         m_op = "rw+wr+ww";
2784                         break;
2785                     case TCG_MO_ALL:
2786                         m_op = "all";
2787                         break;
2788                     default:
2789                         g_assert_not_reached();
2790                     }
2791 
2792                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2793                     i++, k++;
2794                 }
2795                 break;
2796             default:
2797                 break;
2798             }
2799             for (; i < nb_cargs; i++, k++) {
2800                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2801                                   op->args[k]);
2802             }
2803         }
2804 
2805         if (have_prefs || op->life) {
2806             for (; col < 40; ++col) {
2807                 putc(' ', f);
2808             }
2809         }
2810 
2811         if (op->life) {
2812             unsigned life = op->life;
2813 
2814             if (life & (SYNC_ARG * 3)) {
2815                 ne_fprintf(f, "  sync:");
2816                 for (i = 0; i < 2; ++i) {
2817                     if (life & (SYNC_ARG << i)) {
2818                         ne_fprintf(f, " %d", i);
2819                     }
2820                 }
2821             }
2822             life /= DEAD_ARG;
2823             if (life) {
2824                 ne_fprintf(f, "  dead:");
2825                 for (i = 0; life; ++i, life >>= 1) {
2826                     if (life & 1) {
2827                         ne_fprintf(f, " %d", i);
2828                     }
2829                 }
2830             }
2831         }
2832 
2833         if (have_prefs) {
2834             for (i = 0; i < nb_oargs; ++i) {
2835                 TCGRegSet set = output_pref(op, i);
2836 
2837                 if (i == 0) {
2838                     ne_fprintf(f, "  pref=");
2839                 } else {
2840                     ne_fprintf(f, ",");
2841                 }
2842                 if (set == 0) {
2843                     ne_fprintf(f, "none");
2844                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2845                     ne_fprintf(f, "all");
2846 #ifdef CONFIG_DEBUG_TCG
2847                 } else if (tcg_regset_single(set)) {
2848                     TCGReg reg = tcg_regset_first(set);
2849                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2850 #endif
2851                 } else if (TCG_TARGET_NB_REGS <= 32) {
2852                     ne_fprintf(f, "0x%x", (uint32_t)set);
2853                 } else {
2854                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2855                 }
2856             }
2857         }
2858 
2859         putc('\n', f);
2860     }
2861 }
2862 
2863 /* we give more priority to constraints with less registers */
2864 static int get_constraint_priority(const TCGOpDef *def, int k)
2865 {
2866     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2867     int n = ctpop64(arg_ct->regs);
2868 
2869     /*
2870      * Sort constraints of a single register first, which includes output
2871      * aliases (which must exactly match the input already allocated).
2872      */
2873     if (n == 1 || arg_ct->oalias) {
2874         return INT_MAX;
2875     }
2876 
2877     /*
2878      * Sort register pairs next, first then second immediately after.
2879      * Arbitrarily sort multiple pairs by the index of the first reg;
2880      * there shouldn't be many pairs.
2881      */
2882     switch (arg_ct->pair) {
2883     case 1:
2884     case 3:
2885         return (k + 1) * 2;
2886     case 2:
2887         return (arg_ct->pair_index + 1) * 2 - 1;
2888     }
2889 
2890     /* Finally, sort by decreasing register count. */
2891     assert(n > 1);
2892     return -n;
2893 }
2894 
2895 /* sort from highest priority to lowest */
2896 static void sort_constraints(TCGOpDef *def, int start, int n)
2897 {
2898     int i, j;
2899     TCGArgConstraint *a = def->args_ct;
2900 
2901     for (i = 0; i < n; i++) {
2902         a[start + i].sort_index = start + i;
2903     }
2904     if (n <= 1) {
2905         return;
2906     }
2907     for (i = 0; i < n - 1; i++) {
2908         for (j = i + 1; j < n; j++) {
2909             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2910             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2911             if (p1 < p2) {
2912                 int tmp = a[start + i].sort_index;
2913                 a[start + i].sort_index = a[start + j].sort_index;
2914                 a[start + j].sort_index = tmp;
2915             }
2916         }
2917     }
2918 }
2919 
/*
 * Fill in tcg_op_defs[op].args_ct for every present opcode that has
 * arguments, by parsing the constraint strings selected through
 * tcg_target_op_def(), then fix up aliased register pairs and compute
 * the sort order of the output and of the input constraints.
 *
 * @s is not referenced by this function.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Opcodes flagged as not present have no constraints to process. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Only input and output arguments carry constraints. */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Arguments are ordered outputs first, then inputs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /*
             * The first character may be a special marker.  [0-9pm] must
             * be the whole string; '&' is a prefix to further letters.
             */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input @i aliases output @o: start from a copy of it. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                /* A copied pair needs the fix-up pass further below. */
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Only valid on outputs; sets the newreg flag. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the second of the pair (pair == 2). */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the first of the pair (pair == 1). */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate every remaining constraint letter of the string. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* @o is the aliased output, @o2 the other half of its pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* @o is the aliased output, @o2 the other half of its pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3124 
3125 static void remove_label_use(TCGOp *op, int idx)
3126 {
3127     TCGLabel *label = arg_label(op->args[idx]);
3128     TCGLabelUse *use;
3129 
3130     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3131         if (use->op == op) {
3132             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3133             return;
3134         }
3135     }
3136     g_assert_not_reached();
3137 }
3138 
3139 void tcg_op_remove(TCGContext *s, TCGOp *op)
3140 {
3141     switch (op->opc) {
3142     case INDEX_op_br:
3143         remove_label_use(op, 0);
3144         break;
3145     case INDEX_op_brcond_i32:
3146     case INDEX_op_brcond_i64:
3147         remove_label_use(op, 3);
3148         break;
3149     case INDEX_op_brcond2_i32:
3150         remove_label_use(op, 5);
3151         break;
3152     default:
3153         break;
3154     }
3155 
3156     QTAILQ_REMOVE(&s->ops, op, link);
3157     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3158     s->nb_ops--;
3159 }
3160 
3161 void tcg_remove_ops_after(TCGOp *op)
3162 {
3163     TCGContext *s = tcg_ctx;
3164 
3165     while (true) {
3166         TCGOp *last = tcg_last_op();
3167         if (last == op) {
3168             return;
3169         }
3170         tcg_op_remove(s, last);
3171     }
3172 }
3173 
3174 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3175 {
3176     TCGContext *s = tcg_ctx;
3177     TCGOp *op = NULL;
3178 
3179     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3180         QTAILQ_FOREACH(op, &s->free_ops, link) {
3181             if (nargs <= op->nargs) {
3182                 QTAILQ_REMOVE(&s->free_ops, op, link);
3183                 nargs = op->nargs;
3184                 goto found;
3185             }
3186         }
3187     }
3188 
3189     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3190     nargs = MAX(4, nargs);
3191     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3192 
3193  found:
3194     memset(op, 0, offsetof(TCGOp, link));
3195     op->opc = opc;
3196     op->nargs = nargs;
3197 
3198     /* Check for bitfield overflow. */
3199     tcg_debug_assert(op->nargs == nargs);
3200 
3201     s->nb_ops++;
3202     return op;
3203 }
3204 
3205 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3206 {
3207     TCGOp *op = tcg_op_alloc(opc, nargs);
3208     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3209     return op;
3210 }
3211 
3212 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3213                             TCGOpcode opc, unsigned nargs)
3214 {
3215     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3216     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3217     return new_op;
3218 }
3219 
3220 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3221                            TCGOpcode opc, unsigned nargs)
3222 {
3223     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3224     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3225     return new_op;
3226 }
3227 
3228 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3229 {
3230     TCGLabelUse *u;
3231 
3232     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3233         TCGOp *op = u->op;
3234         switch (op->opc) {
3235         case INDEX_op_br:
3236             op->args[0] = label_arg(to);
3237             break;
3238         case INDEX_op_brcond_i32:
3239         case INDEX_op_brcond_i64:
3240             op->args[3] = label_arg(to);
3241             break;
3242         case INDEX_op_brcond2_i32:
3243             op->args[5] = label_arg(to);
3244             break;
3245         default:
3246             g_assert_not_reached();
3247         }
3248     }
3249 
3250     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3251 }
3252 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the ops of @s in order, tracking in 'dead' whether the current
 * position follows an unconditional control transfer.  Ops seen while
 * 'dead' is set are removed, except insn_start ops and labels that
 * still have branches referring to them.  Along the way, adjacent
 * labels are merged and a branch to the immediately following label
 * is dropped.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default an op is removed exactly when it is unreachable. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Re-read the predecessor now that the old one is gone. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        /* 'remove' was sampled before this op could change 'dead'. */
        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3344 
/*
 * Bits kept in TCGTemp.state by the liveness analysis helpers below.
 * NOTE(review): judging by the surrounding comments, TS_DEAD marks a
 * temp whose value is not needed and TS_MEM one that should be in
 * memory -- confirm against the users of these flags.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument @n in a variable named
 * 'arg_life', which must be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3350 
3351 /* For liveness_pass_1, the register preferences for a given temp.  */
3352 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3353 {
3354     return ts->state_ptr;
3355 }
3356 
3357 /* For liveness_pass_1, reset the preferences for a given temp to the
3358  * maximal regset for its type.
3359  */
3360 static inline void la_reset_pref(TCGTemp *ts)
3361 {
3362     *la_temp_pref(ts)
3363         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3364 }
3365 
3366 /* liveness analysis: end of function: all temps are dead, and globals
3367    should be in memory. */
3368 static void la_func_end(TCGContext *s, int ng, int nt)
3369 {
3370     int i;
3371 
3372     for (i = 0; i < ng; ++i) {
3373         s->temps[i].state = TS_DEAD | TS_MEM;
3374         la_reset_pref(&s->temps[i]);
3375     }
3376     for (i = ng; i < nt; ++i) {
3377         s->temps[i].state = TS_DEAD;
3378         la_reset_pref(&s->temps[i]);
3379     }
3380 }
3381 
3382 /* liveness analysis: end of basic block: all temps are dead, globals
3383    and local temps should be in memory. */
3384 static void la_bb_end(TCGContext *s, int ng, int nt)
3385 {
3386     int i;
3387 
3388     for (i = 0; i < nt; ++i) {
3389         TCGTemp *ts = &s->temps[i];
3390         int state;
3391 
3392         switch (ts->kind) {
3393         case TEMP_FIXED:
3394         case TEMP_GLOBAL:
3395         case TEMP_TB:
3396             state = TS_DEAD | TS_MEM;
3397             break;
3398         case TEMP_EBB:
3399         case TEMP_CONST:
3400             state = TS_DEAD;
3401             break;
3402         default:
3403             g_assert_not_reached();
3404         }
3405         ts->state = state;
3406         la_reset_pref(ts);
3407     }
3408 }
3409 
3410 /* liveness analysis: sync globals back to memory.  */
3411 static void la_global_sync(TCGContext *s, int ng)
3412 {
3413     int i;
3414 
3415     for (i = 0; i < ng; ++i) {
3416         int state = s->temps[i].state;
3417         s->temps[i].state = state | TS_MEM;
3418         if (state == TS_DEAD) {
3419             /* If the global was previously dead, reset prefs.  */
3420             la_reset_pref(&s->temps[i]);
3421         }
3422     }
3423 }
3424 
3425 /*
3426  * liveness analysis: conditional branch: all temps are dead unless
3427  * explicitly live-across-conditional-branch, globals and local temps
3428  * should be synced.
3429  */
3430 static void la_bb_sync(TCGContext *s, int ng, int nt)
3431 {
3432     la_global_sync(s, ng);
3433 
3434     for (int i = ng; i < nt; ++i) {
3435         TCGTemp *ts = &s->temps[i];
3436         int state;
3437 
3438         switch (ts->kind) {
3439         case TEMP_TB:
3440             state = ts->state;
3441             ts->state = state | TS_MEM;
3442             if (state != TS_DEAD) {
3443                 continue;
3444             }
3445             break;
3446         case TEMP_EBB:
3447         case TEMP_CONST:
3448             continue;
3449         default:
3450             g_assert_not_reached();
3451         }
3452         la_reset_pref(&s->temps[i]);
3453     }
3454 }
3455 
3456 /* liveness analysis: sync globals back to memory and kill.  */
3457 static void la_global_kill(TCGContext *s, int ng)
3458 {
3459     int i;
3460 
3461     for (i = 0; i < ng; i++) {
3462         s->temps[i].state = TS_DEAD | TS_MEM;
3463         la_reset_pref(&s->temps[i]);
3464     }
3465 }
3466 
3467 /* liveness analysis: note live globals crossing calls.  */
3468 static void la_cross_call(TCGContext *s, int nt)
3469 {
3470     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3471     int i;
3472 
3473     for (i = 0; i < nt; i++) {
3474         TCGTemp *ts = &s->temps[i];
3475         if (!(ts->state & TS_DEAD)) {
3476             TCGRegSet *pset = la_temp_pref(ts);
3477             TCGRegSet set = *pset;
3478 
3479             set &= mask;
3480             /* If the combination is not possible, restart.  */
3481             if (set == 0) {
3482                 set = tcg_target_available_regs[ts->type] & mask;
3483             }
3484             *pset = set;
3485         }
3486     }
3487 }
3488 
3489 /*
3490  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3491  * to TEMP_EBB, if possible.
3492  */
3493 static void __attribute__((noinline))
3494 liveness_pass_0(TCGContext *s)
3495 {
3496     void * const multiple_ebb = (void *)(uintptr_t)-1;
3497     int nb_temps = s->nb_temps;
3498     TCGOp *op, *ebb;
3499 
3500     for (int i = s->nb_globals; i < nb_temps; ++i) {
3501         s->temps[i].state_ptr = NULL;
3502     }
3503 
3504     /*
3505      * Represent each EBB by the op at which it begins.  In the case of
3506      * the first EBB, this is the first op, otherwise it is a label.
3507      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3508      * within a single EBB, else MULTIPLE_EBB.
3509      */
3510     ebb = QTAILQ_FIRST(&s->ops);
3511     QTAILQ_FOREACH(op, &s->ops, link) {
3512         const TCGOpDef *def;
3513         int nb_oargs, nb_iargs;
3514 
3515         switch (op->opc) {
3516         case INDEX_op_set_label:
3517             ebb = op;
3518             continue;
3519         case INDEX_op_discard:
3520             continue;
3521         case INDEX_op_call:
3522             nb_oargs = TCGOP_CALLO(op);
3523             nb_iargs = TCGOP_CALLI(op);
3524             break;
3525         default:
3526             def = &tcg_op_defs[op->opc];
3527             nb_oargs = def->nb_oargs;
3528             nb_iargs = def->nb_iargs;
3529             break;
3530         }
3531 
3532         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3533             TCGTemp *ts = arg_temp(op->args[i]);
3534 
3535             if (ts->kind != TEMP_TB) {
3536                 continue;
3537             }
3538             if (ts->state_ptr == NULL) {
3539                 ts->state_ptr = ebb;
3540             } else if (ts->state_ptr != ebb) {
3541                 ts->state_ptr = multiple_ebb;
3542             }
3543         }
3544     }
3545 
3546     /*
3547      * For TEMP_TB that turned out not to be used beyond one EBB,
3548      * reduce the liveness to TEMP_EBB.
3549      */
3550     for (int i = s->nb_globals; i < nb_temps; ++i) {
3551         TCGTemp *ts = &s->temps[i];
3552         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3553             ts->kind = TEMP_EBB;
3554         }
3555     }
3556 }
3557 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
3561 static void __attribute__((noinline))
3562 liveness_pass_1(TCGContext *s)
3563 {
3564     int nb_globals = s->nb_globals;
3565     int nb_temps = s->nb_temps;
3566     TCGOp *op, *op_prev;
3567     TCGRegSet *prefs;
3568     int i;
3569 
3570     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3571     for (i = 0; i < nb_temps; ++i) {
3572         s->temps[i].state_ptr = prefs + i;
3573     }
3574 
3575     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3576     la_func_end(s, nb_globals, nb_temps);
3577 
3578     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3579         int nb_iargs, nb_oargs;
3580         TCGOpcode opc_new, opc_new2;
3581         bool have_opc_new2;
3582         TCGLifeData arg_life = 0;
3583         TCGTemp *ts;
3584         TCGOpcode opc = op->opc;
3585         const TCGOpDef *def = &tcg_op_defs[opc];
3586 
3587         switch (opc) {
3588         case INDEX_op_call:
3589             {
3590                 const TCGHelperInfo *info = tcg_call_info(op);
3591                 int call_flags = tcg_call_flags(op);
3592 
3593                 nb_oargs = TCGOP_CALLO(op);
3594                 nb_iargs = TCGOP_CALLI(op);
3595 
3596                 /* pure functions can be removed if their result is unused */
3597                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3598                     for (i = 0; i < nb_oargs; i++) {
3599                         ts = arg_temp(op->args[i]);
3600                         if (ts->state != TS_DEAD) {
3601                             goto do_not_remove_call;
3602                         }
3603                     }
3604                     goto do_remove;
3605                 }
3606             do_not_remove_call:
3607 
3608                 /* Output args are dead.  */
3609                 for (i = 0; i < nb_oargs; i++) {
3610                     ts = arg_temp(op->args[i]);
3611                     if (ts->state & TS_DEAD) {
3612                         arg_life |= DEAD_ARG << i;
3613                     }
3614                     if (ts->state & TS_MEM) {
3615                         arg_life |= SYNC_ARG << i;
3616                     }
3617                     ts->state = TS_DEAD;
3618                     la_reset_pref(ts);
3619                 }
3620 
3621                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3622                 memset(op->output_pref, 0, sizeof(op->output_pref));
3623 
3624                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3625                                     TCG_CALL_NO_READ_GLOBALS))) {
3626                     la_global_kill(s, nb_globals);
3627                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3628                     la_global_sync(s, nb_globals);
3629                 }
3630 
3631                 /* Record arguments that die in this helper.  */
3632                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3633                     ts = arg_temp(op->args[i]);
3634                     if (ts->state & TS_DEAD) {
3635                         arg_life |= DEAD_ARG << i;
3636                     }
3637                 }
3638 
3639                 /* For all live registers, remove call-clobbered prefs.  */
3640                 la_cross_call(s, nb_temps);
3641 
3642                 /*
3643                  * Input arguments are live for preceding opcodes.
3644                  *
3645                  * For those arguments that die, and will be allocated in
3646                  * registers, clear the register set for that arg, to be
3647                  * filled in below.  For args that will be on the stack,
3648                  * reset to any available reg.  Process arguments in reverse
3649                  * order so that if a temp is used more than once, the stack
3650                  * reset to max happens before the register reset to 0.
3651                  */
3652                 for (i = nb_iargs - 1; i >= 0; i--) {
3653                     const TCGCallArgumentLoc *loc = &info->in[i];
3654                     ts = arg_temp(op->args[nb_oargs + i]);
3655 
3656                     if (ts->state & TS_DEAD) {
3657                         switch (loc->kind) {
3658                         case TCG_CALL_ARG_NORMAL:
3659                         case TCG_CALL_ARG_EXTEND_U:
3660                         case TCG_CALL_ARG_EXTEND_S:
3661                             if (arg_slot_reg_p(loc->arg_slot)) {
3662                                 *la_temp_pref(ts) = 0;
3663                                 break;
3664                             }
3665                             /* fall through */
3666                         default:
3667                             *la_temp_pref(ts) =
3668                                 tcg_target_available_regs[ts->type];
3669                             break;
3670                         }
3671                         ts->state &= ~TS_DEAD;
3672                     }
3673                 }
3674 
3675                 /*
3676                  * For each input argument, add its input register to prefs.
3677                  * If a temp is used once, this produces a single set bit;
3678                  * if a temp is used multiple times, this produces a set.
3679                  */
3680                 for (i = 0; i < nb_iargs; i++) {
3681                     const TCGCallArgumentLoc *loc = &info->in[i];
3682                     ts = arg_temp(op->args[nb_oargs + i]);
3683 
3684                     switch (loc->kind) {
3685                     case TCG_CALL_ARG_NORMAL:
3686                     case TCG_CALL_ARG_EXTEND_U:
3687                     case TCG_CALL_ARG_EXTEND_S:
3688                         if (arg_slot_reg_p(loc->arg_slot)) {
3689                             tcg_regset_set_reg(*la_temp_pref(ts),
3690                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3691                         }
3692                         break;
3693                     default:
3694                         break;
3695                     }
3696                 }
3697             }
3698             break;
3699         case INDEX_op_insn_start:
3700             break;
3701         case INDEX_op_discard:
3702             /* mark the temporary as dead */
3703             ts = arg_temp(op->args[0]);
3704             ts->state = TS_DEAD;
3705             la_reset_pref(ts);
3706             break;
3707 
3708         case INDEX_op_add2_i32:
3709             opc_new = INDEX_op_add_i32;
3710             goto do_addsub2;
3711         case INDEX_op_sub2_i32:
3712             opc_new = INDEX_op_sub_i32;
3713             goto do_addsub2;
3714         case INDEX_op_add2_i64:
3715             opc_new = INDEX_op_add_i64;
3716             goto do_addsub2;
3717         case INDEX_op_sub2_i64:
3718             opc_new = INDEX_op_sub_i64;
3719         do_addsub2:
3720             nb_iargs = 4;
3721             nb_oargs = 2;
3722             /* Test if the high part of the operation is dead, but not
3723                the low part.  The result can be optimized to a simple
3724                add or sub.  This happens often for x86_64 guest when the
3725                cpu mode is set to 32 bit.  */
3726             if (arg_temp(op->args[1])->state == TS_DEAD) {
3727                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3728                     goto do_remove;
3729                 }
3730                 /* Replace the opcode and adjust the args in place,
3731                    leaving 3 unused args at the end.  */
3732                 op->opc = opc = opc_new;
3733                 op->args[1] = op->args[2];
3734                 op->args[2] = op->args[4];
3735                 /* Fall through and mark the single-word operation live.  */
3736                 nb_iargs = 2;
3737                 nb_oargs = 1;
3738             }
3739             goto do_not_remove;
3740 
3741         case INDEX_op_mulu2_i32:
3742             opc_new = INDEX_op_mul_i32;
3743             opc_new2 = INDEX_op_muluh_i32;
3744             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3745             goto do_mul2;
3746         case INDEX_op_muls2_i32:
3747             opc_new = INDEX_op_mul_i32;
3748             opc_new2 = INDEX_op_mulsh_i32;
3749             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3750             goto do_mul2;
3751         case INDEX_op_mulu2_i64:
3752             opc_new = INDEX_op_mul_i64;
3753             opc_new2 = INDEX_op_muluh_i64;
3754             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3755             goto do_mul2;
3756         case INDEX_op_muls2_i64:
3757             opc_new = INDEX_op_mul_i64;
3758             opc_new2 = INDEX_op_mulsh_i64;
3759             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3760             goto do_mul2;
3761         do_mul2:
3762             nb_iargs = 2;
3763             nb_oargs = 2;
3764             if (arg_temp(op->args[1])->state == TS_DEAD) {
3765                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3766                     /* Both parts of the operation are dead.  */
3767                     goto do_remove;
3768                 }
3769                 /* The high part of the operation is dead; generate the low. */
3770                 op->opc = opc = opc_new;
3771                 op->args[1] = op->args[2];
3772                 op->args[2] = op->args[3];
3773             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3774                 /* The low part of the operation is dead; generate the high. */
3775                 op->opc = opc = opc_new2;
3776                 op->args[0] = op->args[1];
3777                 op->args[1] = op->args[2];
3778                 op->args[2] = op->args[3];
3779             } else {
3780                 goto do_not_remove;
3781             }
3782             /* Mark the single-word operation live.  */
3783             nb_oargs = 1;
3784             goto do_not_remove;
3785 
3786         default:
3787             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3788             nb_iargs = def->nb_iargs;
3789             nb_oargs = def->nb_oargs;
3790 
3791             /* Test if the operation can be removed because all
3792                its outputs are dead. We assume that nb_oargs == 0
3793                implies side effects */
3794             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3795                 for (i = 0; i < nb_oargs; i++) {
3796                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3797                         goto do_not_remove;
3798                     }
3799                 }
3800                 goto do_remove;
3801             }
3802             goto do_not_remove;
3803 
3804         do_remove:
3805             tcg_op_remove(s, op);
3806             break;
3807 
3808         do_not_remove:
3809             for (i = 0; i < nb_oargs; i++) {
3810                 ts = arg_temp(op->args[i]);
3811 
3812                 /* Remember the preference of the uses that followed.  */
3813                 if (i < ARRAY_SIZE(op->output_pref)) {
3814                     op->output_pref[i] = *la_temp_pref(ts);
3815                 }
3816 
3817                 /* Output args are dead.  */
3818                 if (ts->state & TS_DEAD) {
3819                     arg_life |= DEAD_ARG << i;
3820                 }
3821                 if (ts->state & TS_MEM) {
3822                     arg_life |= SYNC_ARG << i;
3823                 }
3824                 ts->state = TS_DEAD;
3825                 la_reset_pref(ts);
3826             }
3827 
3828             /* If end of basic block, update.  */
3829             if (def->flags & TCG_OPF_BB_EXIT) {
3830                 la_func_end(s, nb_globals, nb_temps);
3831             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3832                 la_bb_sync(s, nb_globals, nb_temps);
3833             } else if (def->flags & TCG_OPF_BB_END) {
3834                 la_bb_end(s, nb_globals, nb_temps);
3835             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3836                 la_global_sync(s, nb_globals);
3837                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3838                     la_cross_call(s, nb_temps);
3839                 }
3840             }
3841 
3842             /* Record arguments that die in this opcode.  */
3843             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3844                 ts = arg_temp(op->args[i]);
3845                 if (ts->state & TS_DEAD) {
3846                     arg_life |= DEAD_ARG << i;
3847                 }
3848             }
3849 
3850             /* Input arguments are live for preceding opcodes.  */
3851             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3852                 ts = arg_temp(op->args[i]);
3853                 if (ts->state & TS_DEAD) {
3854                     /* For operands that were dead, initially allow
3855                        all regs for the type.  */
3856                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3857                     ts->state &= ~TS_DEAD;
3858                 }
3859             }
3860 
3861             /* Incorporate constraints for this operand.  */
3862             switch (opc) {
3863             case INDEX_op_mov_i32:
3864             case INDEX_op_mov_i64:
3865                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3866                    have proper constraints.  That said, special case
3867                    moves to propagate preferences backward.  */
3868                 if (IS_DEAD_ARG(1)) {
3869                     *la_temp_pref(arg_temp(op->args[0]))
3870                         = *la_temp_pref(arg_temp(op->args[1]));
3871                 }
3872                 break;
3873 
3874             default:
3875                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3876                     const TCGArgConstraint *ct = &def->args_ct[i];
3877                     TCGRegSet set, *pset;
3878 
3879                     ts = arg_temp(op->args[i]);
3880                     pset = la_temp_pref(ts);
3881                     set = *pset;
3882 
3883                     set &= ct->regs;
3884                     if (ct->ialias) {
3885                         set &= output_pref(op, ct->alias_index);
3886                     }
3887                     /* If the combination is not possible, restart.  */
3888                     if (set == 0) {
3889                         set = ct->regs;
3890                     }
3891                     *pset = set;
3892                 }
3893                 break;
3894             }
3895             break;
3896         }
3897         op->life = arg_life;
3898     }
3899 }
3900 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * Every global with indirect_reg set gets a freshly allocated TEMP_EBB
 * temporary (the "direct" temp, reachable through state_ptr).  The op
 * list is then walked forwards: uses of such a global are rewritten to
 * use the direct temp, with an ld op inserted before the first use
 * after the value was dead, and an st op inserted after writes that the
 * life data marks as needing a sync.
 *
 * Returns true if any op argument was rewritten.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            /* The direct temp mirrors the type layout of the global.  */
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /*
     * Remaining (non-global) temps have no direct replacement.  Note that
     * nb_temps is sampled here, after the allocations above, so the new
     * direct temps are covered too.
     */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* SAFE iteration: ops are inserted around and removed at 'op' below.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Reload the direct temp from the global's memory slot.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /*
                         * The destination is synced and then dead, so the
                         * move itself is unnecessary: store the move's
                         * source straight to memory and drop the mov.
                         */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    /* Not an indirect global: nothing to rewrite.  */
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4089 
4090 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4091 {
4092     intptr_t off;
4093     int size, align;
4094 
4095     /* When allocating an object, look at the full type. */
4096     size = tcg_type_size(ts->base_type);
4097     switch (ts->base_type) {
4098     case TCG_TYPE_I32:
4099         align = 4;
4100         break;
4101     case TCG_TYPE_I64:
4102     case TCG_TYPE_V64:
4103         align = 8;
4104         break;
4105     case TCG_TYPE_I128:
4106     case TCG_TYPE_V128:
4107     case TCG_TYPE_V256:
4108         /*
4109          * Note that we do not require aligned storage for V256,
4110          * and that we provide alignment for I128 to match V128,
4111          * even if that's above what the host ABI requires.
4112          */
4113         align = 16;
4114         break;
4115     default:
4116         g_assert_not_reached();
4117     }
4118 
4119     /*
4120      * Assume the stack is sufficiently aligned.
4121      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4122      * and do not require 16 byte vector alignment.  This seems slightly
4123      * easier than fully parameterizing the above switch statement.
4124      */
4125     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4126     off = ROUND_UP(s->current_frame_offset, align);
4127 
4128     /* If we've exhausted the stack frame, restart with a smaller TB. */
4129     if (off + size > s->frame_end) {
4130         tcg_raise_tb_overflow(s);
4131     }
4132     s->current_frame_offset = off + size;
4133 #if defined(__sparc__)
4134     off += TCG_TARGET_STACK_BIAS;
4135 #endif
4136 
4137     /* If the object was subdivided, assign memory to all the parts. */
4138     if (ts->base_type != ts->type) {
4139         int part_size = tcg_type_size(ts->type);
4140         int part_count = size / part_size;
4141 
4142         /*
4143          * Each part is allocated sequentially in tcg_temp_new_internal.
4144          * Jump back to the first part by subtracting the current index.
4145          */
4146         ts -= ts->temp_subindex;
4147         for (int i = 0; i < part_count; ++i) {
4148             ts[i].mem_offset = off + i * part_size;
4149             ts[i].mem_base = s->frame_temp;
4150             ts[i].mem_allocated = 1;
4151         }
4152     } else {
4153         ts->mem_offset = off;
4154         ts->mem_base = s->frame_temp;
4155         ts->mem_allocated = 1;
4156     }
4157 }
4158 
4159 /* Assign @reg to @ts, and update reg_to_temp[]. */
4160 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4161 {
4162     if (ts->val_type == TEMP_VAL_REG) {
4163         TCGReg old = ts->reg;
4164         tcg_debug_assert(s->reg_to_temp[old] == ts);
4165         if (old == reg) {
4166             return;
4167         }
4168         s->reg_to_temp[old] = NULL;
4169     }
4170     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4171     s->reg_to_temp[reg] = ts;
4172     ts->val_type = TEMP_VAL_REG;
4173     ts->reg = reg;
4174 }
4175 
4176 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4177 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4178 {
4179     tcg_debug_assert(type != TEMP_VAL_REG);
4180     if (ts->val_type == TEMP_VAL_REG) {
4181         TCGReg reg = ts->reg;
4182         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4183         s->reg_to_temp[reg] = NULL;
4184     }
4185     ts->val_type = type;
4186 }
4187 
4188 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4189 
4190 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4191    mark it free; otherwise mark it dead.  */
4192 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4193 {
4194     TCGTempVal new_type;
4195 
4196     switch (ts->kind) {
4197     case TEMP_FIXED:
4198         return;
4199     case TEMP_GLOBAL:
4200     case TEMP_TB:
4201         new_type = TEMP_VAL_MEM;
4202         break;
4203     case TEMP_EBB:
4204         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4205         break;
4206     case TEMP_CONST:
4207         new_type = TEMP_VAL_CONST;
4208         break;
4209     default:
4210         g_assert_not_reached();
4211     }
4212     set_temp_val_nonreg(s, ts, new_type);
4213 }
4214 
4215 /* Mark a temporary as dead.  */
4216 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4217 {
4218     temp_free_or_dead(s, ts, 1);
4219 }
4220 
4221 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4222    registers needs to be allocated to store a constant.  If 'free_or_dead'
4223    is non-zero, subsequently release the temporary; if it is positive, the
4224    temp is dead; if it is negative, the temp is free.  */
4225 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4226                       TCGRegSet preferred_regs, int free_or_dead)
4227 {
4228     if (!temp_readonly(ts) && !ts->mem_coherent) {
4229         if (!ts->mem_allocated) {
4230             temp_allocate_frame(s, ts);
4231         }
4232         switch (ts->val_type) {
4233         case TEMP_VAL_CONST:
4234             /* If we're going to free the temp immediately, then we won't
4235                require it later in a register, so attempt to store the
4236                constant to memory directly.  */
4237             if (free_or_dead
4238                 && tcg_out_sti(s, ts->type, ts->val,
4239                                ts->mem_base->reg, ts->mem_offset)) {
4240                 break;
4241             }
4242             temp_load(s, ts, tcg_target_available_regs[ts->type],
4243                       allocated_regs, preferred_regs);
4244             /* fallthrough */
4245 
4246         case TEMP_VAL_REG:
4247             tcg_out_st(s, ts->type, ts->reg,
4248                        ts->mem_base->reg, ts->mem_offset);
4249             break;
4250 
4251         case TEMP_VAL_MEM:
4252             break;
4253 
4254         case TEMP_VAL_DEAD:
4255         default:
4256             g_assert_not_reached();
4257         }
4258         ts->mem_coherent = 1;
4259     }
4260     if (free_or_dead) {
4261         temp_free_or_dead(s, ts, free_or_dead);
4262     }
4263 }
4264 
4265 /* free register 'reg' by spilling the corresponding temporary if necessary */
4266 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4267 {
4268     TCGTemp *ts = s->reg_to_temp[reg];
4269     if (ts != NULL) {
4270         temp_sync(s, ts, allocated_regs, 0, -1);
4271     }
4272 }
4273 
4274 /**
4275  * tcg_reg_alloc:
4276  * @required_regs: Set of registers in which we must allocate.
4277  * @allocated_regs: Set of registers which must be avoided.
4278  * @preferred_regs: Set of registers we should prefer.
4279  * @rev: True if we search the registers in "indirect" order.
4280  *
4281  * The allocated register must be in @required_regs & ~@allocated_regs,
4282  * but if we can put it in @preferred_regs we may save a move later.
4283  */
4284 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4285                             TCGRegSet allocated_regs,
4286                             TCGRegSet preferred_regs, bool rev)
4287 {
4288     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4289     TCGRegSet reg_ct[2];
4290     const int *order;
4291 
4292     reg_ct[1] = required_regs & ~allocated_regs;
4293     tcg_debug_assert(reg_ct[1] != 0);
4294     reg_ct[0] = reg_ct[1] & preferred_regs;
4295 
4296     /* Skip the preferred_regs option if it cannot be satisfied,
4297        or if the preference made no difference.  */
4298     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4299 
4300     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4301 
4302     /* Try free registers, preferences first.  */
4303     for (j = f; j < 2; j++) {
4304         TCGRegSet set = reg_ct[j];
4305 
4306         if (tcg_regset_single(set)) {
4307             /* One register in the set.  */
4308             TCGReg reg = tcg_regset_first(set);
4309             if (s->reg_to_temp[reg] == NULL) {
4310                 return reg;
4311             }
4312         } else {
4313             for (i = 0; i < n; i++) {
4314                 TCGReg reg = order[i];
4315                 if (s->reg_to_temp[reg] == NULL &&
4316                     tcg_regset_test_reg(set, reg)) {
4317                     return reg;
4318                 }
4319             }
4320         }
4321     }
4322 
4323     /* We must spill something.  */
4324     for (j = f; j < 2; j++) {
4325         TCGRegSet set = reg_ct[j];
4326 
4327         if (tcg_regset_single(set)) {
4328             /* One register in the set.  */
4329             TCGReg reg = tcg_regset_first(set);
4330             tcg_reg_free(s, reg, allocated_regs);
4331             return reg;
4332         } else {
4333             for (i = 0; i < n; i++) {
4334                 TCGReg reg = order[i];
4335                 if (tcg_regset_test_reg(set, reg)) {
4336                     tcg_reg_free(s, reg, allocated_regs);
4337                     return reg;
4338                 }
4339             }
4340         }
4341     }
4342 
4343     g_assert_not_reached();
4344 }
4345 
4346 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4347                                  TCGRegSet allocated_regs,
4348                                  TCGRegSet preferred_regs, bool rev)
4349 {
4350     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4351     TCGRegSet reg_ct[2];
4352     const int *order;
4353 
4354     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4355     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4356     tcg_debug_assert(reg_ct[1] != 0);
4357     reg_ct[0] = reg_ct[1] & preferred_regs;
4358 
4359     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4360 
4361     /*
4362      * Skip the preferred_regs option if it cannot be satisfied,
4363      * or if the preference made no difference.
4364      */
4365     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4366 
4367     /*
4368      * Minimize the number of flushes by looking for 2 free registers first,
4369      * then a single flush, then two flushes.
4370      */
4371     for (fmin = 2; fmin >= 0; fmin--) {
4372         for (j = k; j < 2; j++) {
4373             TCGRegSet set = reg_ct[j];
4374 
4375             for (i = 0; i < n; i++) {
4376                 TCGReg reg = order[i];
4377 
4378                 if (tcg_regset_test_reg(set, reg)) {
4379                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4380                     if (f >= fmin) {
4381                         tcg_reg_free(s, reg, allocated_regs);
4382                         tcg_reg_free(s, reg + 1, allocated_regs);
4383                         return reg;
4384                     }
4385                 }
4386             }
4387         }
4388     }
4389     g_assert_not_reached();
4390 }
4391 
4392 /* Make sure the temporary is in a register.  If needed, allocate the register
4393    from DESIRED while avoiding ALLOCATED.  */
4394 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4395                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4396 {
4397     TCGReg reg;
4398 
4399     switch (ts->val_type) {
4400     case TEMP_VAL_REG:
4401         return;
4402     case TEMP_VAL_CONST:
4403         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4404                             preferred_regs, ts->indirect_base);
4405         if (ts->type <= TCG_TYPE_I64) {
4406             tcg_out_movi(s, ts->type, reg, ts->val);
4407         } else {
4408             uint64_t val = ts->val;
4409             MemOp vece = MO_64;
4410 
4411             /*
4412              * Find the minimal vector element that matches the constant.
4413              * The targets will, in general, have to do this search anyway,
4414              * do this generically.
4415              */
4416             if (val == dup_const(MO_8, val)) {
4417                 vece = MO_8;
4418             } else if (val == dup_const(MO_16, val)) {
4419                 vece = MO_16;
4420             } else if (val == dup_const(MO_32, val)) {
4421                 vece = MO_32;
4422             }
4423 
4424             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4425         }
4426         ts->mem_coherent = 0;
4427         break;
4428     case TEMP_VAL_MEM:
4429         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4430                             preferred_regs, ts->indirect_base);
4431         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4432         ts->mem_coherent = 1;
4433         break;
4434     case TEMP_VAL_DEAD:
4435     default:
4436         g_assert_not_reached();
4437     }
4438     set_temp_val_reg(s, ts, reg);
4439 }
4440 
4441 /* Save a temporary to memory. 'allocated_regs' is used in case a
4442    temporary registers needs to be allocated to store a constant.  */
4443 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4444 {
4445     /* The liveness analysis already ensures that globals are back
4446        in memory. Keep an tcg_debug_assert for safety. */
4447     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4448 }
4449 
4450 /* save globals to their canonical location and assume they can be
4451    modified be the following code. 'allocated_regs' is used in case a
4452    temporary registers needs to be allocated to store a constant. */
4453 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4454 {
4455     int i, n;
4456 
4457     for (i = 0, n = s->nb_globals; i < n; i++) {
4458         temp_save(s, &s->temps[i], allocated_regs);
4459     }
4460 }
4461 
4462 /* sync globals to their canonical location and assume they can be
4463    read by the following code. 'allocated_regs' is used in case a
4464    temporary registers needs to be allocated to store a constant. */
4465 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4466 {
4467     int i, n;
4468 
4469     for (i = 0, n = s->nb_globals; i < n; i++) {
4470         TCGTemp *ts = &s->temps[i];
4471         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4472                          || ts->kind == TEMP_FIXED
4473                          || ts->mem_coherent);
4474     }
4475 }
4476 
4477 /* at the end of a basic block, we assume all temporaries are dead and
4478    all globals are stored at their canonical location. */
4479 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4480 {
4481     int i;
4482 
4483     for (i = s->nb_globals; i < s->nb_temps; i++) {
4484         TCGTemp *ts = &s->temps[i];
4485 
4486         switch (ts->kind) {
4487         case TEMP_TB:
4488             temp_save(s, ts, allocated_regs);
4489             break;
4490         case TEMP_EBB:
4491             /* The liveness analysis already ensures that temps are dead.
4492                Keep an tcg_debug_assert for safety. */
4493             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4494             break;
4495         case TEMP_CONST:
4496             /* Similarly, we should have freed any allocated register. */
4497             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4498             break;
4499         default:
4500             g_assert_not_reached();
4501         }
4502     }
4503 
4504     save_globals(s, allocated_regs);
4505 }
4506 
4507 /*
4508  * At a conditional branch, we assume all temporaries are dead unless
4509  * explicitly live-across-conditional-branch; all globals and local
4510  * temps are synced to their location.
4511  */
4512 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4513 {
4514     sync_globals(s, allocated_regs);
4515 
4516     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4517         TCGTemp *ts = &s->temps[i];
4518         /*
4519          * The liveness analysis already ensures that temps are dead.
4520          * Keep tcg_debug_asserts for safety.
4521          */
4522         switch (ts->kind) {
4523         case TEMP_TB:
4524             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4525             break;
4526         case TEMP_EBB:
4527         case TEMP_CONST:
4528             break;
4529         default:
4530             g_assert_not_reached();
4531         }
4532     }
4533 }
4534 
4535 /*
4536  * Specialized code generation for INDEX_op_mov_* with a constant.
4537  */
4538 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4539                                   tcg_target_ulong val, TCGLifeData arg_life,
4540                                   TCGRegSet preferred_regs)
4541 {
4542     /* ENV should not be modified.  */
4543     tcg_debug_assert(!temp_readonly(ots));
4544 
4545     /* The movi is not explicitly generated here.  */
4546     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4547     ots->val = val;
4548     ots->mem_coherent = 0;
4549     if (NEED_SYNC_ARG(0)) {
4550         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4551     } else if (IS_DEAD_ARG(0)) {
4552         temp_dead(s, ots);
4553     }
4554 }
4555 
4556 /*
4557  * Specialized code generation for INDEX_op_mov_*.
4558  */
4559 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4560 {
4561     const TCGLifeData arg_life = op->life;
4562     TCGRegSet allocated_regs, preferred_regs;
4563     TCGTemp *ts, *ots;
4564     TCGType otype, itype;
4565     TCGReg oreg, ireg;
4566 
4567     allocated_regs = s->reserved_regs;
4568     preferred_regs = output_pref(op, 0);
4569     ots = arg_temp(op->args[0]);
4570     ts = arg_temp(op->args[1]);
4571 
4572     /* ENV should not be modified.  */
4573     tcg_debug_assert(!temp_readonly(ots));
4574 
4575     /* Note that otype != itype for no-op truncation.  */
4576     otype = ots->type;
4577     itype = ts->type;
4578 
4579     if (ts->val_type == TEMP_VAL_CONST) {
4580         /* propagate constant or generate sti */
4581         tcg_target_ulong val = ts->val;
4582         if (IS_DEAD_ARG(1)) {
4583             temp_dead(s, ts);
4584         }
4585         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4586         return;
4587     }
4588 
4589     /* If the source value is in memory we're going to be forced
4590        to have it in a register in order to perform the copy.  Copy
4591        the SOURCE value into its own register first, that way we
4592        don't have to reload SOURCE the next time it is used. */
4593     if (ts->val_type == TEMP_VAL_MEM) {
4594         temp_load(s, ts, tcg_target_available_regs[itype],
4595                   allocated_regs, preferred_regs);
4596     }
4597     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4598     ireg = ts->reg;
4599 
4600     if (IS_DEAD_ARG(0)) {
4601         /* mov to a non-saved dead register makes no sense (even with
4602            liveness analysis disabled). */
4603         tcg_debug_assert(NEED_SYNC_ARG(0));
4604         if (!ots->mem_allocated) {
4605             temp_allocate_frame(s, ots);
4606         }
4607         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4608         if (IS_DEAD_ARG(1)) {
4609             temp_dead(s, ts);
4610         }
4611         temp_dead(s, ots);
4612         return;
4613     }
4614 
4615     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4616         /*
4617          * The mov can be suppressed.  Kill input first, so that it
4618          * is unlinked from reg_to_temp, then set the output to the
4619          * reg that we saved from the input.
4620          */
4621         temp_dead(s, ts);
4622         oreg = ireg;
4623     } else {
4624         if (ots->val_type == TEMP_VAL_REG) {
4625             oreg = ots->reg;
4626         } else {
4627             /* Make sure to not spill the input register during allocation. */
4628             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4629                                  allocated_regs | ((TCGRegSet)1 << ireg),
4630                                  preferred_regs, ots->indirect_base);
4631         }
4632         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4633             /*
4634              * Cross register class move not supported.
4635              * Store the source register into the destination slot
4636              * and leave the destination temp as TEMP_VAL_MEM.
4637              */
4638             assert(!temp_readonly(ots));
4639             if (!ts->mem_allocated) {
4640                 temp_allocate_frame(s, ots);
4641             }
4642             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4643             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4644             ots->mem_coherent = 1;
4645             return;
4646         }
4647     }
4648     set_temp_val_reg(s, ots, oreg);
4649     ots->mem_coherent = 0;
4650 
4651     if (NEED_SYNC_ARG(0)) {
4652         temp_sync(s, ots, allocated_regs, 0, 0);
4653     }
4654 }
4655 
4656 /*
4657  * Specialized code generation for INDEX_op_dup_vec.
4658  */
4659 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4660 {
4661     const TCGLifeData arg_life = op->life;
4662     TCGRegSet dup_out_regs, dup_in_regs;
4663     TCGTemp *its, *ots;
4664     TCGType itype, vtype;
4665     unsigned vece;
4666     int lowpart_ofs;
4667     bool ok;
4668 
4669     ots = arg_temp(op->args[0]);
4670     its = arg_temp(op->args[1]);
4671 
4672     /* ENV should not be modified.  */
4673     tcg_debug_assert(!temp_readonly(ots));
4674 
4675     itype = its->type;
4676     vece = TCGOP_VECE(op);
4677     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4678 
4679     if (its->val_type == TEMP_VAL_CONST) {
4680         /* Propagate constant via movi -> dupi.  */
4681         tcg_target_ulong val = its->val;
4682         if (IS_DEAD_ARG(1)) {
4683             temp_dead(s, its);
4684         }
4685         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4686         return;
4687     }
4688 
4689     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4690     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4691 
4692     /* Allocate the output register now.  */
4693     if (ots->val_type != TEMP_VAL_REG) {
4694         TCGRegSet allocated_regs = s->reserved_regs;
4695         TCGReg oreg;
4696 
4697         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4698             /* Make sure to not spill the input register. */
4699             tcg_regset_set_reg(allocated_regs, its->reg);
4700         }
4701         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4702                              output_pref(op, 0), ots->indirect_base);
4703         set_temp_val_reg(s, ots, oreg);
4704     }
4705 
4706     switch (its->val_type) {
4707     case TEMP_VAL_REG:
4708         /*
4709          * The dup constriaints must be broad, covering all possible VECE.
4710          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4711          * to fail, indicating that extra moves are required for that case.
4712          */
4713         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4714             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4715                 goto done;
4716             }
4717             /* Try again from memory or a vector input register.  */
4718         }
4719         if (!its->mem_coherent) {
4720             /*
4721              * The input register is not synced, and so an extra store
4722              * would be required to use memory.  Attempt an integer-vector
4723              * register move first.  We do not have a TCGRegSet for this.
4724              */
4725             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4726                 break;
4727             }
4728             /* Sync the temp back to its slot and load from there.  */
4729             temp_sync(s, its, s->reserved_regs, 0, 0);
4730         }
4731         /* fall through */
4732 
4733     case TEMP_VAL_MEM:
4734         lowpart_ofs = 0;
4735         if (HOST_BIG_ENDIAN) {
4736             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4737         }
4738         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4739                              its->mem_offset + lowpart_ofs)) {
4740             goto done;
4741         }
4742         /* Load the input into the destination vector register. */
4743         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4744         break;
4745 
4746     default:
4747         g_assert_not_reached();
4748     }
4749 
4750     /* We now have a vector input register, so dup must succeed. */
4751     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4752     tcg_debug_assert(ok);
4753 
4754  done:
4755     ots->mem_coherent = 0;
4756     if (IS_DEAD_ARG(1)) {
4757         temp_dead(s, its);
4758     }
4759     if (NEED_SYNC_ARG(0)) {
4760         temp_sync(s, ots, s->reserved_regs, 0, 0);
4761     }
4762     if (IS_DEAD_ARG(0)) {
4763         temp_dead(s, ots);
4764     }
4765 }
4766 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The op definition is looked up in tcg_op_defs[op->opc].  The function
 * then proceeds in phases:
 *   1. copy the constant arguments into new_args[];
 *   2. place every input in a register (or accept it as an immediate)
 *      according to its constraint, visiting inputs in sort_index order;
 *   3. kill the inputs that liveness marked dead;
 *   4. for branches / basic-block ends, check temp state; otherwise free
 *      call-clobbered registers if requested, check globals for ops with
 *      side effects, and allocate the output registers;
 *   5. emit the instruction through the matching tcg_out_* hook;
 *   6. sync and/or kill the outputs as requested by op->life.
 *
 * new_args[] receives the register number or immediate for each argument
 * and const_args[] flags which input arguments are immediates; both are
 * what is handed to the backend emitter.
 */
4767 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4768 {
4769     const TCGLifeData arg_life = op->life;
4770     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4771     TCGRegSet i_allocated_regs;
4772     TCGRegSet o_allocated_regs;
4773     int i, k, nb_iargs, nb_oargs;
4774     TCGReg reg;
4775     TCGArg arg;
4776     const TCGArgConstraint *arg_ct;
4777     TCGTemp *ts;
4778     TCGArg new_args[TCG_MAX_OP_ARGS];
4779     int const_args[TCG_MAX_OP_ARGS];
4780 
4781     nb_oargs = def->nb_oargs;
4782     nb_iargs = def->nb_iargs;
4783 
4784     /* copy constants */
4785     memcpy(new_args + nb_oargs + nb_iargs,
4786            op->args + nb_oargs + nb_iargs,
4787            sizeof(TCGArg) * def->nb_cargs);
4788 
         /*
          * Inputs and outputs track their busy registers separately;
          * both start from the reserved set.
          */
4789     i_allocated_regs = s->reserved_regs;
4790     o_allocated_regs = s->reserved_regs;
4791 
4792     /* satisfy input constraints */
4793     for (k = 0; k < nb_iargs; k++) {
4794         TCGRegSet i_preferred_regs, i_required_regs;
4795         bool allocate_new_reg, copyto_new_reg;
4796         TCGTemp *ts2;
4797         int i1, i2;
4798 
             /* K is the position in sorted order; I is the real argument index. */
4799         i = def->args_ct[nb_oargs + k].sort_index;
4800         arg = op->args[i];
4801         arg_ct = &def->args_ct[i];
4802         ts = arg_temp(arg);
4803 
4804         if (ts->val_type == TEMP_VAL_CONST
4805             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4806             /* constant is OK for instruction */
4807             const_args[i] = 1;
4808             new_args[i] = ts->val;
4809             continue;
4810         }
4811 
             /* ts->reg is read unconditionally; it is only used below where
                ts->val_type has been checked to be TEMP_VAL_REG, or after
                temp_load() has refreshed it. */
4812         reg = ts->reg;
4813         i_preferred_regs = 0;
4814         i_required_regs = arg_ct->regs;
4815         allocate_new_reg = false;
4816         copyto_new_reg = false;
4817 
4818         switch (arg_ct->pair) {
4819         case 0: /* not paired */
4820             if (arg_ct->ialias) {
4821                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4822 
4823                 /*
4824                  * If the input is readonly, then it cannot also be an
4825                  * output and aliased to itself.  If the input is not
4826                  * dead after the instruction, we must allocate a new
4827                  * register and move it.
4828                  */
4829                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4830                     || def->args_ct[arg_ct->alias_index].newreg) {
4831                     allocate_new_reg = true;
4832                 } else if (ts->val_type == TEMP_VAL_REG) {
4833                     /*
4834                      * Check if the current register has already been
4835                      * allocated for another input.
4836                      */
4837                     allocate_new_reg =
4838                         tcg_regset_test_reg(i_allocated_regs, reg);
4839                 }
4840             }
4841             if (!allocate_new_reg) {
4842                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4843                           i_preferred_regs);
4844                 reg = ts->reg;
                     /* The temp may already have been in a register that
                        does not satisfy the constraint. */
4845                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4846             }
4847             if (allocate_new_reg) {
4848                 /*
4849                  * Allocate a new register matching the constraint
4850                  * and move the temporary register into it.
4851                  */
4852                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4853                           i_allocated_regs, 0);
4854                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4855                                     i_preferred_regs, ts->indirect_base);
4856                 copyto_new_reg = true;
4857             }
4858             break;
4859 
4860         case 1:
4861             /* First of an input pair; if i1 == i2, the second is an output. */
4862             i1 = i;
4863             i2 = arg_ct->pair_index;
4864             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4865 
4866             /*
4867              * It is easier to default to allocating a new pair
4868              * and to identify a few cases where it's not required.
4869              */
4870             if (arg_ct->ialias) {
4871                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4872                 if (IS_DEAD_ARG(i1) &&
4873                     IS_DEAD_ARG(i2) &&
4874                     !temp_readonly(ts) &&
4875                     ts->val_type == TEMP_VAL_REG &&
4876                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4877                     tcg_regset_test_reg(i_required_regs, reg) &&
4878                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4879                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4880                     (ts2
4881                      ? ts2->val_type == TEMP_VAL_REG &&
4882                        ts2->reg == reg + 1 &&
4883                        !temp_readonly(ts2)
4884                      : s->reg_to_temp[reg + 1] == NULL)) {
4885                     break;
4886                 }
4887             } else {
4888                 /* Without aliasing, the pair must also be an input. */
4889                 tcg_debug_assert(ts2);
4890                 if (ts->val_type == TEMP_VAL_REG &&
4891                     ts2->val_type == TEMP_VAL_REG &&
4892                     ts2->reg == reg + 1 &&
4893                     tcg_regset_test_reg(i_required_regs, reg)) {
4894                     break;
4895                 }
4896             }
4897             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4898                                      0, ts->indirect_base);
4899             goto do_pair;
4900 
4901         case 2: /* pair second */
                 /* The first half was placed earlier; take the next register. */
4902             reg = new_args[arg_ct->pair_index] + 1;
4903             goto do_pair;
4904 
4905         case 3: /* ialias with second output, no first input */
4906             tcg_debug_assert(arg_ct->ialias);
4907             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4908 
4909             if (IS_DEAD_ARG(i) &&
4910                 !temp_readonly(ts) &&
4911                 ts->val_type == TEMP_VAL_REG &&
4912                 reg > 0 &&
4913                 s->reg_to_temp[reg - 1] == NULL &&
4914                 tcg_regset_test_reg(i_required_regs, reg) &&
4915                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4916                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                     /* Reserve the lower half of the pair as well. */
4917                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4918                 break;
4919             }
                 /* The constraint names the upper register; shift it down by
                    one so the pair allocator sees it as a lower register. */
4920             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4921                                      i_allocated_regs, 0,
4922                                      ts->indirect_base);
4923             tcg_regset_set_reg(i_allocated_regs, reg);
4924             reg += 1;
4925             goto do_pair;
4926 
4927         do_pair:
4928             /*
4929              * If an aliased input is not dead after the instruction,
4930              * we must allocate a new register and move it.
4931              */
4932             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4933                 TCGRegSet t_allocated_regs = i_allocated_regs;
4934 
4935                 /*
4936                  * Because of the alias, and the continued life, make sure
4937                  * that the temp is somewhere *other* than the reg pair,
4938                  * and we get a copy in reg.
4939                  */
4940                 tcg_regset_set_reg(t_allocated_regs, reg);
4941                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4942                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4943                     /* If ts was already in reg, copy it somewhere else. */
4944                     TCGReg nr;
4945                     bool ok;
4946 
4947                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4948                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4949                                        t_allocated_regs, 0, ts->indirect_base);
4950                     ok = tcg_out_mov(s, ts->type, nr, reg);
4951                     tcg_debug_assert(ok);
4952 
4953                     set_temp_val_reg(s, ts, nr);
4954                 } else {
4955                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4956                               t_allocated_regs, 0);
4957                     copyto_new_reg = true;
4958                 }
4959             } else {
4960                 /* Preferably allocate to reg, otherwise copy. */
4961                 i_required_regs = (TCGRegSet)1 << reg;
4962                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4963                           i_preferred_regs);
4964                 copyto_new_reg = ts->reg != reg;
4965             }
4966             break;
4967 
4968         default:
4969             g_assert_not_reached();
4970         }
4971 
             /* The temp stays where it is; REG receives a copy for this op. */
4972         if (copyto_new_reg) {
4973             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4974                 /*
4975                  * Cross register class move not supported.  Sync the
4976                  * temp back to its slot and load from there.
4977                  */
4978                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4979                 tcg_out_ld(s, ts->type, reg,
4980                            ts->mem_base->reg, ts->mem_offset);
4981             }
4982         }
4983         new_args[i] = reg;
4984         const_args[i] = 0;
4985         tcg_regset_set_reg(i_allocated_regs, reg);
4986     }
4987 
4988     /* mark dead temporaries and free the associated registers */
4989     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4990         if (IS_DEAD_ARG(i)) {
4991             temp_dead(s, arg_temp(op->args[i]));
4992         }
4993     }
4994 
         /* Output allocation below is skipped for conditional branches and
            ops that end the basic block; only temp state is checked. */
4995     if (def->flags & TCG_OPF_COND_BRANCH) {
4996         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4997     } else if (def->flags & TCG_OPF_BB_END) {
4998         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4999     } else {
5000         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5001             /* XXX: permit generic clobber register list ? */
5002             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5003                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5004                     tcg_reg_free(s, i, i_allocated_regs);
5005                 }
5006             }
5007         }
5008         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5009             /* sync globals if the op has side effects and might trigger
5010                an exception. */
5011             sync_globals(s, i_allocated_regs);
5012         }
5013 
5014         /* satisfy the output constraints */
5015         for(k = 0; k < nb_oargs; k++) {
5016             i = def->args_ct[k].sort_index;
5017             arg = op->args[i];
5018             arg_ct = &def->args_ct[i];
5019             ts = arg_temp(arg);
5020 
5021             /* ENV should not be modified.  */
5022             tcg_debug_assert(!temp_readonly(ts));
5023 
                 /*
                  * NOTE(review): the output_pref() lookups in this loop are
                  * indexed by K (position in sorted order) while every other
                  * per-argument access uses I (the sorted argument index).
                  * Confirm whether K is intended; this only influences which
                  * register is preferred.
                  */
5024             switch (arg_ct->pair) {
5025             case 0: /* not paired */
5026                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                         /* Reuse the register of the aliased input. */
5027                     reg = new_args[arg_ct->alias_index];
5028                 } else if (arg_ct->newreg) {
                         /* Must differ from every input register as well. */
5029                     reg = tcg_reg_alloc(s, arg_ct->regs,
5030                                         i_allocated_regs | o_allocated_regs,
5031                                         output_pref(op, k), ts->indirect_base);
5032                 } else {
5033                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5034                                         output_pref(op, k), ts->indirect_base);
5035                 }
5036                 break;
5037 
5038             case 1: /* first of pair */
5039                 tcg_debug_assert(!arg_ct->newreg);
5040                 if (arg_ct->oalias) {
5041                     reg = new_args[arg_ct->alias_index];
5042                     break;
5043                 }
5044                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5045                                          output_pref(op, k), ts->indirect_base);
5046                 break;
5047 
5048             case 2: /* second of pair */
5049                 tcg_debug_assert(!arg_ct->newreg);
5050                 if (arg_ct->oalias) {
5051                     reg = new_args[arg_ct->alias_index];
5052                 } else {
5053                     reg = new_args[arg_ct->pair_index] + 1;
5054                 }
5055                 break;
5056 
5057             case 3: /* first of pair, aliasing with a second input */
5058                 tcg_debug_assert(!arg_ct->newreg);
5059                 reg = new_args[arg_ct->pair_index] - 1;
5060                 break;
5061 
5062             default:
5063                 g_assert_not_reached();
5064             }
5065             tcg_regset_set_reg(o_allocated_regs, reg);
5066             set_temp_val_reg(s, ts, reg);
5067             ts->mem_coherent = 0;
5068             new_args[i] = reg;
5069         }
5070     }
5071 
5072     /* emit instruction */
         /* The extension/truncation opcodes listed here go to dedicated
            backend helpers; all others use the generic (vector) emitter. */
5073     switch (op->opc) {
5074     case INDEX_op_ext8s_i32:
5075         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5076         break;
5077     case INDEX_op_ext8s_i64:
5078         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5079         break;
5080     case INDEX_op_ext8u_i32:
5081     case INDEX_op_ext8u_i64:
5082         tcg_out_ext8u(s, new_args[0], new_args[1]);
5083         break;
5084     case INDEX_op_ext16s_i32:
5085         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5086         break;
5087     case INDEX_op_ext16s_i64:
5088         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5089         break;
5090     case INDEX_op_ext16u_i32:
5091     case INDEX_op_ext16u_i64:
5092         tcg_out_ext16u(s, new_args[0], new_args[1]);
5093         break;
5094     case INDEX_op_ext32s_i64:
5095         tcg_out_ext32s(s, new_args[0], new_args[1]);
5096         break;
5097     case INDEX_op_ext32u_i64:
5098         tcg_out_ext32u(s, new_args[0], new_args[1]);
5099         break;
5100     case INDEX_op_ext_i32_i64:
5101         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5102         break;
5103     case INDEX_op_extu_i32_i64:
5104         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5105         break;
5106     case INDEX_op_extrl_i64_i32:
5107         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5108         break;
5109     default:
5110         if (def->flags & TCG_OPF_VECTOR) {
5111             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5112                            new_args, const_args);
5113         } else {
5114             tcg_out_op(s, op->opc, new_args, const_args);
5115         }
5116         break;
5117     }
5118 
5119     /* move the outputs in the correct register if needed */
5120     for(i = 0; i < nb_oargs; i++) {
5121         ts = arg_temp(op->args[i]);
5122 
5123         /* ENV should not be modified.  */
5124         tcg_debug_assert(!temp_readonly(ts));
5125 
             /* Write the output back to memory and/or drop it, as liveness
                requested for this argument. */
5126         if (NEED_SYNC_ARG(i)) {
5127             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5128         } else if (IS_DEAD_ARG(i)) {
5129             temp_dead(s, ts);
5130         }
5131     }
5132 }
5133 
5134 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5135 {
5136     const TCGLifeData arg_life = op->life;
5137     TCGTemp *ots, *itsl, *itsh;
5138     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5139 
5140     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5141     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5142     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5143 
5144     ots = arg_temp(op->args[0]);
5145     itsl = arg_temp(op->args[1]);
5146     itsh = arg_temp(op->args[2]);
5147 
5148     /* ENV should not be modified.  */
5149     tcg_debug_assert(!temp_readonly(ots));
5150 
5151     /* Allocate the output register now.  */
5152     if (ots->val_type != TEMP_VAL_REG) {
5153         TCGRegSet allocated_regs = s->reserved_regs;
5154         TCGRegSet dup_out_regs =
5155             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5156         TCGReg oreg;
5157 
5158         /* Make sure to not spill the input registers. */
5159         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5160             tcg_regset_set_reg(allocated_regs, itsl->reg);
5161         }
5162         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5163             tcg_regset_set_reg(allocated_regs, itsh->reg);
5164         }
5165 
5166         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5167                              output_pref(op, 0), ots->indirect_base);
5168         set_temp_val_reg(s, ots, oreg);
5169     }
5170 
5171     /* Promote dup2 of immediates to dupi_vec. */
5172     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5173         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5174         MemOp vece = MO_64;
5175 
5176         if (val == dup_const(MO_8, val)) {
5177             vece = MO_8;
5178         } else if (val == dup_const(MO_16, val)) {
5179             vece = MO_16;
5180         } else if (val == dup_const(MO_32, val)) {
5181             vece = MO_32;
5182         }
5183 
5184         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5185         goto done;
5186     }
5187 
5188     /* If the two inputs form one 64-bit value, try dupm_vec. */
5189     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5190         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5191         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5192         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5193 
5194         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5195         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5196 
5197         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5198                              its->mem_base->reg, its->mem_offset)) {
5199             goto done;
5200         }
5201     }
5202 
5203     /* Fall back to generic expansion. */
5204     return false;
5205 
5206  done:
5207     ots->mem_coherent = 0;
5208     if (IS_DEAD_ARG(1)) {
5209         temp_dead(s, itsl);
5210     }
5211     if (IS_DEAD_ARG(2)) {
5212         temp_dead(s, itsh);
5213     }
5214     if (NEED_SYNC_ARG(0)) {
5215         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5216     } else if (IS_DEAD_ARG(0)) {
5217         temp_dead(s, ots);
5218     }
5219     return true;
5220 }
5221 
5222 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5223                          TCGRegSet allocated_regs)
5224 {
5225     if (ts->val_type == TEMP_VAL_REG) {
5226         if (ts->reg != reg) {
5227             tcg_reg_free(s, reg, allocated_regs);
5228             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5229                 /*
5230                  * Cross register class move not supported.  Sync the
5231                  * temp back to its slot and load from there.
5232                  */
5233                 temp_sync(s, ts, allocated_regs, 0, 0);
5234                 tcg_out_ld(s, ts->type, reg,
5235                            ts->mem_base->reg, ts->mem_offset);
5236             }
5237         }
5238     } else {
5239         TCGRegSet arg_set = 0;
5240 
5241         tcg_reg_free(s, reg, allocated_regs);
5242         tcg_regset_set_reg(arg_set, reg);
5243         temp_load(s, ts, arg_set, allocated_regs, 0);
5244     }
5245 }
5246 
5247 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5248                          TCGRegSet allocated_regs)
5249 {
5250     /*
5251      * When the destination is on the stack, load up the temp and store.
5252      * If there are many call-saved registers, the temp might live to
5253      * see another use; otherwise it'll be discarded.
5254      */
5255     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5256     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5257                arg_slot_stk_ofs(arg_slot));
5258 }
5259 
5260 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5261                             TCGTemp *ts, TCGRegSet *allocated_regs)
5262 {
5263     if (arg_slot_reg_p(l->arg_slot)) {
5264         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5265         load_arg_reg(s, reg, ts, *allocated_regs);
5266         tcg_regset_set_reg(*allocated_regs, reg);
5267     } else {
5268         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5269     }
5270 }
5271 
5272 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5273                          intptr_t ref_off, TCGRegSet *allocated_regs)
5274 {
5275     TCGReg reg;
5276 
5277     if (arg_slot_reg_p(arg_slot)) {
5278         reg = tcg_target_call_iarg_regs[arg_slot];
5279         tcg_reg_free(s, reg, *allocated_regs);
5280         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5281         tcg_regset_set_reg(*allocated_regs, reg);
5282     } else {
5283         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5284                             *allocated_regs, 0, false);
5285         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5286         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5287                    arg_slot_stk_ofs(arg_slot));
5288     }
5289 }
5290 
5291 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5292 {
5293     const int nb_oargs = TCGOP_CALLO(op);
5294     const int nb_iargs = TCGOP_CALLI(op);
5295     const TCGLifeData arg_life = op->life;
5296     const TCGHelperInfo *info = tcg_call_info(op);
5297     TCGRegSet allocated_regs = s->reserved_regs;
5298     int i;
5299 
5300     /*
5301      * Move inputs into place in reverse order,
5302      * so that we place stacked arguments first.
5303      */
5304     for (i = nb_iargs - 1; i >= 0; --i) {
5305         const TCGCallArgumentLoc *loc = &info->in[i];
5306         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5307 
5308         switch (loc->kind) {
5309         case TCG_CALL_ARG_NORMAL:
5310         case TCG_CALL_ARG_EXTEND_U:
5311         case TCG_CALL_ARG_EXTEND_S:
5312             load_arg_normal(s, loc, ts, &allocated_regs);
5313             break;
5314         case TCG_CALL_ARG_BY_REF:
5315             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5316             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5317                          arg_slot_stk_ofs(loc->ref_slot),
5318                          &allocated_regs);
5319             break;
5320         case TCG_CALL_ARG_BY_REF_N:
5321             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5322             break;
5323         default:
5324             g_assert_not_reached();
5325         }
5326     }
5327 
5328     /* Mark dead temporaries and free the associated registers.  */
5329     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5330         if (IS_DEAD_ARG(i)) {
5331             temp_dead(s, arg_temp(op->args[i]));
5332         }
5333     }
5334 
5335     /* Clobber call registers.  */
5336     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5337         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5338             tcg_reg_free(s, i, allocated_regs);
5339         }
5340     }
5341 
5342     /*
5343      * Save globals if they might be written by the helper,
5344      * sync them if they might be read.
5345      */
5346     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5347         /* Nothing to do */
5348     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5349         sync_globals(s, allocated_regs);
5350     } else {
5351         save_globals(s, allocated_regs);
5352     }
5353 
5354     /*
5355      * If the ABI passes a pointer to the returned struct as the first
5356      * argument, load that now.  Pass a pointer to the output home slot.
5357      */
5358     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5359         TCGTemp *ts = arg_temp(op->args[0]);
5360 
5361         if (!ts->mem_allocated) {
5362             temp_allocate_frame(s, ts);
5363         }
5364         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5365     }
5366 
5367     tcg_out_call(s, tcg_call_func(op), info);
5368 
5369     /* Assign output registers and emit moves if needed.  */
5370     switch (info->out_kind) {
5371     case TCG_CALL_RET_NORMAL:
5372         for (i = 0; i < nb_oargs; i++) {
5373             TCGTemp *ts = arg_temp(op->args[i]);
5374             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5375 
5376             /* ENV should not be modified.  */
5377             tcg_debug_assert(!temp_readonly(ts));
5378 
5379             set_temp_val_reg(s, ts, reg);
5380             ts->mem_coherent = 0;
5381         }
5382         break;
5383 
5384     case TCG_CALL_RET_BY_VEC:
5385         {
5386             TCGTemp *ts = arg_temp(op->args[0]);
5387 
5388             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5389             tcg_debug_assert(ts->temp_subindex == 0);
5390             if (!ts->mem_allocated) {
5391                 temp_allocate_frame(s, ts);
5392             }
5393             tcg_out_st(s, TCG_TYPE_V128,
5394                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5395                        ts->mem_base->reg, ts->mem_offset);
5396         }
5397         /* fall through to mark all parts in memory */
5398 
5399     case TCG_CALL_RET_BY_REF:
5400         /* The callee has performed a write through the reference. */
5401         for (i = 0; i < nb_oargs; i++) {
5402             TCGTemp *ts = arg_temp(op->args[i]);
5403             ts->val_type = TEMP_VAL_MEM;
5404         }
5405         break;
5406 
5407     default:
5408         g_assert_not_reached();
5409     }
5410 
5411     /* Flush or discard output registers as needed. */
5412     for (i = 0; i < nb_oargs; i++) {
5413         TCGTemp *ts = arg_temp(op->args[i]);
5414         if (NEED_SYNC_ARG(i)) {
5415             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5416         } else if (IS_DEAD_ARG(i)) {
5417             temp_dead(s, ts);
5418         }
5419     }
5420 }
5421 
5422 /**
5423  * atom_and_align_for_opc:
5424  * @s: tcg context
5425  * @opc: memory operation code
5426  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5427  * @allow_two_ops: true if we are prepared to issue two operations
5428  *
5429  * Return the alignment and atomicity to use for the inline fast path
5430  * for the given memory operation.  The alignment may be larger than
5431  * that specified in @opc, and the correct alignment will be diagnosed
5432  * by the slow path helper.
5433  *
5434  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5435  * and issue two loads or stores for subalignment.
5436  */
5437 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5438                                            MemOp host_atom, bool allow_two_ops)
5439 {
5440     MemOp align = get_alignment_bits(opc);
5441     MemOp size = opc & MO_SIZE;
5442     MemOp half = size ? size - 1 : 0;
5443     MemOp atmax;
5444     MemOp atom;
5445 
5446     /* When serialized, no further atomicity required.  */
5447     if (s->gen_tb->cflags & CF_PARALLEL) {
5448         atom = opc & MO_ATOM_MASK;
5449     } else {
5450         atom = MO_ATOM_NONE;
5451     }
5452 
5453     switch (atom) {
5454     case MO_ATOM_NONE:
5455         /* The operation requires no specific atomicity. */
5456         atmax = MO_8;
5457         break;
5458 
5459     case MO_ATOM_IFALIGN:
5460         atmax = size;
5461         break;
5462 
5463     case MO_ATOM_IFALIGN_PAIR:
5464         atmax = half;
5465         break;
5466 
5467     case MO_ATOM_WITHIN16:
5468         atmax = size;
5469         if (size == MO_128) {
5470             /* Misalignment implies !within16, and therefore no atomicity. */
5471         } else if (host_atom != MO_ATOM_WITHIN16) {
5472             /* The host does not implement within16, so require alignment. */
5473             align = MAX(align, size);
5474         }
5475         break;
5476 
5477     case MO_ATOM_WITHIN16_PAIR:
5478         atmax = size;
5479         /*
5480          * Misalignment implies !within16, and therefore half atomicity.
5481          * Any host prepared for two operations can implement this with
5482          * half alignment.
5483          */
5484         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5485             align = MAX(align, half);
5486         }
5487         break;
5488 
5489     case MO_ATOM_SUBALIGN:
5490         atmax = size;
5491         if (host_atom != MO_ATOM_SUBALIGN) {
5492             /* If unaligned but not odd, there are subobjects up to half. */
5493             if (allow_two_ops) {
5494                 align = MAX(align, half);
5495             } else {
5496                 align = MAX(align, size);
5497             }
5498         }
5499         break;
5500 
5501     default:
5502         g_assert_not_reached();
5503     }
5504 
5505     return (TCGAtomAlign){ .atom = atmax, .align = align };
5506 }
5507 
5508 /*
5509  * Similarly for qemu_ld/st slow path helpers.
5510  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5511  * using only the provided backend tcg_out_* functions.
5512  */
5513 
5514 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5515 {
5516     int ofs = arg_slot_stk_ofs(slot);
5517 
5518     /*
5519      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5520      * require extension to uint64_t, adjust the address for uint32_t.
5521      */
5522     if (HOST_BIG_ENDIAN &&
5523         TCG_TARGET_REG_BITS == 64 &&
5524         type == TCG_TYPE_I32) {
5525         ofs += 4;
5526     }
5527     return ofs;
5528 }
5529 
5530 static void tcg_out_helper_load_slots(TCGContext *s,
5531                                       unsigned nmov, TCGMovExtend *mov,
5532                                       const TCGLdstHelperParam *parm)
5533 {
5534     unsigned i;
5535     TCGReg dst3;
5536 
5537     /*
5538      * Start from the end, storing to the stack first.
5539      * This frees those registers, so we need not consider overlap.
5540      */
5541     for (i = nmov; i-- > 0; ) {
5542         unsigned slot = mov[i].dst;
5543 
5544         if (arg_slot_reg_p(slot)) {
5545             goto found_reg;
5546         }
5547 
5548         TCGReg src = mov[i].src;
5549         TCGType dst_type = mov[i].dst_type;
5550         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5551 
5552         /* The argument is going onto the stack; extend into scratch. */
5553         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5554             tcg_debug_assert(parm->ntmp != 0);
5555             mov[i].dst = src = parm->tmp[0];
5556             tcg_out_movext1(s, &mov[i]);
5557         }
5558 
5559         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5560                    tcg_out_helper_stk_ofs(dst_type, slot));
5561     }
5562     return;
5563 
5564  found_reg:
5565     /*
5566      * The remaining arguments are in registers.
5567      * Convert slot numbers to argument registers.
5568      */
5569     nmov = i + 1;
5570     for (i = 0; i < nmov; ++i) {
5571         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5572     }
5573 
5574     switch (nmov) {
5575     case 4:
5576         /* The backend must have provided enough temps for the worst case. */
5577         tcg_debug_assert(parm->ntmp >= 2);
5578 
5579         dst3 = mov[3].dst;
5580         for (unsigned j = 0; j < 3; ++j) {
5581             if (dst3 == mov[j].src) {
5582                 /*
5583                  * Conflict. Copy the source to a temporary, perform the
5584                  * remaining moves, then the extension from our scratch
5585                  * on the way out.
5586                  */
5587                 TCGReg scratch = parm->tmp[1];
5588 
5589                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5590                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5591                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5592                 break;
5593             }
5594         }
5595 
5596         /* No conflicts: perform this move and continue. */
5597         tcg_out_movext1(s, &mov[3]);
5598         /* fall through */
5599 
5600     case 3:
5601         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5602                         parm->ntmp ? parm->tmp[0] : -1);
5603         break;
5604     case 2:
5605         tcg_out_movext2(s, mov, mov + 1,
5606                         parm->ntmp ? parm->tmp[0] : -1);
5607         break;
5608     case 1:
5609         tcg_out_movext1(s, mov);
5610         break;
5611     default:
5612         g_assert_not_reached();
5613     }
5614 }
5615 
5616 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5617                                     TCGType type, tcg_target_long imm,
5618                                     const TCGLdstHelperParam *parm)
5619 {
5620     if (arg_slot_reg_p(slot)) {
5621         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5622     } else {
5623         int ofs = tcg_out_helper_stk_ofs(type, slot);
5624         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5625             tcg_debug_assert(parm->ntmp != 0);
5626             tcg_out_movi(s, type, parm->tmp[0], imm);
5627             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5628         }
5629     }
5630 }
5631 
5632 static void tcg_out_helper_load_common_args(TCGContext *s,
5633                                             const TCGLabelQemuLdst *ldst,
5634                                             const TCGLdstHelperParam *parm,
5635                                             const TCGHelperInfo *info,
5636                                             unsigned next_arg)
5637 {
5638     TCGMovExtend ptr_mov = {
5639         .dst_type = TCG_TYPE_PTR,
5640         .src_type = TCG_TYPE_PTR,
5641         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5642     };
5643     const TCGCallArgumentLoc *loc = &info->in[0];
5644     TCGType type;
5645     unsigned slot;
5646     tcg_target_ulong imm;
5647 
5648     /*
5649      * Handle env, which is always first.
5650      */
5651     ptr_mov.dst = loc->arg_slot;
5652     ptr_mov.src = TCG_AREG0;
5653     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5654 
5655     /*
5656      * Handle oi.
5657      */
5658     imm = ldst->oi;
5659     loc = &info->in[next_arg];
5660     type = TCG_TYPE_I32;
5661     switch (loc->kind) {
5662     case TCG_CALL_ARG_NORMAL:
5663         break;
5664     case TCG_CALL_ARG_EXTEND_U:
5665     case TCG_CALL_ARG_EXTEND_S:
5666         /* No extension required for MemOpIdx. */
5667         tcg_debug_assert(imm <= INT32_MAX);
5668         type = TCG_TYPE_REG;
5669         break;
5670     default:
5671         g_assert_not_reached();
5672     }
5673     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5674     next_arg++;
5675 
5676     /*
5677      * Handle ra.
5678      */
5679     loc = &info->in[next_arg];
5680     slot = loc->arg_slot;
5681     if (parm->ra_gen) {
5682         int arg_reg = -1;
5683         TCGReg ra_reg;
5684 
5685         if (arg_slot_reg_p(slot)) {
5686             arg_reg = tcg_target_call_iarg_regs[slot];
5687         }
5688         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5689 
5690         ptr_mov.dst = slot;
5691         ptr_mov.src = ra_reg;
5692         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5693     } else {
5694         imm = (uintptr_t)ldst->raddr;
5695         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5696     }
5697 }
5698 
5699 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5700                                        const TCGCallArgumentLoc *loc,
5701                                        TCGType dst_type, TCGType src_type,
5702                                        TCGReg lo, TCGReg hi)
5703 {
5704     MemOp reg_mo;
5705 
5706     if (dst_type <= TCG_TYPE_REG) {
5707         MemOp src_ext;
5708 
5709         switch (loc->kind) {
5710         case TCG_CALL_ARG_NORMAL:
5711             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5712             break;
5713         case TCG_CALL_ARG_EXTEND_U:
5714             dst_type = TCG_TYPE_REG;
5715             src_ext = MO_UL;
5716             break;
5717         case TCG_CALL_ARG_EXTEND_S:
5718             dst_type = TCG_TYPE_REG;
5719             src_ext = MO_SL;
5720             break;
5721         default:
5722             g_assert_not_reached();
5723         }
5724 
5725         mov[0].dst = loc->arg_slot;
5726         mov[0].dst_type = dst_type;
5727         mov[0].src = lo;
5728         mov[0].src_type = src_type;
5729         mov[0].src_ext = src_ext;
5730         return 1;
5731     }
5732 
5733     if (TCG_TARGET_REG_BITS == 32) {
5734         assert(dst_type == TCG_TYPE_I64);
5735         reg_mo = MO_32;
5736     } else {
5737         assert(dst_type == TCG_TYPE_I128);
5738         reg_mo = MO_64;
5739     }
5740 
5741     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5742     mov[0].src = lo;
5743     mov[0].dst_type = TCG_TYPE_REG;
5744     mov[0].src_type = TCG_TYPE_REG;
5745     mov[0].src_ext = reg_mo;
5746 
5747     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5748     mov[1].src = hi;
5749     mov[1].dst_type = TCG_TYPE_REG;
5750     mov[1].src_type = TCG_TYPE_REG;
5751     mov[1].src_ext = reg_mo;
5752 
5753     return 2;
5754 }
5755 
5756 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5757                                    const TCGLdstHelperParam *parm)
5758 {
5759     const TCGHelperInfo *info;
5760     const TCGCallArgumentLoc *loc;
5761     TCGMovExtend mov[2];
5762     unsigned next_arg, nmov;
5763     MemOp mop = get_memop(ldst->oi);
5764 
5765     switch (mop & MO_SIZE) {
5766     case MO_8:
5767     case MO_16:
5768     case MO_32:
5769         info = &info_helper_ld32_mmu;
5770         break;
5771     case MO_64:
5772         info = &info_helper_ld64_mmu;
5773         break;
5774     case MO_128:
5775         info = &info_helper_ld128_mmu;
5776         break;
5777     default:
5778         g_assert_not_reached();
5779     }
5780 
5781     /* Defer env argument. */
5782     next_arg = 1;
5783 
5784     loc = &info->in[next_arg];
5785     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5786         /*
5787          * 32-bit host with 32-bit guest: zero-extend the guest address
5788          * to 64-bits for the helper by storing the low part, then
5789          * load a zero for the high part.
5790          */
5791         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5792                                TCG_TYPE_I32, TCG_TYPE_I32,
5793                                ldst->addrlo_reg, -1);
5794         tcg_out_helper_load_slots(s, 1, mov, parm);
5795 
5796         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5797                                 TCG_TYPE_I32, 0, parm);
5798         next_arg += 2;
5799     } else {
5800         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5801                                       ldst->addrlo_reg, ldst->addrhi_reg);
5802         tcg_out_helper_load_slots(s, nmov, mov, parm);
5803         next_arg += nmov;
5804     }
5805 
5806     switch (info->out_kind) {
5807     case TCG_CALL_RET_NORMAL:
5808     case TCG_CALL_RET_BY_VEC:
5809         break;
5810     case TCG_CALL_RET_BY_REF:
5811         /*
5812          * The return reference is in the first argument slot.
5813          * We need memory in which to return: re-use the top of stack.
5814          */
5815         {
5816             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5817 
5818             if (arg_slot_reg_p(0)) {
5819                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5820                                  TCG_REG_CALL_STACK, ofs_slot0);
5821             } else {
5822                 tcg_debug_assert(parm->ntmp != 0);
5823                 tcg_out_addi_ptr(s, parm->tmp[0],
5824                                  TCG_REG_CALL_STACK, ofs_slot0);
5825                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5826                            TCG_REG_CALL_STACK, ofs_slot0);
5827             }
5828         }
5829         break;
5830     default:
5831         g_assert_not_reached();
5832     }
5833 
5834     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5835 }
5836 
5837 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5838                                   bool load_sign,
5839                                   const TCGLdstHelperParam *parm)
5840 {
5841     MemOp mop = get_memop(ldst->oi);
5842     TCGMovExtend mov[2];
5843     int ofs_slot0;
5844 
5845     switch (ldst->type) {
5846     case TCG_TYPE_I64:
5847         if (TCG_TARGET_REG_BITS == 32) {
5848             break;
5849         }
5850         /* fall through */
5851 
5852     case TCG_TYPE_I32:
5853         mov[0].dst = ldst->datalo_reg;
5854         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5855         mov[0].dst_type = ldst->type;
5856         mov[0].src_type = TCG_TYPE_REG;
5857 
5858         /*
5859          * If load_sign, then we allowed the helper to perform the
5860          * appropriate sign extension to tcg_target_ulong, and all
5861          * we need now is a plain move.
5862          *
5863          * If they do not, then we expect the relevant extension
5864          * instruction to be no more expensive than a move, and
5865          * we thus save the icache etc by only using one of two
5866          * helper functions.
5867          */
5868         if (load_sign || !(mop & MO_SIGN)) {
5869             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5870                 mov[0].src_ext = MO_32;
5871             } else {
5872                 mov[0].src_ext = MO_64;
5873             }
5874         } else {
5875             mov[0].src_ext = mop & MO_SSIZE;
5876         }
5877         tcg_out_movext1(s, mov);
5878         return;
5879 
5880     case TCG_TYPE_I128:
5881         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5882         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5883         switch (TCG_TARGET_CALL_RET_I128) {
5884         case TCG_CALL_RET_NORMAL:
5885             break;
5886         case TCG_CALL_RET_BY_VEC:
5887             tcg_out_st(s, TCG_TYPE_V128,
5888                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5889                        TCG_REG_CALL_STACK, ofs_slot0);
5890             /* fall through */
5891         case TCG_CALL_RET_BY_REF:
5892             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5893                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5894             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5895                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5896             return;
5897         default:
5898             g_assert_not_reached();
5899         }
5900         break;
5901 
5902     default:
5903         g_assert_not_reached();
5904     }
5905 
5906     mov[0].dst = ldst->datalo_reg;
5907     mov[0].src =
5908         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5909     mov[0].dst_type = TCG_TYPE_REG;
5910     mov[0].src_type = TCG_TYPE_REG;
5911     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5912 
5913     mov[1].dst = ldst->datahi_reg;
5914     mov[1].src =
5915         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5916     mov[1].dst_type = TCG_TYPE_REG;
5917     mov[1].src_type = TCG_TYPE_REG;
5918     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5919 
5920     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5921 }
5922 
5923 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5924                                    const TCGLdstHelperParam *parm)
5925 {
5926     const TCGHelperInfo *info;
5927     const TCGCallArgumentLoc *loc;
5928     TCGMovExtend mov[4];
5929     TCGType data_type;
5930     unsigned next_arg, nmov, n;
5931     MemOp mop = get_memop(ldst->oi);
5932 
5933     switch (mop & MO_SIZE) {
5934     case MO_8:
5935     case MO_16:
5936     case MO_32:
5937         info = &info_helper_st32_mmu;
5938         data_type = TCG_TYPE_I32;
5939         break;
5940     case MO_64:
5941         info = &info_helper_st64_mmu;
5942         data_type = TCG_TYPE_I64;
5943         break;
5944     case MO_128:
5945         info = &info_helper_st128_mmu;
5946         data_type = TCG_TYPE_I128;
5947         break;
5948     default:
5949         g_assert_not_reached();
5950     }
5951 
5952     /* Defer env argument. */
5953     next_arg = 1;
5954     nmov = 0;
5955 
5956     /* Handle addr argument. */
5957     loc = &info->in[next_arg];
5958     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5959         /*
5960          * 32-bit host with 32-bit guest: zero-extend the guest address
5961          * to 64-bits for the helper by storing the low part.  Later,
5962          * after we have processed the register inputs, we will load a
5963          * zero for the high part.
5964          */
5965         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5966                                TCG_TYPE_I32, TCG_TYPE_I32,
5967                                ldst->addrlo_reg, -1);
5968         next_arg += 2;
5969         nmov += 1;
5970     } else {
5971         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5972                                    ldst->addrlo_reg, ldst->addrhi_reg);
5973         next_arg += n;
5974         nmov += n;
5975     }
5976 
5977     /* Handle data argument. */
5978     loc = &info->in[next_arg];
5979     switch (loc->kind) {
5980     case TCG_CALL_ARG_NORMAL:
5981     case TCG_CALL_ARG_EXTEND_U:
5982     case TCG_CALL_ARG_EXTEND_S:
5983         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5984                                    ldst->datalo_reg, ldst->datahi_reg);
5985         next_arg += n;
5986         nmov += n;
5987         tcg_out_helper_load_slots(s, nmov, mov, parm);
5988         break;
5989 
5990     case TCG_CALL_ARG_BY_REF:
5991         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5992         tcg_debug_assert(data_type == TCG_TYPE_I128);
5993         tcg_out_st(s, TCG_TYPE_I64,
5994                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5995                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5996         tcg_out_st(s, TCG_TYPE_I64,
5997                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5998                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5999 
6000         tcg_out_helper_load_slots(s, nmov, mov, parm);
6001 
6002         if (arg_slot_reg_p(loc->arg_slot)) {
6003             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6004                              TCG_REG_CALL_STACK,
6005                              arg_slot_stk_ofs(loc->ref_slot));
6006         } else {
6007             tcg_debug_assert(parm->ntmp != 0);
6008             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6009                              arg_slot_stk_ofs(loc->ref_slot));
6010             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6011                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6012         }
6013         next_arg += 2;
6014         break;
6015 
6016     default:
6017         g_assert_not_reached();
6018     }
6019 
6020     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6021         /* Zero extend the address by loading a zero for the high part. */
6022         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6023         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6024     }
6025 
6026     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6027 }
6028 
6029 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6030 {
6031     int i, start_words, num_insns;
6032     TCGOp *op;
6033 
6034     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6035                  && qemu_log_in_addr_range(pc_start))) {
6036         FILE *logfile = qemu_log_trylock();
6037         if (logfile) {
6038             fprintf(logfile, "OP:\n");
6039             tcg_dump_ops(s, logfile, false);
6040             fprintf(logfile, "\n");
6041             qemu_log_unlock(logfile);
6042         }
6043     }
6044 
6045 #ifdef CONFIG_DEBUG_TCG
6046     /* Ensure all labels referenced have been emitted.  */
6047     {
6048         TCGLabel *l;
6049         bool error = false;
6050 
6051         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6052             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6053                 qemu_log_mask(CPU_LOG_TB_OP,
6054                               "$L%d referenced but not present.\n", l->id);
6055                 error = true;
6056             }
6057         }
6058         assert(!error);
6059     }
6060 #endif
6061 
6062     tcg_optimize(s);
6063 
6064     reachable_code_pass(s);
6065     liveness_pass_0(s);
6066     liveness_pass_1(s);
6067 
6068     if (s->nb_indirects > 0) {
6069         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6070                      && qemu_log_in_addr_range(pc_start))) {
6071             FILE *logfile = qemu_log_trylock();
6072             if (logfile) {
6073                 fprintf(logfile, "OP before indirect lowering:\n");
6074                 tcg_dump_ops(s, logfile, false);
6075                 fprintf(logfile, "\n");
6076                 qemu_log_unlock(logfile);
6077             }
6078         }
6079 
6080         /* Replace indirect temps with direct temps.  */
6081         if (liveness_pass_2(s)) {
6082             /* If changes were made, re-run liveness.  */
6083             liveness_pass_1(s);
6084         }
6085     }
6086 
6087     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6088                  && qemu_log_in_addr_range(pc_start))) {
6089         FILE *logfile = qemu_log_trylock();
6090         if (logfile) {
6091             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6092             tcg_dump_ops(s, logfile, true);
6093             fprintf(logfile, "\n");
6094             qemu_log_unlock(logfile);
6095         }
6096     }
6097 
6098     /* Initialize goto_tb jump offsets. */
6099     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6100     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6101     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6102     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6103 
6104     tcg_reg_alloc_start(s);
6105 
6106     /*
6107      * Reset the buffer pointers when restarting after overflow.
6108      * TODO: Move this into translate-all.c with the rest of the
6109      * buffer management.  Having only this done here is confusing.
6110      */
6111     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6112     s->code_ptr = s->code_buf;
6113 
6114 #ifdef TCG_TARGET_NEED_LDST_LABELS
6115     QSIMPLEQ_INIT(&s->ldst_labels);
6116 #endif
6117 #ifdef TCG_TARGET_NEED_POOL_LABELS
6118     s->pool_labels = NULL;
6119 #endif
6120 
6121     start_words = s->insn_start_words;
6122     s->gen_insn_data =
6123         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6124 
6125     tcg_out_tb_start(s);
6126 
6127     num_insns = -1;
6128     QTAILQ_FOREACH(op, &s->ops, link) {
6129         TCGOpcode opc = op->opc;
6130 
6131         switch (opc) {
6132         case INDEX_op_mov_i32:
6133         case INDEX_op_mov_i64:
6134         case INDEX_op_mov_vec:
6135             tcg_reg_alloc_mov(s, op);
6136             break;
6137         case INDEX_op_dup_vec:
6138             tcg_reg_alloc_dup(s, op);
6139             break;
6140         case INDEX_op_insn_start:
6141             if (num_insns >= 0) {
6142                 size_t off = tcg_current_code_size(s);
6143                 s->gen_insn_end_off[num_insns] = off;
6144                 /* Assert that we do not overflow our stored offset.  */
6145                 assert(s->gen_insn_end_off[num_insns] == off);
6146             }
6147             num_insns++;
6148             for (i = 0; i < start_words; ++i) {
6149                 s->gen_insn_data[num_insns * start_words + i] =
6150                     tcg_get_insn_start_param(op, i);
6151             }
6152             break;
6153         case INDEX_op_discard:
6154             temp_dead(s, arg_temp(op->args[0]));
6155             break;
6156         case INDEX_op_set_label:
6157             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6158             tcg_out_label(s, arg_label(op->args[0]));
6159             break;
6160         case INDEX_op_call:
6161             tcg_reg_alloc_call(s, op);
6162             break;
6163         case INDEX_op_exit_tb:
6164             tcg_out_exit_tb(s, op->args[0]);
6165             break;
6166         case INDEX_op_goto_tb:
6167             tcg_out_goto_tb(s, op->args[0]);
6168             break;
6169         case INDEX_op_dup2_vec:
6170             if (tcg_reg_alloc_dup2(s, op)) {
6171                 break;
6172             }
6173             /* fall through */
6174         default:
6175             /* Sanity check that we've not introduced any unhandled opcodes. */
6176             tcg_debug_assert(tcg_op_supported(opc));
6177             /* Note: in order to speed up the code, it would be much
6178                faster to have specialized register allocator functions for
6179                some common argument patterns */
6180             tcg_reg_alloc_op(s, op);
6181             break;
6182         }
6183         /* Test for (pending) buffer overflow.  The assumption is that any
6184            one operation beginning below the high water mark cannot overrun
6185            the buffer completely.  Thus we can test for overflow after
6186            generating code without having to check during generation.  */
6187         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6188             return -1;
6189         }
6190         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6191         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6192             return -2;
6193         }
6194     }
6195     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6196     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6197 
6198     /* Generate TB finalization at the end of block */
6199 #ifdef TCG_TARGET_NEED_LDST_LABELS
6200     i = tcg_out_ldst_finalize(s);
6201     if (i < 0) {
6202         return i;
6203     }
6204 #endif
6205 #ifdef TCG_TARGET_NEED_POOL_LABELS
6206     i = tcg_out_pool_finalize(s);
6207     if (i < 0) {
6208         return i;
6209     }
6210 #endif
6211     if (!tcg_resolve_relocs(s)) {
6212         return -2;
6213     }
6214 
6215 #ifndef CONFIG_TCG_INTERPRETER
6216     /* flush instruction cache */
6217     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6218                         (uintptr_t)s->code_buf,
6219                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6220 #endif
6221 
6222     return tcg_current_code_size(s);
6223 }
6224 
6225 #ifdef ELF_HOST_MACHINE
6226 /* In order to use this feature, the backend needs to do three things:
6227 
6228    (1) Define ELF_HOST_MACHINE to indicate both what value to
6229        put into the ELF image and to indicate support for the feature.
6230 
6231    (2) Define tcg_register_jit.  This should create a buffer containing
6232        the contents of a .debug_frame section that describes the post-
6233        prologue unwind info for the tcg machine.
6234 
6235    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6236 */
6237 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Values stored in jit_descriptor.action_flag.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;

/* One registered symbol file; entries form a doubly linked list.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory symbol file */
    uint64_t symfile_size;      /* its size in bytes */
};

/* The descriptor GDB looks up by name to find registered entries.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* one of jit_actions_t */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Called after the descriptor has been updated.  The function must
 * exist as a real, out-of-line symbol, hence noinline and the empty
 * asm statement in its body.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/*
 * The version has to be set by a static initializer, since GDB may
 * read it before any of our code has had a chance to run.
 */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};

/* End GDB interface.  */
6270 
/*
 * Return the offset of @str inside @strtab, a table of consecutive
 * NUL-terminated strings.
 *
 * The scan begins at offset 1, skipping the table's first byte, and
 * walks from one string to the next.  There is no end-of-table check:
 * @str must be present in @strtab, otherwise the scan runs past the
 * table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1;
         strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        /* Not this one; step over it and its terminator.  */
    }
    return entry - strtab;
}
6282 
6283 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6284                                  const void *debug_frame,
6285                                  size_t debug_frame_size)
6286 {
6287     struct __attribute__((packed)) DebugInfo {
6288         uint32_t  len;
6289         uint16_t  version;
6290         uint32_t  abbrev;
6291         uint8_t   ptr_size;
6292         uint8_t   cu_die;
6293         uint16_t  cu_lang;
6294         uintptr_t cu_low_pc;
6295         uintptr_t cu_high_pc;
6296         uint8_t   fn_die;
6297         char      fn_name[16];
6298         uintptr_t fn_low_pc;
6299         uintptr_t fn_high_pc;
6300         uint8_t   cu_eoc;
6301     };
6302 
6303     struct ElfImage {
6304         ElfW(Ehdr) ehdr;
6305         ElfW(Phdr) phdr;
6306         ElfW(Shdr) shdr[7];
6307         ElfW(Sym)  sym[2];
6308         struct DebugInfo di;
6309         uint8_t    da[24];
6310         char       str[80];
6311     };
6312 
6313     struct ElfImage *img;
6314 
6315     static const struct ElfImage img_template = {
6316         .ehdr = {
6317             .e_ident[EI_MAG0] = ELFMAG0,
6318             .e_ident[EI_MAG1] = ELFMAG1,
6319             .e_ident[EI_MAG2] = ELFMAG2,
6320             .e_ident[EI_MAG3] = ELFMAG3,
6321             .e_ident[EI_CLASS] = ELF_CLASS,
6322             .e_ident[EI_DATA] = ELF_DATA,
6323             .e_ident[EI_VERSION] = EV_CURRENT,
6324             .e_type = ET_EXEC,
6325             .e_machine = ELF_HOST_MACHINE,
6326             .e_version = EV_CURRENT,
6327             .e_phoff = offsetof(struct ElfImage, phdr),
6328             .e_shoff = offsetof(struct ElfImage, shdr),
6329             .e_ehsize = sizeof(ElfW(Shdr)),
6330             .e_phentsize = sizeof(ElfW(Phdr)),
6331             .e_phnum = 1,
6332             .e_shentsize = sizeof(ElfW(Shdr)),
6333             .e_shnum = ARRAY_SIZE(img->shdr),
6334             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6335 #ifdef ELF_HOST_FLAGS
6336             .e_flags = ELF_HOST_FLAGS,
6337 #endif
6338 #ifdef ELF_OSABI
6339             .e_ident[EI_OSABI] = ELF_OSABI,
6340 #endif
6341         },
6342         .phdr = {
6343             .p_type = PT_LOAD,
6344             .p_flags = PF_X,
6345         },
6346         .shdr = {
6347             [0] = { .sh_type = SHT_NULL },
6348             /* Trick: The contents of code_gen_buffer are not present in
6349                this fake ELF file; that got allocated elsewhere.  Therefore
6350                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6351                will not look for contents.  We can record any address.  */
6352             [1] = { /* .text */
6353                 .sh_type = SHT_NOBITS,
6354                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6355             },
6356             [2] = { /* .debug_info */
6357                 .sh_type = SHT_PROGBITS,
6358                 .sh_offset = offsetof(struct ElfImage, di),
6359                 .sh_size = sizeof(struct DebugInfo),
6360             },
6361             [3] = { /* .debug_abbrev */
6362                 .sh_type = SHT_PROGBITS,
6363                 .sh_offset = offsetof(struct ElfImage, da),
6364                 .sh_size = sizeof(img->da),
6365             },
6366             [4] = { /* .debug_frame */
6367                 .sh_type = SHT_PROGBITS,
6368                 .sh_offset = sizeof(struct ElfImage),
6369             },
6370             [5] = { /* .symtab */
6371                 .sh_type = SHT_SYMTAB,
6372                 .sh_offset = offsetof(struct ElfImage, sym),
6373                 .sh_size = sizeof(img->sym),
6374                 .sh_info = 1,
6375                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6376                 .sh_entsize = sizeof(ElfW(Sym)),
6377             },
6378             [6] = { /* .strtab */
6379                 .sh_type = SHT_STRTAB,
6380                 .sh_offset = offsetof(struct ElfImage, str),
6381                 .sh_size = sizeof(img->str),
6382             }
6383         },
6384         .sym = {
6385             [1] = { /* code_gen_buffer */
6386                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6387                 .st_shndx = 1,
6388             }
6389         },
6390         .di = {
6391             .len = sizeof(struct DebugInfo) - 4,
6392             .version = 2,
6393             .ptr_size = sizeof(void *),
6394             .cu_die = 1,
6395             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6396             .fn_die = 2,
6397             .fn_name = "code_gen_buffer"
6398         },
6399         .da = {
6400             1,          /* abbrev number (the cu) */
6401             0x11, 1,    /* DW_TAG_compile_unit, has children */
6402             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6403             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6404             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6405             0, 0,       /* end of abbrev */
6406             2,          /* abbrev number (the fn) */
6407             0x2e, 0,    /* DW_TAG_subprogram, no children */
6408             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6409             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6410             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6411             0, 0,       /* end of abbrev */
6412             0           /* no more abbrev */
6413         },
6414         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6415                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6416     };
6417 
6418     /* We only need a single jit entry; statically allocate it.  */
6419     static struct jit_code_entry one_entry;
6420 
6421     uintptr_t buf = (uintptr_t)buf_ptr;
6422     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6423     DebugFrameHeader *dfh;
6424 
6425     img = g_malloc(img_size);
6426     *img = img_template;
6427 
6428     img->phdr.p_vaddr = buf;
6429     img->phdr.p_paddr = buf;
6430     img->phdr.p_memsz = buf_size;
6431 
6432     img->shdr[1].sh_name = find_string(img->str, ".text");
6433     img->shdr[1].sh_addr = buf;
6434     img->shdr[1].sh_size = buf_size;
6435 
6436     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6437     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6438 
6439     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6440     img->shdr[4].sh_size = debug_frame_size;
6441 
6442     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6443     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6444 
6445     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6446     img->sym[1].st_value = buf;
6447     img->sym[1].st_size = buf_size;
6448 
6449     img->di.cu_low_pc = buf;
6450     img->di.cu_high_pc = buf + buf_size;
6451     img->di.fn_low_pc = buf;
6452     img->di.fn_high_pc = buf + buf_size;
6453 
6454     dfh = (DebugFrameHeader *)(img + 1);
6455     memcpy(dfh, debug_frame, debug_frame_size);
6456     dfh->fde.func_start = buf;
6457     dfh->fde.func_len = buf_size;
6458 
6459 #ifdef DEBUG_JIT
6460     /* Enable this block to be able to debug the ELF image file creation.
6461        One can use readelf, objdump, or other inspection utilities.  */
6462     {
6463         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6464         FILE *f = fopen(jit, "w+b");
6465         if (f) {
6466             if (fwrite(img, img_size, 1, f) != img_size) {
6467                 /* Avoid stupid unused return value warning for fwrite.  */
6468             }
6469             fclose(f);
6470         }
6471     }
6472 #endif
6473 
6474     one_entry.symfile_addr = img;
6475     one_entry.symfile_size = img_size;
6476 
6477     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6478     __jit_debug_descriptor.relevant_entry = &one_entry;
6479     __jit_debug_descriptor.first_entry = &one_entry;
6480     __jit_debug_register_code();
6481 }
6482 #else
6483 /* No support for the feature.  Provide the entry point expected by exec.c,
6484    and implement the internal function we declared earlier.  */
6485 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: takes the same
 * arguments as the real implementation and ignores all of them.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Deliberately a no-op; the arguments are unused.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
6491 
/*
 * Entry point used when ELF_HOST_MACHINE is not defined: the call is
 * accepted and nothing is registered.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Deliberately a no-op; the arguments are unused.  */
    (void)buf;
    (void)buf_size;
}
6495 #endif /* ELF_HOST_MACHINE */
6496 
6497 #if !TCG_TARGET_MAYBE_vec
6498 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6499 {
6500     g_assert_not_reached();
6501 }
6502 #endif
6503