/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
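
/*
 * Illustrative sketch (not part of the build): a backend typically embeds
 * these headers in a host-specific blob, appends CFI opcodes describing
 * its prologue, and hands the result to tcg_register_jit().  All field
 * values below are invented for illustration only:
 *
 *     static const struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_reg_ofs[4];
 *     } debug_frame = {
 *         .h.cie.len = sizeof(DebugFrameCIE) - 4,  // length after .len
 *         .h.cie.id = -1,                          // marks this as a CIE
 *         .h.cie.version = 1,
 *         .h.cie.code_align = 1,
 *         .h.cie.data_align = 0x78,                // sleb128 -8
 *         .h.cie.return_column = 30,               // hypothetical LR regno
 *         ...
 *     };
 */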

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* gen-code addr after the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
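
/*
 * Lifecycle sketch (backend details vary; new_ldst_label() is provided
 * by tcg-ldst.c.inc and tcg_out_branch_placeholder is hypothetical):
 *
 *     TCGLabelQemuLdst *ldst = new_ldst_label(s);
 *     ldst->is_ld = true;
 *     ldst->oi = oi;
 *     ldst->addrlo_reg = addr_reg;
 *     ldst->label_ptr[0] = s->code_ptr;   // fast-path branch to patch
 *     tcg_out_branch_placeholder(s);      // taken on TLB miss
 *     ...
 *     ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
 *
 * tcg_out_ldst_finalize() later emits the out-of-line helper call for
 * each queued entry and patches label_ptr[] to reach it.
 */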

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
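
/*
 * Usage sketch: a host with 1-byte insn units (e.g. x86) emits a 4-byte
 * instruction as four units, while a 4-byte-unit host emits exactly one.
 * The tcg_patch*() variants rewrite a value already in the buffer, which
 * is how a backend's patch_reloc() resolves forward references:
 *
 *     tcg_out32(s, insn);                    // emit now
 *     ...
 *     tcg_patch32(code_ptr, fixed_insn);     // patch later, in place
 */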

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
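
/*
 * Putting the pieces above together, a sketch of the common backend
 * pattern (R_HOST_PCREL and tcg_out_branch_insn are hypothetical):
 *
 *     TCGLabel *l = gen_new_label();
 *     // Forward reference: queue a relocation against the placeholder.
 *     tcg_out_reloc(s, s->code_ptr, R_HOST_PCREL, l, 0);
 *     tcg_out_branch_insn(s, 0);
 *     ...
 *     tcg_out_label(s, l);     // bind the label to the current point
 *
 * Once every label is bound, tcg_resolve_relocs() rewrites each
 * recorded site via the backend's patch_reloc().
 */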

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers; now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
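
/*
 * Overlap example (a sketch with hypothetical registers): for
 * i1 = {dst R0, src R1} and i2 = {dst R1, src R0}, i1->dst overlaps
 * i2->src and vice versa, so the two values are either swapped with a
 * single tcg_out_xchg() when the backend provides one, or i1's source
 * is first parked in @scratch so that neither move clobbers the
 * other's input.
 */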

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of
 * overlap between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
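
/*
 * Cycle example (a sketch with hypothetical registers): i1: R1 <- R0,
 * i2: R2 <- R1, i3: R0 <- R2 forms a 3-cycle in which every destination
 * is some other move's source.  When the backend supports xchg, two
 * register exchanges rotate the three values into place and each
 * extension then runs in place; otherwise one value is parked in
 * @scratch, the other two moves are performed, and the parked value
 * finishes last.
 */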

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
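
/*
 * Illustrative expansion: for an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h (one output constrained "r", two inputs "r" and
 * "ri"), the enum inclusion above produces the enumerator
 * c_o1_i2_r_r_ri, and the array inclusion produces
 * { .args_ct_str = { "r", "r", "ri" } } at the matching index, so a
 * value returned by tcg_target_op_def() maps directly to its
 * constraint strings.
 */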


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is no longer used for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
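
/*
 * Usage sketch: tcg_malloc() (an inline wrapper around
 * tcg_malloc_internal) hands out short-lived allocations such as
 * TCGLabel and TCGRelocation.  Nothing is freed individually; the whole
 * pool is recycled by tcg_pool_reset() when translation of the next TB
 * begins (see tcg_func_start), and only the oversized "large" chunks
 * are returned to the system.
 */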

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
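
/*
 * Encoding note with a worked example: typemask packs one 3-bit
 * dh_typecode per position, the return type in bits [2:0] and argument
 * N in bits [3N+2:3N].  For info_helper_st64_mmu the packed positions
 * are void (return), env, i64, i64, i32, ptr, and a decoder recovers
 * argument n with extract32(typemask, (n + 1) * 3, 3), as
 * init_ffi_layout() does in the interpreter build below.
 */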

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);
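
    /*
     * Worked example (illustrative): for info_helper_st64_mmu the five
     * arguments (env, i64, i64, i32, ptr) occupy bits [14:0] of
     * typemask >> 3, so 32 - clz32() yields a value in 13..15 and the
     * rounding division recovers nargs = 5.
     */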

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false" when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
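
/*
 * Example (a sketch; register counts vary by host): with 6 integer
 * argument registers, arg slots 0..5 map onto those registers and
 * slot 6 becomes the first stack slot, at TCG_TARGET_CALL_STACK_OFFSET
 * from the stack pointer; each slot is sizeof(tcg_target_long) bytes.
 */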

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
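
/* E.g. an odd arg_slot of 3 is rounded up to 4; even slots are unchanged. */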

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
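
/*
 * Example (a sketch): an Int128 argument passed TCG_CALL_ARG_BY_REF on
 * a 64-bit host consumes one regular slot for the pointer plus n = 2
 * "ref" stack slots for the copy the callee is free to clobber; only
 * the pointer advances cum->arg_slot, while both words advance
 * cum->ref_slot.
 */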

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
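
/*
 * Worked example for the relocation above (illustrative, assuming 6
 * register slots): if the arguments ended at cum.arg_slot = 8, the
 * first two stack slots hold parameters, so ref_base starts at 2, is
 * rounded up to Int128 alignment if required, and every BY_REF
 * location is then shifted to sit just past the outgoing parameters.
 */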

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
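
/*
 * Resulting layout within a region (a sketch): each allocation keeps
 * the TB header and its translated code on different icache lines,
 *
 *     | TB | pad | code ... | pad | TB | pad | code ... |
 *
 * with tcg_region_alloc() switching to a fresh region when the
 * highwater mark would be crossed.
 */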
1411 
1412 void tcg_prologue_init(void)
1413 {
1414     TCGContext *s = tcg_ctx;
1415     size_t prologue_size;
1416 
1417     s->code_ptr = s->code_gen_ptr;
1418     s->code_buf = s->code_gen_ptr;
1419     s->data_gen_ptr = NULL;
1420 
1421 #ifndef CONFIG_TCG_INTERPRETER
1422     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1423 #endif
1424 
1425 #ifdef TCG_TARGET_NEED_POOL_LABELS
1426     s->pool_labels = NULL;
1427 #endif
1428 
1429     qemu_thread_jit_write();
1430     /* Generate the prologue.  */
1431     tcg_target_qemu_prologue(s);
1432 
1433 #ifdef TCG_TARGET_NEED_POOL_LABELS
1434     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1435     {
1436         int result = tcg_out_pool_finalize(s);
1437         tcg_debug_assert(result == 0);
1438     }
1439 #endif
1440 
1441     prologue_size = tcg_current_code_size(s);
1442     perf_report_prologue(s->code_gen_ptr, prologue_size);
1443 
1444 #ifndef CONFIG_TCG_INTERPRETER
1445     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1446                         (uintptr_t)s->code_buf, prologue_size);
1447 #endif
1448 
1449     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1450         FILE *logfile = qemu_log_trylock();
1451         if (logfile) {
1452             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1453             if (s->data_gen_ptr) {
1454                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1455                 size_t data_size = prologue_size - code_size;
1456                 size_t i;
1457 
1458                 disas(logfile, s->code_gen_ptr, code_size);
1459 
1460                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1461                     if (sizeof(tcg_target_ulong) == 8) {
1462                         fprintf(logfile,
1463                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1464                                 (uintptr_t)s->data_gen_ptr + i,
1465                                 *(uint64_t *)(s->data_gen_ptr + i));
1466                     } else {
1467                         fprintf(logfile,
1468                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1469                                 (uintptr_t)s->data_gen_ptr + i,
1470                                 *(uint32_t *)(s->data_gen_ptr + i));
1471                     }
1472                 }
1473             } else {
1474                 disas(logfile, s->code_gen_ptr, prologue_size);
1475             }
1476             fprintf(logfile, "\n");
1477             qemu_log_unlock(logfile);
1478         }
1479     }
1480 
1481 #ifndef CONFIG_TCG_INTERPRETER
1482     /*
1483      * Assert that goto_ptr is implemented completely, setting an epilogue.
1484      * For tci, we use NULL as the signal to return from the interpreter,
1485      * so skip this check.
1486      */
1487     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1488 #endif
1489 
1490     tcg_region_prologue_set(s);
1491 }
1492 
1493 void tcg_func_start(TCGContext *s)
1494 {
1495     tcg_pool_reset(s);
1496     s->nb_temps = s->nb_globals;
1497 
1498     /* No temps have been previously allocated for size or locality.  */
1499     memset(s->free_temps, 0, sizeof(s->free_temps));
1500 
1501     /* No constant temps have been previously allocated. */
1502     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1503         if (s->const_table[i]) {
1504             g_hash_table_remove_all(s->const_table[i]);
1505         }
1506     }
1507 
1508     s->nb_ops = 0;
1509     s->nb_labels = 0;
1510     s->current_frame_offset = s->frame_start;
1511 
1512 #ifdef CONFIG_DEBUG_TCG
1513     s->goto_tb_issue_mask = 0;
1514 #endif
1515 
1516     QTAILQ_INIT(&s->ops);
1517     QTAILQ_INIT(&s->free_ops);
1518     QSIMPLEQ_INIT(&s->labels);
1519 
1520     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1521                      s->addr_type == TCG_TYPE_I64);
1522 
1523     tcg_debug_assert(s->insn_start_words > 0);
1524 }
1525 
1526 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1527 {
1528     int n = s->nb_temps++;
1529 
1530     if (n >= TCG_MAX_TEMPS) {
1531         tcg_raise_tb_overflow(s);
1532     }
1533     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1534 }
1535 
1536 static TCGTemp *tcg_global_alloc(TCGContext *s)
1537 {
1538     TCGTemp *ts;
1539 
1540     tcg_debug_assert(s->nb_globals == s->nb_temps);
1541     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1542     s->nb_globals++;
1543     ts = tcg_temp_alloc(s);
1544     ts->kind = TEMP_GLOBAL;
1545 
1546     return ts;
1547 }
1548 
1549 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1550                                             TCGReg reg, const char *name)
1551 {
1552     TCGTemp *ts;
1553 
1554     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1555 
1556     ts = tcg_global_alloc(s);
1557     ts->base_type = type;
1558     ts->type = type;
1559     ts->kind = TEMP_FIXED;
1560     ts->reg = reg;
1561     ts->name = name;
1562     tcg_regset_set_reg(s->reserved_regs, reg);
1563 
1564     return ts;
1565 }
1566 
1567 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1568 {
1569     s->frame_start = start;
1570     s->frame_end = start + size;
1571     s->frame_temp
1572         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1573 }
1574 
1575 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1576                                             const char *name, TCGType type)
1577 {
1578     TCGContext *s = tcg_ctx;
1579     TCGTemp *base_ts = tcgv_ptr_temp(base);
1580     TCGTemp *ts = tcg_global_alloc(s);
1581     int indirect_reg = 0;
1582 
1583     switch (base_ts->kind) {
1584     case TEMP_FIXED:
1585         break;
1586     case TEMP_GLOBAL:
1587         /* We do not support double-indirect registers.  */
1588         tcg_debug_assert(!base_ts->indirect_reg);
1589         base_ts->indirect_base = 1;
1590         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1591                             ? 2 : 1);
1592         indirect_reg = 1;
1593         break;
1594     default:
1595         g_assert_not_reached();
1596     }
1597 
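    /*
     * On a 32-bit host, a 64-bit global becomes two consecutive I32
     * temps (temp_subindex 0 and 1), named "<name>_0" and "<name>_1".
     */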
1598     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1599         TCGTemp *ts2 = tcg_global_alloc(s);
1600         char buf[64];
1601 
1602         ts->base_type = TCG_TYPE_I64;
1603         ts->type = TCG_TYPE_I32;
1604         ts->indirect_reg = indirect_reg;
1605         ts->mem_allocated = 1;
1606         ts->mem_base = base_ts;
1607         ts->mem_offset = offset;
1608         pstrcpy(buf, sizeof(buf), name);
1609         pstrcat(buf, sizeof(buf), "_0");
1610         ts->name = strdup(buf);
1611 
1612         tcg_debug_assert(ts2 == ts + 1);
1613         ts2->base_type = TCG_TYPE_I64;
1614         ts2->type = TCG_TYPE_I32;
1615         ts2->indirect_reg = indirect_reg;
1616         ts2->mem_allocated = 1;
1617         ts2->mem_base = base_ts;
1618         ts2->mem_offset = offset + 4;
1619         ts2->temp_subindex = 1;
1620         pstrcpy(buf, sizeof(buf), name);
1621         pstrcat(buf, sizeof(buf), "_1");
1622         ts2->name = strdup(buf);
1623     } else {
1624         ts->base_type = type;
1625         ts->type = type;
1626         ts->indirect_reg = indirect_reg;
1627         ts->mem_allocated = 1;
1628         ts->mem_base = base_ts;
1629         ts->mem_offset = offset;
1630         ts->name = name;
1631     }
1632     return ts;
1633 }
1634 
1635 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1636 {
1637     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1638     return temp_tcgv_i32(ts);
1639 }
1640 
1641 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1642 {
1643     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1644     return temp_tcgv_i64(ts);
1645 }
1646 
1647 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1648 {
1649     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1650     return temp_tcgv_ptr(ts);
1651 }
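
/*
 * Illustrative usage (editorial): a front end creates its CPU globals
 * once at translator init, e.g. with a hypothetical CPUArchState
 * field "xregs":
 *
 *     cpu_x0 = tcg_global_mem_new_i64(tcg_env,
 *                  offsetof(CPUArchState, xregs[0]), "x0");
 *
 * The returned handle then names that env slot in every TB.
 */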
1652 
1653 static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1654 {
1655     TCGContext *s = tcg_ctx;
1656     TCGTemp *ts;
1657     int n;
1658 
1659     if (kind == TEMP_EBB) {
1660         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1661 
1662         if (idx < TCG_MAX_TEMPS) {
1663             /* There is already an available temp with the right type.  */
1664             clear_bit(idx, s->free_temps[type].l);
1665 
1666             ts = &s->temps[idx];
1667             ts->temp_allocated = 1;
1668             tcg_debug_assert(ts->base_type == type);
1669             tcg_debug_assert(ts->kind == kind);
1670             return ts;
1671         }
1672     } else {
1673         tcg_debug_assert(kind == TEMP_TB);
1674     }
1675 
1676     switch (type) {
1677     case TCG_TYPE_I32:
1678     case TCG_TYPE_V64:
1679     case TCG_TYPE_V128:
1680     case TCG_TYPE_V256:
1681         n = 1;
1682         break;
1683     case TCG_TYPE_I64:
1684         n = 64 / TCG_TARGET_REG_BITS;
1685         break;
1686     case TCG_TYPE_I128:
1687         n = 128 / TCG_TARGET_REG_BITS;
1688         break;
1689     default:
1690         g_assert_not_reached();
1691     }
1692 
1693     ts = tcg_temp_alloc(s);
1694     ts->base_type = type;
1695     ts->temp_allocated = 1;
1696     ts->kind = kind;
1697 
1698     if (n == 1) {
1699         ts->type = type;
1700     } else {
1701         ts->type = TCG_TYPE_REG;
1702 
1703         for (int i = 1; i < n; ++i) {
1704             TCGTemp *ts2 = tcg_temp_alloc(s);
1705 
1706             tcg_debug_assert(ts2 == ts + i);
1707             ts2->base_type = type;
1708             ts2->type = TCG_TYPE_REG;
1709             ts2->temp_allocated = 1;
1710             ts2->temp_subindex = i;
1711             ts2->kind = kind;
1712         }
1713     }
1714     return ts;
1715 }
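
/*
 * The wrappers below select the temp lifetime: tcg_temp_new_*()
 * creates a TEMP_TB temp, valid to the end of the translation block,
 * while tcg_temp_ebb_new_*() creates a TEMP_EBB temp, valid only
 * within the current extended basic block and recycled through the
 * free_temps bitmaps once freed.
 */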
1716 
1717 TCGv_i32 tcg_temp_new_i32(void)
1718 {
1719     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1720 }
1721 
1722 TCGv_i32 tcg_temp_ebb_new_i32(void)
1723 {
1724     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1725 }
1726 
1727 TCGv_i64 tcg_temp_new_i64(void)
1728 {
1729     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1730 }
1731 
1732 TCGv_i64 tcg_temp_ebb_new_i64(void)
1733 {
1734     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1735 }
1736 
1737 TCGv_ptr tcg_temp_new_ptr(void)
1738 {
1739     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1740 }
1741 
1742 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1743 {
1744     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1745 }
1746 
1747 TCGv_i128 tcg_temp_new_i128(void)
1748 {
1749     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1750 }
1751 
1752 TCGv_i128 tcg_temp_ebb_new_i128(void)
1753 {
1754     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1755 }
1756 
1757 TCGv_vec tcg_temp_new_vec(TCGType type)
1758 {
1759     TCGTemp *t;
1760 
1761 #ifdef CONFIG_DEBUG_TCG
1762     switch (type) {
1763     case TCG_TYPE_V64:
1764         assert(TCG_TARGET_HAS_v64);
1765         break;
1766     case TCG_TYPE_V128:
1767         assert(TCG_TARGET_HAS_v128);
1768         break;
1769     case TCG_TYPE_V256:
1770         assert(TCG_TARGET_HAS_v256);
1771         break;
1772     default:
1773         g_assert_not_reached();
1774     }
1775 #endif
1776 
1777     t = tcg_temp_new_internal(type, TEMP_EBB);
1778     return temp_tcgv_vec(t);
1779 }
1780 
1781 /* Create a new temp of the same type as an existing temp.  */
1782 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1783 {
1784     TCGTemp *t = tcgv_vec_temp(match);
1785 
1786     tcg_debug_assert(t->temp_allocated != 0);
1787 
1788     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1789     return temp_tcgv_vec(t);
1790 }
1791 
1792 void tcg_temp_free_internal(TCGTemp *ts)
1793 {
1794     TCGContext *s = tcg_ctx;
1795 
1796     switch (ts->kind) {
1797     case TEMP_CONST:
1798     case TEMP_TB:
1799         /* Silently ignore free. */
1800         break;
1801     case TEMP_EBB:
1802         tcg_debug_assert(ts->temp_allocated != 0);
1803         ts->temp_allocated = 0;
1804         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1805         break;
1806     default:
1807         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1808         g_assert_not_reached();
1809     }
1810 }
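
/*
 * Note that only TEMP_EBB temps are recycled above; freeing a
 * TEMP_TB or TEMP_CONST temp is deliberately a no-op, as their
 * lifetimes are managed by liveness analysis and the constant
 * hash tables respectively.
 */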
1811 
1812 void tcg_temp_free_i32(TCGv_i32 arg)
1813 {
1814     tcg_temp_free_internal(tcgv_i32_temp(arg));
1815 }
1816 
1817 void tcg_temp_free_i64(TCGv_i64 arg)
1818 {
1819     tcg_temp_free_internal(tcgv_i64_temp(arg));
1820 }
1821 
1822 void tcg_temp_free_i128(TCGv_i128 arg)
1823 {
1824     tcg_temp_free_internal(tcgv_i128_temp(arg));
1825 }
1826 
1827 void tcg_temp_free_ptr(TCGv_ptr arg)
1828 {
1829     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1830 }
1831 
1832 void tcg_temp_free_vec(TCGv_vec arg)
1833 {
1834     tcg_temp_free_internal(tcgv_vec_temp(arg));
1835 }
1836 
1837 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1838 {
1839     TCGContext *s = tcg_ctx;
1840     GHashTable *h = s->const_table[type];
1841     TCGTemp *ts;
1842 
1843     if (h == NULL) {
1844         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1845         s->const_table[type] = h;
1846     }
1847 
1848     ts = g_hash_table_lookup(h, &val);
1849     if (ts == NULL) {
1850         int64_t *val_ptr;
1851 
1852         ts = tcg_temp_alloc(s);
1853 
1854         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1855             TCGTemp *ts2 = tcg_temp_alloc(s);
1856 
1857             tcg_debug_assert(ts2 == ts + 1);
1858 
1859             ts->base_type = TCG_TYPE_I64;
1860             ts->type = TCG_TYPE_I32;
1861             ts->kind = TEMP_CONST;
1862             ts->temp_allocated = 1;
1863 
1864             ts2->base_type = TCG_TYPE_I64;
1865             ts2->type = TCG_TYPE_I32;
1866             ts2->kind = TEMP_CONST;
1867             ts2->temp_allocated = 1;
1868             ts2->temp_subindex = 1;
1869 
1870             /*
1871              * Retain the full value of the 64-bit constant in the low
1872              * part, so that the hash table works.  Actual uses will
1873              * truncate the value to the low part.
1874              */
1875             ts[HOST_BIG_ENDIAN].val = val;
1876             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1877             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1878         } else {
1879             ts->base_type = type;
1880             ts->type = type;
1881             ts->kind = TEMP_CONST;
1882             ts->temp_allocated = 1;
1883             ts->val = val;
1884             val_ptr = &ts->val;
1885         }
1886         g_hash_table_insert(h, val_ptr, ts);
1887     }
1888 
1889     return ts;
1890 }
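
/*
 * Illustrative note: constants are interned per (type, value), so
 * e.g. two calls to tcg_constant_i32(42) return the same read-only
 * temp.  Constant temps must never be written, and freeing them is
 * a no-op (see tcg_temp_free_internal above).
 */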
1891 
1892 TCGv_i32 tcg_constant_i32(int32_t val)
1893 {
1894     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1895 }
1896 
1897 TCGv_i64 tcg_constant_i64(int64_t val)
1898 {
1899     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1900 }
1901 
1902 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1903 {
1904     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1905 }
1906 
1907 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1908 {
1909     val = dup_const(vece, val);
1910     return temp_tcgv_vec(tcg_constant_internal(type, val));
1911 }
1912 
1913 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1914 {
1915     TCGTemp *t = tcgv_vec_temp(match);
1916 
1917     tcg_debug_assert(t->temp_allocated != 0);
1918     return tcg_constant_vec(t->base_type, vece, val);
1919 }
1920 
1921 #ifdef CONFIG_DEBUG_TCG
1922 size_t temp_idx(TCGTemp *ts)
1923 {
1924     ptrdiff_t n = ts - tcg_ctx->temps;
1925     assert(n >= 0 && n < tcg_ctx->nb_temps);
1926     return n;
1927 }
1928 
1929 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1930 {
1931     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1932 
1933     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1934     assert(o % sizeof(TCGTemp) == 0);
1935 
1936     return (void *)tcg_ctx + (uintptr_t)v;
1937 }
1938 #endif /* CONFIG_DEBUG_TCG */
1939 
1940 /* Return true if OP may appear in the opcode stream.
1941    Test the runtime variable that controls each opcode.  */
1942 bool tcg_op_supported(TCGOpcode op)
1943 {
1944     const bool have_vec
1945         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1946 
1947     switch (op) {
1948     case INDEX_op_discard:
1949     case INDEX_op_set_label:
1950     case INDEX_op_call:
1951     case INDEX_op_br:
1952     case INDEX_op_mb:
1953     case INDEX_op_insn_start:
1954     case INDEX_op_exit_tb:
1955     case INDEX_op_goto_tb:
1956     case INDEX_op_goto_ptr:
1957     case INDEX_op_qemu_ld_a32_i32:
1958     case INDEX_op_qemu_ld_a64_i32:
1959     case INDEX_op_qemu_st_a32_i32:
1960     case INDEX_op_qemu_st_a64_i32:
1961     case INDEX_op_qemu_ld_a32_i64:
1962     case INDEX_op_qemu_ld_a64_i64:
1963     case INDEX_op_qemu_st_a32_i64:
1964     case INDEX_op_qemu_st_a64_i64:
1965         return true;
1966 
1967     case INDEX_op_qemu_st8_a32_i32:
1968     case INDEX_op_qemu_st8_a64_i32:
1969         return TCG_TARGET_HAS_qemu_st8_i32;
1970 
1971     case INDEX_op_qemu_ld_a32_i128:
1972     case INDEX_op_qemu_ld_a64_i128:
1973     case INDEX_op_qemu_st_a32_i128:
1974     case INDEX_op_qemu_st_a64_i128:
1975         return TCG_TARGET_HAS_qemu_ldst_i128;
1976 
1977     case INDEX_op_mov_i32:
1978     case INDEX_op_setcond_i32:
1979     case INDEX_op_brcond_i32:
1980     case INDEX_op_movcond_i32:
1981     case INDEX_op_ld8u_i32:
1982     case INDEX_op_ld8s_i32:
1983     case INDEX_op_ld16u_i32:
1984     case INDEX_op_ld16s_i32:
1985     case INDEX_op_ld_i32:
1986     case INDEX_op_st8_i32:
1987     case INDEX_op_st16_i32:
1988     case INDEX_op_st_i32:
1989     case INDEX_op_add_i32:
1990     case INDEX_op_sub_i32:
1991     case INDEX_op_mul_i32:
1992     case INDEX_op_and_i32:
1993     case INDEX_op_or_i32:
1994     case INDEX_op_xor_i32:
1995     case INDEX_op_shl_i32:
1996     case INDEX_op_shr_i32:
1997     case INDEX_op_sar_i32:
1998         return true;
1999 
2000     case INDEX_op_negsetcond_i32:
2001         return TCG_TARGET_HAS_negsetcond_i32;
2002     case INDEX_op_div_i32:
2003     case INDEX_op_divu_i32:
2004         return TCG_TARGET_HAS_div_i32;
2005     case INDEX_op_rem_i32:
2006     case INDEX_op_remu_i32:
2007         return TCG_TARGET_HAS_rem_i32;
2008     case INDEX_op_div2_i32:
2009     case INDEX_op_divu2_i32:
2010         return TCG_TARGET_HAS_div2_i32;
2011     case INDEX_op_rotl_i32:
2012     case INDEX_op_rotr_i32:
2013         return TCG_TARGET_HAS_rot_i32;
2014     case INDEX_op_deposit_i32:
2015         return TCG_TARGET_HAS_deposit_i32;
2016     case INDEX_op_extract_i32:
2017         return TCG_TARGET_HAS_extract_i32;
2018     case INDEX_op_sextract_i32:
2019         return TCG_TARGET_HAS_sextract_i32;
2020     case INDEX_op_extract2_i32:
2021         return TCG_TARGET_HAS_extract2_i32;
2022     case INDEX_op_add2_i32:
2023         return TCG_TARGET_HAS_add2_i32;
2024     case INDEX_op_sub2_i32:
2025         return TCG_TARGET_HAS_sub2_i32;
2026     case INDEX_op_mulu2_i32:
2027         return TCG_TARGET_HAS_mulu2_i32;
2028     case INDEX_op_muls2_i32:
2029         return TCG_TARGET_HAS_muls2_i32;
2030     case INDEX_op_muluh_i32:
2031         return TCG_TARGET_HAS_muluh_i32;
2032     case INDEX_op_mulsh_i32:
2033         return TCG_TARGET_HAS_mulsh_i32;
2034     case INDEX_op_ext8s_i32:
2035         return TCG_TARGET_HAS_ext8s_i32;
2036     case INDEX_op_ext16s_i32:
2037         return TCG_TARGET_HAS_ext16s_i32;
2038     case INDEX_op_ext8u_i32:
2039         return TCG_TARGET_HAS_ext8u_i32;
2040     case INDEX_op_ext16u_i32:
2041         return TCG_TARGET_HAS_ext16u_i32;
2042     case INDEX_op_bswap16_i32:
2043         return TCG_TARGET_HAS_bswap16_i32;
2044     case INDEX_op_bswap32_i32:
2045         return TCG_TARGET_HAS_bswap32_i32;
2046     case INDEX_op_not_i32:
2047         return TCG_TARGET_HAS_not_i32;
2048     case INDEX_op_neg_i32:
2049         return TCG_TARGET_HAS_neg_i32;
2050     case INDEX_op_andc_i32:
2051         return TCG_TARGET_HAS_andc_i32;
2052     case INDEX_op_orc_i32:
2053         return TCG_TARGET_HAS_orc_i32;
2054     case INDEX_op_eqv_i32:
2055         return TCG_TARGET_HAS_eqv_i32;
2056     case INDEX_op_nand_i32:
2057         return TCG_TARGET_HAS_nand_i32;
2058     case INDEX_op_nor_i32:
2059         return TCG_TARGET_HAS_nor_i32;
2060     case INDEX_op_clz_i32:
2061         return TCG_TARGET_HAS_clz_i32;
2062     case INDEX_op_ctz_i32:
2063         return TCG_TARGET_HAS_ctz_i32;
2064     case INDEX_op_ctpop_i32:
2065         return TCG_TARGET_HAS_ctpop_i32;
2066 
2067     case INDEX_op_brcond2_i32:
2068     case INDEX_op_setcond2_i32:
2069         return TCG_TARGET_REG_BITS == 32;
2070 
2071     case INDEX_op_mov_i64:
2072     case INDEX_op_setcond_i64:
2073     case INDEX_op_brcond_i64:
2074     case INDEX_op_movcond_i64:
2075     case INDEX_op_ld8u_i64:
2076     case INDEX_op_ld8s_i64:
2077     case INDEX_op_ld16u_i64:
2078     case INDEX_op_ld16s_i64:
2079     case INDEX_op_ld32u_i64:
2080     case INDEX_op_ld32s_i64:
2081     case INDEX_op_ld_i64:
2082     case INDEX_op_st8_i64:
2083     case INDEX_op_st16_i64:
2084     case INDEX_op_st32_i64:
2085     case INDEX_op_st_i64:
2086     case INDEX_op_add_i64:
2087     case INDEX_op_sub_i64:
2088     case INDEX_op_mul_i64:
2089     case INDEX_op_and_i64:
2090     case INDEX_op_or_i64:
2091     case INDEX_op_xor_i64:
2092     case INDEX_op_shl_i64:
2093     case INDEX_op_shr_i64:
2094     case INDEX_op_sar_i64:
2095     case INDEX_op_ext_i32_i64:
2096     case INDEX_op_extu_i32_i64:
2097         return TCG_TARGET_REG_BITS == 64;
2098 
2099     case INDEX_op_negsetcond_i64:
2100         return TCG_TARGET_HAS_negsetcond_i64;
2101     case INDEX_op_div_i64:
2102     case INDEX_op_divu_i64:
2103         return TCG_TARGET_HAS_div_i64;
2104     case INDEX_op_rem_i64:
2105     case INDEX_op_remu_i64:
2106         return TCG_TARGET_HAS_rem_i64;
2107     case INDEX_op_div2_i64:
2108     case INDEX_op_divu2_i64:
2109         return TCG_TARGET_HAS_div2_i64;
2110     case INDEX_op_rotl_i64:
2111     case INDEX_op_rotr_i64:
2112         return TCG_TARGET_HAS_rot_i64;
2113     case INDEX_op_deposit_i64:
2114         return TCG_TARGET_HAS_deposit_i64;
2115     case INDEX_op_extract_i64:
2116         return TCG_TARGET_HAS_extract_i64;
2117     case INDEX_op_sextract_i64:
2118         return TCG_TARGET_HAS_sextract_i64;
2119     case INDEX_op_extract2_i64:
2120         return TCG_TARGET_HAS_extract2_i64;
2121     case INDEX_op_extrl_i64_i32:
2122     case INDEX_op_extrh_i64_i32:
2123         return TCG_TARGET_HAS_extr_i64_i32;
2124     case INDEX_op_ext8s_i64:
2125         return TCG_TARGET_HAS_ext8s_i64;
2126     case INDEX_op_ext16s_i64:
2127         return TCG_TARGET_HAS_ext16s_i64;
2128     case INDEX_op_ext32s_i64:
2129         return TCG_TARGET_HAS_ext32s_i64;
2130     case INDEX_op_ext8u_i64:
2131         return TCG_TARGET_HAS_ext8u_i64;
2132     case INDEX_op_ext16u_i64:
2133         return TCG_TARGET_HAS_ext16u_i64;
2134     case INDEX_op_ext32u_i64:
2135         return TCG_TARGET_HAS_ext32u_i64;
2136     case INDEX_op_bswap16_i64:
2137         return TCG_TARGET_HAS_bswap16_i64;
2138     case INDEX_op_bswap32_i64:
2139         return TCG_TARGET_HAS_bswap32_i64;
2140     case INDEX_op_bswap64_i64:
2141         return TCG_TARGET_HAS_bswap64_i64;
2142     case INDEX_op_not_i64:
2143         return TCG_TARGET_HAS_not_i64;
2144     case INDEX_op_neg_i64:
2145         return TCG_TARGET_HAS_neg_i64;
2146     case INDEX_op_andc_i64:
2147         return TCG_TARGET_HAS_andc_i64;
2148     case INDEX_op_orc_i64:
2149         return TCG_TARGET_HAS_orc_i64;
2150     case INDEX_op_eqv_i64:
2151         return TCG_TARGET_HAS_eqv_i64;
2152     case INDEX_op_nand_i64:
2153         return TCG_TARGET_HAS_nand_i64;
2154     case INDEX_op_nor_i64:
2155         return TCG_TARGET_HAS_nor_i64;
2156     case INDEX_op_clz_i64:
2157         return TCG_TARGET_HAS_clz_i64;
2158     case INDEX_op_ctz_i64:
2159         return TCG_TARGET_HAS_ctz_i64;
2160     case INDEX_op_ctpop_i64:
2161         return TCG_TARGET_HAS_ctpop_i64;
2162     case INDEX_op_add2_i64:
2163         return TCG_TARGET_HAS_add2_i64;
2164     case INDEX_op_sub2_i64:
2165         return TCG_TARGET_HAS_sub2_i64;
2166     case INDEX_op_mulu2_i64:
2167         return TCG_TARGET_HAS_mulu2_i64;
2168     case INDEX_op_muls2_i64:
2169         return TCG_TARGET_HAS_muls2_i64;
2170     case INDEX_op_muluh_i64:
2171         return TCG_TARGET_HAS_muluh_i64;
2172     case INDEX_op_mulsh_i64:
2173         return TCG_TARGET_HAS_mulsh_i64;
2174 
2175     case INDEX_op_mov_vec:
2176     case INDEX_op_dup_vec:
2177     case INDEX_op_dupm_vec:
2178     case INDEX_op_ld_vec:
2179     case INDEX_op_st_vec:
2180     case INDEX_op_add_vec:
2181     case INDEX_op_sub_vec:
2182     case INDEX_op_and_vec:
2183     case INDEX_op_or_vec:
2184     case INDEX_op_xor_vec:
2185     case INDEX_op_cmp_vec:
2186         return have_vec;
2187     case INDEX_op_dup2_vec:
2188         return have_vec && TCG_TARGET_REG_BITS == 32;
2189     case INDEX_op_not_vec:
2190         return have_vec && TCG_TARGET_HAS_not_vec;
2191     case INDEX_op_neg_vec:
2192         return have_vec && TCG_TARGET_HAS_neg_vec;
2193     case INDEX_op_abs_vec:
2194         return have_vec && TCG_TARGET_HAS_abs_vec;
2195     case INDEX_op_andc_vec:
2196         return have_vec && TCG_TARGET_HAS_andc_vec;
2197     case INDEX_op_orc_vec:
2198         return have_vec && TCG_TARGET_HAS_orc_vec;
2199     case INDEX_op_nand_vec:
2200         return have_vec && TCG_TARGET_HAS_nand_vec;
2201     case INDEX_op_nor_vec:
2202         return have_vec && TCG_TARGET_HAS_nor_vec;
2203     case INDEX_op_eqv_vec:
2204         return have_vec && TCG_TARGET_HAS_eqv_vec;
2205     case INDEX_op_mul_vec:
2206         return have_vec && TCG_TARGET_HAS_mul_vec;
2207     case INDEX_op_shli_vec:
2208     case INDEX_op_shri_vec:
2209     case INDEX_op_sari_vec:
2210         return have_vec && TCG_TARGET_HAS_shi_vec;
2211     case INDEX_op_shls_vec:
2212     case INDEX_op_shrs_vec:
2213     case INDEX_op_sars_vec:
2214         return have_vec && TCG_TARGET_HAS_shs_vec;
2215     case INDEX_op_shlv_vec:
2216     case INDEX_op_shrv_vec:
2217     case INDEX_op_sarv_vec:
2218         return have_vec && TCG_TARGET_HAS_shv_vec;
2219     case INDEX_op_rotli_vec:
2220         return have_vec && TCG_TARGET_HAS_roti_vec;
2221     case INDEX_op_rotls_vec:
2222         return have_vec && TCG_TARGET_HAS_rots_vec;
2223     case INDEX_op_rotlv_vec:
2224     case INDEX_op_rotrv_vec:
2225         return have_vec && TCG_TARGET_HAS_rotv_vec;
2226     case INDEX_op_ssadd_vec:
2227     case INDEX_op_usadd_vec:
2228     case INDEX_op_sssub_vec:
2229     case INDEX_op_ussub_vec:
2230         return have_vec && TCG_TARGET_HAS_sat_vec;
2231     case INDEX_op_smin_vec:
2232     case INDEX_op_umin_vec:
2233     case INDEX_op_smax_vec:
2234     case INDEX_op_umax_vec:
2235         return have_vec && TCG_TARGET_HAS_minmax_vec;
2236     case INDEX_op_bitsel_vec:
2237         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2238     case INDEX_op_cmpsel_vec:
2239         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2240 
2241     default:
2242         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2243         return true;
2244     }
2245 }
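
/*
 * Illustrative usage (editorial): expansion code guards optional
 * opcodes with this predicate, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ... emit the op directly ...
 *     } else {
 *         ... fall back to an alternate expansion ...
 *     }
 */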
2246 
2247 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2248 
2249 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2250 {
2251     TCGv_i64 extend_free[MAX_CALL_IARGS];
2252     int n_extend = 0;
2253     TCGOp *op;
2254     int i, n, pi = 0, total_args;
2255 
2256     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2257         init_call_layout(info);
2258         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2259     }
2260 
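    /*
     * The two extra slots hold the raw function pointer and the
     * TCGHelperInfo pointer; both are appended after the in/out
     * arguments at the bottom of this function.
     */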
2261     total_args = info->nr_out + info->nr_in + 2;
2262     op = tcg_op_alloc(INDEX_op_call, total_args);
2263 
2264 #ifdef CONFIG_PLUGIN
2265     /* Flag helpers that may affect guest state */
2266     if (tcg_ctx->plugin_insn &&
2267         !(info->flags & TCG_CALL_PLUGIN) &&
2268         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2269         tcg_ctx->plugin_insn->calls_helpers = true;
2270     }
2271 #endif
2272 
2273     TCGOP_CALLO(op) = n = info->nr_out;
2274     switch (n) {
2275     case 0:
2276         tcg_debug_assert(ret == NULL);
2277         break;
2278     case 1:
2279         tcg_debug_assert(ret != NULL);
2280         op->args[pi++] = temp_arg(ret);
2281         break;
2282     case 2:
2283     case 4:
2284         tcg_debug_assert(ret != NULL);
2285         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2286         tcg_debug_assert(ret->temp_subindex == 0);
2287         for (i = 0; i < n; ++i) {
2288             op->args[pi++] = temp_arg(ret + i);
2289         }
2290         break;
2291     default:
2292         g_assert_not_reached();
2293     }
2294 
2295     TCGOP_CALLI(op) = n = info->nr_in;
2296     for (i = 0; i < n; i++) {
2297         const TCGCallArgumentLoc *loc = &info->in[i];
2298         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2299 
2300         switch (loc->kind) {
2301         case TCG_CALL_ARG_NORMAL:
2302         case TCG_CALL_ARG_BY_REF:
2303         case TCG_CALL_ARG_BY_REF_N:
2304             op->args[pi++] = temp_arg(ts);
2305             break;
2306 
2307         case TCG_CALL_ARG_EXTEND_U:
2308         case TCG_CALL_ARG_EXTEND_S:
2309             {
2310                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2311                 TCGv_i32 orig = temp_tcgv_i32(ts);
2312 
2313                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2314                     tcg_gen_ext_i32_i64(temp, orig);
2315                 } else {
2316                     tcg_gen_extu_i32_i64(temp, orig);
2317                 }
2318                 op->args[pi++] = tcgv_i64_arg(temp);
2319                 extend_free[n_extend++] = temp;
2320             }
2321             break;
2322 
2323         default:
2324             g_assert_not_reached();
2325         }
2326     }
2327     op->args[pi++] = (uintptr_t)info->func;
2328     op->args[pi++] = (uintptr_t)info;
2329     tcg_debug_assert(pi == total_args);
2330 
2331     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2332 
2333     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2334     for (i = 0; i < n_extend; ++i) {
2335         tcg_temp_free_i64(extend_free[i]);
2336     }
2337 }
2338 
2339 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2340 {
2341     tcg_gen_callN(info, ret, NULL);
2342 }
2343 
2344 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2345 {
2346     tcg_gen_callN(info, ret, &t1);
2347 }
2348 
2349 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2350 {
2351     TCGTemp *args[2] = { t1, t2 };
2352     tcg_gen_callN(info, ret, args);
2353 }
2354 
2355 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2356                    TCGTemp *t2, TCGTemp *t3)
2357 {
2358     TCGTemp *args[3] = { t1, t2, t3 };
2359     tcg_gen_callN(info, ret, args);
2360 }
2361 
2362 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2363                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2364 {
2365     TCGTemp *args[4] = { t1, t2, t3, t4 };
2366     tcg_gen_callN(info, ret, args);
2367 }
2368 
2369 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2370                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2371 {
2372     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2373     tcg_gen_callN(info, ret, args);
2374 }
2375 
2376 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2377                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2378 {
2379     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2380     tcg_gen_callN(info, ret, args);
2381 }
2382 
2383 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2384                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2385                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2386 {
2387     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2388     tcg_gen_callN(info, ret, args);
2389 }
2390 
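/*
 * Reset per-temp allocator state: constants become TEMP_VAL_CONST,
 * fixed temps live in their reserved register, globals start in
 * memory, and EBB temps start dead; TB and EBB temps additionally
 * have no stack slot allocated yet.
 */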
2391 static void tcg_reg_alloc_start(TCGContext *s)
2392 {
2393     int i, n;
2394 
2395     for (i = 0, n = s->nb_temps; i < n; i++) {
2396         TCGTemp *ts = &s->temps[i];
2397         TCGTempVal val = TEMP_VAL_MEM;
2398 
2399         switch (ts->kind) {
2400         case TEMP_CONST:
2401             val = TEMP_VAL_CONST;
2402             break;
2403         case TEMP_FIXED:
2404             val = TEMP_VAL_REG;
2405             break;
2406         case TEMP_GLOBAL:
2407             break;
2408         case TEMP_EBB:
2409             val = TEMP_VAL_DEAD;
2410             /* fall through */
2411         case TEMP_TB:
2412             ts->mem_allocated = 0;
2413             break;
2414         default:
2415             g_assert_not_reached();
2416         }
2417         ts->val_type = val;
2418     }
2419 
2420     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2421 }
2422 
2423 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2424                                  TCGTemp *ts)
2425 {
2426     int idx = temp_idx(ts);
2427 
2428     switch (ts->kind) {
2429     case TEMP_FIXED:
2430     case TEMP_GLOBAL:
2431         pstrcpy(buf, buf_size, ts->name);
2432         break;
2433     case TEMP_TB:
2434         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2435         break;
2436     case TEMP_EBB:
2437         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2438         break;
2439     case TEMP_CONST:
2440         switch (ts->type) {
2441         case TCG_TYPE_I32:
2442             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2443             break;
2444 #if TCG_TARGET_REG_BITS > 32
2445         case TCG_TYPE_I64:
2446             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2447             break;
2448 #endif
2449         case TCG_TYPE_V64:
2450         case TCG_TYPE_V128:
2451         case TCG_TYPE_V256:
2452             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2453                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2454             break;
2455         default:
2456             g_assert_not_reached();
2457         }
2458         break;
2459     }
2460     return buf;
2461 }
2462 
2463 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2464                              int buf_size, TCGArg arg)
2465 {
2466     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2467 }
2468 
2469 static const char * const cond_name[] =
2470 {
2471     [TCG_COND_NEVER] = "never",
2472     [TCG_COND_ALWAYS] = "always",
2473     [TCG_COND_EQ] = "eq",
2474     [TCG_COND_NE] = "ne",
2475     [TCG_COND_LT] = "lt",
2476     [TCG_COND_GE] = "ge",
2477     [TCG_COND_LE] = "le",
2478     [TCG_COND_GT] = "gt",
2479     [TCG_COND_LTU] = "ltu",
2480     [TCG_COND_GEU] = "geu",
2481     [TCG_COND_LEU] = "leu",
2482     [TCG_COND_GTU] = "gtu"
2483 };
2484 
2485 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2486 {
2487     [MO_UB]   = "ub",
2488     [MO_SB]   = "sb",
2489     [MO_LEUW] = "leuw",
2490     [MO_LESW] = "lesw",
2491     [MO_LEUL] = "leul",
2492     [MO_LESL] = "lesl",
2493     [MO_LEUQ] = "leq",
2494     [MO_BEUW] = "beuw",
2495     [MO_BESW] = "besw",
2496     [MO_BEUL] = "beul",
2497     [MO_BESL] = "besl",
2498     [MO_BEUQ] = "beq",
2499     [MO_128 + MO_BE] = "beo",
2500     [MO_128 + MO_LE] = "leo",
2501 };
2502 
2503 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2504     [MO_UNALN >> MO_ASHIFT]    = "un+",
2505     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2506     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2507     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2508     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2509     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2510     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2511     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2512 };
2513 
2514 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2515     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2516     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2517     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2518     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2519     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2520     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2521 };
2522 
2523 static const char bswap_flag_name[][6] = {
2524     [TCG_BSWAP_IZ] = "iz",
2525     [TCG_BSWAP_OZ] = "oz",
2526     [TCG_BSWAP_OS] = "os",
2527     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2528     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2529 };
2530 
2531 static inline bool tcg_regset_single(TCGRegSet d)
2532 {
2533     return (d & (d - 1)) == 0;
2534 }
2535 
2536 static inline TCGReg tcg_regset_first(TCGRegSet d)
2537 {
2538     if (TCG_TARGET_NB_REGS <= 32) {
2539         return ctz32(d);
2540     } else {
2541         return ctz64(d);
2542     }
2543 }
2544 
2545 /* Return only the number of characters output -- no error return. */
2546 #define ne_fprintf(...) \
2547     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2548 
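/*
 * Dump the current opcode stream to F, one op per line, in the same
 * textual form as "-d op" logging.  Liveness annotations (sync/dead)
 * are appended when present, and per-output register preferences
 * when HAVE_PREFS is set.
 */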
2549 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2550 {
2551     char buf[128];
2552     TCGOp *op;
2553 
2554     QTAILQ_FOREACH(op, &s->ops, link) {
2555         int i, k, nb_oargs, nb_iargs, nb_cargs;
2556         const TCGOpDef *def;
2557         TCGOpcode c;
2558         int col = 0;
2559 
2560         c = op->opc;
2561         def = &tcg_op_defs[c];
2562 
2563         if (c == INDEX_op_insn_start) {
2564             nb_oargs = 0;
2565             col += ne_fprintf(f, "\n ----");
2566 
2567             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2568                 col += ne_fprintf(f, " %016" PRIx64,
2569                                   tcg_get_insn_start_param(op, i));
2570             }
2571         } else if (c == INDEX_op_call) {
2572             const TCGHelperInfo *info = tcg_call_info(op);
2573             void *func = tcg_call_func(op);
2574 
2575             /* variable number of arguments */
2576             nb_oargs = TCGOP_CALLO(op);
2577             nb_iargs = TCGOP_CALLI(op);
2578             nb_cargs = def->nb_cargs;
2579 
2580             col += ne_fprintf(f, " %s ", def->name);
2581 
2582             /*
2583              * Print the function name from TCGHelperInfo, if available.
2584              * Note that plugins have a template function for the info,
2585              * but the actual function pointer comes from the plugin.
2586              */
2587             if (func == info->func) {
2588                 col += ne_fprintf(f, "%s", info->name);
2589             } else {
2590                 col += ne_fprintf(f, "plugin(%p)", func);
2591             }
2592 
2593             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2594             for (i = 0; i < nb_oargs; i++) {
2595                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2596                                                             op->args[i]));
2597             }
2598             for (i = 0; i < nb_iargs; i++) {
2599                 TCGArg arg = op->args[nb_oargs + i];
2600                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2601                 col += ne_fprintf(f, ",%s", t);
2602             }
2603         } else {
2604             col += ne_fprintf(f, " %s ", def->name);
2605 
2606             nb_oargs = def->nb_oargs;
2607             nb_iargs = def->nb_iargs;
2608             nb_cargs = def->nb_cargs;
2609 
2610             if (def->flags & TCG_OPF_VECTOR) {
2611                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2612                                   8 << TCGOP_VECE(op));
2613             }
2614 
2615             k = 0;
2616             for (i = 0; i < nb_oargs; i++) {
2617                 const char *sep = k ? "," : "";
2618                 col += ne_fprintf(f, "%s%s", sep,
2619                                   tcg_get_arg_str(s, buf, sizeof(buf),
2620                                                   op->args[k++]));
2621             }
2622             for (i = 0; i < nb_iargs; i++) {
2623                 const char *sep = k ? "," : "";
2624                 col += ne_fprintf(f, "%s%s", sep,
2625                                   tcg_get_arg_str(s, buf, sizeof(buf),
2626                                                   op->args[k++]));
2627             }
2628             switch (c) {
2629             case INDEX_op_brcond_i32:
2630             case INDEX_op_setcond_i32:
2631             case INDEX_op_negsetcond_i32:
2632             case INDEX_op_movcond_i32:
2633             case INDEX_op_brcond2_i32:
2634             case INDEX_op_setcond2_i32:
2635             case INDEX_op_brcond_i64:
2636             case INDEX_op_setcond_i64:
2637             case INDEX_op_negsetcond_i64:
2638             case INDEX_op_movcond_i64:
2639             case INDEX_op_cmp_vec:
2640             case INDEX_op_cmpsel_vec:
2641                 if (op->args[k] < ARRAY_SIZE(cond_name)
2642                     && cond_name[op->args[k]]) {
2643                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2644                 } else {
2645                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2646                 }
2647                 i = 1;
2648                 break;
2649             case INDEX_op_qemu_ld_a32_i32:
2650             case INDEX_op_qemu_ld_a64_i32:
2651             case INDEX_op_qemu_st_a32_i32:
2652             case INDEX_op_qemu_st_a64_i32:
2653             case INDEX_op_qemu_st8_a32_i32:
2654             case INDEX_op_qemu_st8_a64_i32:
2655             case INDEX_op_qemu_ld_a32_i64:
2656             case INDEX_op_qemu_ld_a64_i64:
2657             case INDEX_op_qemu_st_a32_i64:
2658             case INDEX_op_qemu_st_a64_i64:
2659             case INDEX_op_qemu_ld_a32_i128:
2660             case INDEX_op_qemu_ld_a64_i128:
2661             case INDEX_op_qemu_st_a32_i128:
2662             case INDEX_op_qemu_st_a64_i128:
2663                 {
2664                     const char *s_al, *s_op, *s_at;
2665                     MemOpIdx oi = op->args[k++];
2666                     MemOp mop = get_memop(oi);
2667                     unsigned ix = get_mmuidx(oi);
2668 
2669                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2670                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2671                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2672                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2673 
2674                     /* If all fields are accounted for, print symbolically. */
2675                     if (!mop && s_al && s_op && s_at) {
2676                         col += ne_fprintf(f, ",%s%s%s,%u",
2677                                           s_at, s_al, s_op, ix);
2678                     } else {
2679                         mop = get_memop(oi);
2680                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2681                     }
2682                     i = 1;
2683                 }
2684                 break;
2685             case INDEX_op_bswap16_i32:
2686             case INDEX_op_bswap16_i64:
2687             case INDEX_op_bswap32_i32:
2688             case INDEX_op_bswap32_i64:
2689             case INDEX_op_bswap64_i64:
2690                 {
2691                     TCGArg flags = op->args[k];
2692                     const char *name = NULL;
2693 
2694                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2695                         name = bswap_flag_name[flags];
2696                     }
2697                     if (name) {
2698                         col += ne_fprintf(f, ",%s", name);
2699                     } else {
2700                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2701                     }
2702                     i = k = 1;
2703                 }
2704                 break;
2705             default:
2706                 i = 0;
2707                 break;
2708             }
2709             switch (c) {
2710             case INDEX_op_set_label:
2711             case INDEX_op_br:
2712             case INDEX_op_brcond_i32:
2713             case INDEX_op_brcond_i64:
2714             case INDEX_op_brcond2_i32:
2715                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2716                                   arg_label(op->args[k])->id);
2717                 i++, k++;
2718                 break;
2719             case INDEX_op_mb:
2720                 {
2721                     TCGBar membar = op->args[k];
2722                     const char *b_op, *m_op;
2723 
2724                     switch (membar & TCG_BAR_SC) {
2725                     case 0:
2726                         b_op = "none";
2727                         break;
2728                     case TCG_BAR_LDAQ:
2729                         b_op = "acq";
2730                         break;
2731                     case TCG_BAR_STRL:
2732                         b_op = "rel";
2733                         break;
2734                     case TCG_BAR_SC:
2735                         b_op = "seq";
2736                         break;
2737                     default:
2738                         g_assert_not_reached();
2739                     }
2740 
2741                     switch (membar & TCG_MO_ALL) {
2742                     case 0:
2743                         m_op = "none";
2744                         break;
2745                     case TCG_MO_LD_LD:
2746                         m_op = "rr";
2747                         break;
2748                     case TCG_MO_LD_ST:
2749                         m_op = "rw";
2750                         break;
2751                     case TCG_MO_ST_LD:
2752                         m_op = "wr";
2753                         break;
2754                     case TCG_MO_ST_ST:
2755                         m_op = "ww";
2756                         break;
2757                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2758                         m_op = "rr+rw";
2759                         break;
2760                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2761                         m_op = "rr+wr";
2762                         break;
2763                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2764                         m_op = "rr+ww";
2765                         break;
2766                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2767                         m_op = "rw+wr";
2768                         break;
2769                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2770                         m_op = "rw+ww";
2771                         break;
2772                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2773                         m_op = "wr+ww";
2774                         break;
2775                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2776                         m_op = "rr+rw+wr";
2777                         break;
2778                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2779                         m_op = "rr+rw+ww";
2780                         break;
2781                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2782                         m_op = "rr+wr+ww";
2783                         break;
2784                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2785                         m_op = "rw+wr+ww";
2786                         break;
2787                     case TCG_MO_ALL:
2788                         m_op = "all";
2789                         break;
2790                     default:
2791                         g_assert_not_reached();
2792                     }
2793 
2794                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2795                     i++, k++;
2796                 }
2797                 break;
2798             default:
2799                 break;
2800             }
2801             for (; i < nb_cargs; i++, k++) {
2802                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2803                                   op->args[k]);
2804             }
2805         }
2806 
2807         if (have_prefs || op->life) {
2808             for (; col < 40; ++col) {
2809                 putc(' ', f);
2810             }
2811         }
2812 
2813         if (op->life) {
2814             unsigned life = op->life;
2815 
2816             if (life & (SYNC_ARG * 3)) {
2817                 ne_fprintf(f, "  sync:");
2818                 for (i = 0; i < 2; ++i) {
2819                     if (life & (SYNC_ARG << i)) {
2820                         ne_fprintf(f, " %d", i);
2821                     }
2822                 }
2823             }
2824             life /= DEAD_ARG;
2825             if (life) {
2826                 ne_fprintf(f, "  dead:");
2827                 for (i = 0; life; ++i, life >>= 1) {
2828                     if (life & 1) {
2829                         ne_fprintf(f, " %d", i);
2830                     }
2831                 }
2832             }
2833         }
2834 
2835         if (have_prefs) {
2836             for (i = 0; i < nb_oargs; ++i) {
2837                 TCGRegSet set = output_pref(op, i);
2838 
2839                 if (i == 0) {
2840                     ne_fprintf(f, "  pref=");
2841                 } else {
2842                     ne_fprintf(f, ",");
2843                 }
2844                 if (set == 0) {
2845                     ne_fprintf(f, "none");
2846                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2847                     ne_fprintf(f, "all");
2848 #ifdef CONFIG_DEBUG_TCG
2849                 } else if (tcg_regset_single(set)) {
2850                     TCGReg reg = tcg_regset_first(set);
2851                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2852 #endif
2853                 } else if (TCG_TARGET_NB_REGS <= 32) {
2854                     ne_fprintf(f, "0x%x", (uint32_t)set);
2855                 } else {
2856                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2857                 }
2858             }
2859         }
2860 
2861         putc('\n', f);
2862     }
2863 }
2864 
2865 /* We give more priority to constraints with fewer registers. */
2866 static int get_constraint_priority(const TCGOpDef *def, int k)
2867 {
2868     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2869     int n = ctpop64(arg_ct->regs);
2870 
2871     /*
2872      * Sort constraints of a single register first, which includes output
2873      * aliases (which must exactly match the input already allocated).
2874      */
2875     if (n == 1 || arg_ct->oalias) {
2876         return INT_MAX;
2877     }
2878 
2879     /*
2880      * Sort register pairs next, first then second immediately after.
2881      * Arbitrarily sort multiple pairs by the index of the first reg;
2882      * there shouldn't be many pairs.
2883      */
2884     switch (arg_ct->pair) {
2885     case 1:
2886     case 3:
2887         return (k + 1) * 2;
2888     case 2:
2889         return (arg_ct->pair_index + 1) * 2 - 1;
2890     }
2891 
2892     /* Finally, sort by decreasing register count. */
2893     assert(n > 1);
2894     return -n;
2895 }
2896 
2897 /* sort from highest priority to lowest */
2898 static void sort_constraints(TCGOpDef *def, int start, int n)
2899 {
2900     int i, j;
2901     TCGArgConstraint *a = def->args_ct;
2902 
2903     for (i = 0; i < n; i++) {
2904         a[start + i].sort_index = start + i;
2905     }
2906     if (n <= 1) {
2907         return;
2908     }
2909     for (i = 0; i < n - 1; i++) {
2910         for (j = i + 1; j < n; j++) {
2911             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2912             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2913             if (p1 < p2) {
2914                 int tmp = a[start + i].sort_index;
2915                 a[start + i].sort_index = a[start + j].sort_index;
2916                 a[start + j].sort_index = tmp;
2917             }
2918         }
2919     }
2920 }
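
/*
 * Editorial note: the above is a simple O(n^2) exchange sort on
 * sort_index; n is bounded by TCG_MAX_OP_ARGS, so this is cheap,
 * and it runs only once per context in process_op_defs().
 */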
2921 
2922 static void process_op_defs(TCGContext *s)
2923 {
2924     TCGOpcode op;
2925 
2926     for (op = 0; op < NB_OPS; op++) {
2927         TCGOpDef *def = &tcg_op_defs[op];
2928         const TCGTargetOpDef *tdefs;
2929         bool saw_alias_pair = false;
2930         int i, o, i2, o2, nb_args;
2931 
2932         if (def->flags & TCG_OPF_NOT_PRESENT) {
2933             continue;
2934         }
2935 
2936         nb_args = def->nb_iargs + def->nb_oargs;
2937         if (nb_args == 0) {
2938             continue;
2939         }
2940 
2941         /*
2942          * Macro magic should make it impossible, but double-check that
2943          * the array index is in range.  Since the signedness of an enum
2944          * is implementation-defined, force the result to unsigned.
2945          */
2946         unsigned con_set = tcg_target_op_def(op);
2947         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2948         tdefs = &constraint_sets[con_set];
2949 
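        /*
         * Parse each argument's constraint string.  Illustrative
         * letters (editorial; the authoritative set is per-backend,
         * in tcg-target-con-str.h):
         *   "r"   any register        "ri"  register or immediate
         *   "0"   alias of output 0   "&"   early-clobber output
         *   "p"/"m"  second/first register of an allocated pair
         */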
2950         for (i = 0; i < nb_args; i++) {
2951             const char *ct_str = tdefs->args_ct_str[i];
2952             bool input_p = i >= def->nb_oargs;
2953 
2954             /* Incomplete TCGTargetOpDef entry. */
2955             tcg_debug_assert(ct_str != NULL);
2956 
2957             switch (*ct_str) {
2958             case '0' ... '9':
2959                 o = *ct_str - '0';
2960                 tcg_debug_assert(input_p);
2961                 tcg_debug_assert(o < def->nb_oargs);
2962                 tcg_debug_assert(def->args_ct[o].regs != 0);
2963                 tcg_debug_assert(!def->args_ct[o].oalias);
2964                 def->args_ct[i] = def->args_ct[o];
2965                 /* The output sets oalias.  */
2966                 def->args_ct[o].oalias = 1;
2967                 def->args_ct[o].alias_index = i;
2968                 /* The input sets ialias. */
2969                 def->args_ct[i].ialias = 1;
2970                 def->args_ct[i].alias_index = o;
2971                 if (def->args_ct[i].pair) {
2972                     saw_alias_pair = true;
2973                 }
2974                 tcg_debug_assert(ct_str[1] == '\0');
2975                 continue;
2976 
2977             case '&':
2978                 tcg_debug_assert(!input_p);
2979                 def->args_ct[i].newreg = true;
2980                 ct_str++;
2981                 break;
2982 
2983             case 'p': /* plus */
2984                 /* Allocate to the register after the previous. */
2985                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2986                 o = i - 1;
2987                 tcg_debug_assert(!def->args_ct[o].pair);
2988                 tcg_debug_assert(!def->args_ct[o].ct);
2989                 def->args_ct[i] = (TCGArgConstraint){
2990                     .pair = 2,
2991                     .pair_index = o,
2992                     .regs = def->args_ct[o].regs << 1,
2993                 };
2994                 def->args_ct[o].pair = 1;
2995                 def->args_ct[o].pair_index = i;
2996                 tcg_debug_assert(ct_str[1] == '\0');
2997                 continue;
2998 
2999             case 'm': /* minus */
3000                 /* Allocate to the register before the previous. */
3001                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3002                 o = i - 1;
3003                 tcg_debug_assert(!def->args_ct[o].pair);
3004                 tcg_debug_assert(!def->args_ct[o].ct);
3005                 def->args_ct[i] = (TCGArgConstraint){
3006                     .pair = 1,
3007                     .pair_index = o,
3008                     .regs = def->args_ct[o].regs >> 1,
3009                 };
3010                 def->args_ct[o].pair = 2;
3011                 def->args_ct[o].pair_index = i;
3012                 tcg_debug_assert(ct_str[1] == '\0');
3013                 continue;
3014             }
3015 
3016             do {
3017                 switch (*ct_str) {
3018                 case 'i':
3019                     def->args_ct[i].ct |= TCG_CT_CONST;
3020                     break;
3021 
3022                 /* Include all of the target-specific constraints. */
3023 
3024 #undef CONST
3025 #define CONST(CASE, MASK) \
3026     case CASE: def->args_ct[i].ct |= MASK; break;
3027 #define REGS(CASE, MASK) \
3028     case CASE: def->args_ct[i].regs |= MASK; break;
3029 
3030 #include "tcg-target-con-str.h"
3031 
3032 #undef REGS
3033 #undef CONST
3034                 default:
3035                 case '0' ... '9':
3036                 case '&':
3037                 case 'p':
3038                 case 'm':
3039                     /* Typo in TCGTargetOpDef constraint. */
3040                     g_assert_not_reached();
3041                 }
3042             } while (*++ct_str != '\0');
3043         }
3044 
3045         /* TCGTargetOpDef entry with too much information? */
3046         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3047 
3048         /*
3049          * Fix up output pairs that are aliased with inputs.
3050          * When we created the alias, we copied pair from the output.
3051          * There are three cases:
3052          *    (1a) Pairs of inputs alias pairs of outputs.
3053          *    (1b) One input aliases the first of a pair of outputs.
3054          *    (2)  One input aliases the second of a pair of outputs.
3055          *
3056          * Case 1a is handled by making sure that the pair_index'es are
3057          * properly updated so that they appear the same as a pair of inputs.
3058          *
3059          * Case 1b is handled by setting the pair_index of the input to
3060          * itself, simply so it doesn't point to an unrelated argument.
3061          * Since we don't encounter the "second" during the input allocation
3062          * phase, nothing happens with the second half of the input pair.
3063          *
3064          * Case 2 is handled by setting the second input to pair=3, the
3065          * first output to pair=3, and the pair_index'es to match.
3066          */
3067         if (saw_alias_pair) {
3068             for (i = def->nb_oargs; i < nb_args; i++) {
3069                 /*
3070                  * Since [0-9pm] must be alone in the constraint string,
3071                  * the only way they can both be set is if the pair comes
3072                  * from the output alias.
3073                  */
3074                 if (!def->args_ct[i].ialias) {
3075                     continue;
3076                 }
3077                 switch (def->args_ct[i].pair) {
3078                 case 0:
3079                     break;
3080                 case 1:
3081                     o = def->args_ct[i].alias_index;
3082                     o2 = def->args_ct[o].pair_index;
3083                     tcg_debug_assert(def->args_ct[o].pair == 1);
3084                     tcg_debug_assert(def->args_ct[o2].pair == 2);
3085                     if (def->args_ct[o2].oalias) {
3086                         /* Case 1a */
3087                         i2 = def->args_ct[o2].alias_index;
3088                         tcg_debug_assert(def->args_ct[i2].pair == 2);
3089                         def->args_ct[i2].pair_index = i;
3090                         def->args_ct[i].pair_index = i2;
3091                     } else {
3092                         /* Case 1b */
3093                         def->args_ct[i].pair_index = i;
3094                     }
3095                     break;
3096                 case 2:
3097                     o = def->args_ct[i].alias_index;
3098                     o2 = def->args_ct[o].pair_index;
3099                     tcg_debug_assert(def->args_ct[o].pair == 2);
3100                     tcg_debug_assert(def->args_ct[o2].pair == 1);
3101                     if (def->args_ct[o2].oalias) {
3102                         /* Case 1a */
3103                         i2 = def->args_ct[o2].alias_index;
3104                         tcg_debug_assert(def->args_ct[i2].pair == 1);
3105                         def->args_ct[i2].pair_index = i;
3106                         def->args_ct[i].pair_index = i2;
3107                     } else {
3108                         /* Case 2 */
3109                         def->args_ct[i].pair = 3;
3110                         def->args_ct[o2].pair = 3;
3111                         def->args_ct[i].pair_index = o2;
3112                         def->args_ct[o2].pair_index = i;
3113                     }
3114                     break;
3115                 default:
3116                     g_assert_not_reached();
3117                 }
3118             }
3119         }
3120 
3121         /* Sort the constraints (XXX: this is just a heuristic). */
3122         sort_constraints(def, 0, def->nb_oargs);
3123         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3124     }
3125 }
3126 
3127 static void remove_label_use(TCGOp *op, int idx)
3128 {
3129     TCGLabel *label = arg_label(op->args[idx]);
3130     TCGLabelUse *use;
3131 
3132     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3133         if (use->op == op) {
3134             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3135             return;
3136         }
3137     }
3138     g_assert_not_reached();
3139 }
3140 
3141 void tcg_op_remove(TCGContext *s, TCGOp *op)
3142 {
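    /*
     * The label operand index depends on each opcode's argument layout:
     * br is (label) so index 0, brcond is (a, b, cond, label) so index 3,
     * and brcond2_i32 is (al, ah, bl, bh, cond, label) so index 5.
     */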
3143     switch (op->opc) {
3144     case INDEX_op_br:
3145         remove_label_use(op, 0);
3146         break;
3147     case INDEX_op_brcond_i32:
3148     case INDEX_op_brcond_i64:
3149         remove_label_use(op, 3);
3150         break;
3151     case INDEX_op_brcond2_i32:
3152         remove_label_use(op, 5);
3153         break;
3154     default:
3155         break;
3156     }
3157 
3158     QTAILQ_REMOVE(&s->ops, op, link);
3159     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3160     s->nb_ops--;
3161 }
3162 
3163 void tcg_remove_ops_after(TCGOp *op)
3164 {
3165     TCGContext *s = tcg_ctx;
3166 
3167     while (true) {
3168         TCGOp *last = tcg_last_op();
3169         if (last == op) {
3170             return;
3171         }
3172         tcg_op_remove(s, last);
3173     }
3174 }
3175 
3176 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3177 {
3178     TCGContext *s = tcg_ctx;
3179     TCGOp *op = NULL;
3180 
3181     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3182         QTAILQ_FOREACH(op, &s->free_ops, link) {
3183             if (nargs <= op->nargs) {
3184                 QTAILQ_REMOVE(&s->free_ops, op, link);
3185                 nargs = op->nargs;
3186                 goto found;
3187             }
3188         }
3189     }
3190 
3191     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3192     nargs = MAX(4, nargs);
3193     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3194 
3195  found:
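    /* Zero every field that precedes the list linkage; the link member
       itself is (re)initialized when the op is inserted into a list. */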
3196     memset(op, 0, offsetof(TCGOp, link));
3197     op->opc = opc;
3198     op->nargs = nargs;
3199 
3200     /* Check for bitfield overflow. */
3201     tcg_debug_assert(op->nargs == nargs);
3202 
3203     s->nb_ops++;
3204     return op;
3205 }
3206 
3207 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3208 {
3209     TCGOp *op = tcg_op_alloc(opc, nargs);
3210     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3211     return op;
3212 }
3213 
3214 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3215                             TCGOpcode opc, unsigned nargs)
3216 {
3217     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3218     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3219     return new_op;
3220 }
3221 
3222 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3223                            TCGOpcode opc, unsigned nargs)
3224 {
3225     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3226     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3227     return new_op;
3228 }
3229 
3230 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3231 {
3232     TCGLabelUse *u;
3233 
3234     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3235         TCGOp *op = u->op;
3236         switch (op->opc) {
3237         case INDEX_op_br:
3238             op->args[0] = label_arg(to);
3239             break;
3240         case INDEX_op_brcond_i32:
3241         case INDEX_op_brcond_i64:
3242             op->args[3] = label_arg(to);
3243             break;
3244         case INDEX_op_brcond2_i32:
3245             op->args[5] = label_arg(to);
3246             break;
3247         default:
3248             g_assert_not_reached();
3249         }
3250     }
3251 
3252     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3253 }
3254 
3255 /* Reachability analysis: remove unreachable code.  */
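/*
 * Illustrative example: given the op stream
 *     br $L1; mov t0,t1; set_label $L0; set_label $L1
 * the dead mov is removed, the uses of $L0 are merged into $L1, the
 * branch-to-next is removed, and $L1 itself goes away if unreferenced.
 */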
3256 static void __attribute__((noinline))
3257 reachable_code_pass(TCGContext *s)
3258 {
3259     TCGOp *op, *op_next, *op_prev;
3260     bool dead = false;
3261 
3262     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3263         bool remove = dead;
3264         TCGLabel *label;
3265 
3266         switch (op->opc) {
3267         case INDEX_op_set_label:
3268             label = arg_label(op->args[0]);
3269 
3270             /*
3271              * Note that the first op in the TB is always a load,
3272              * so there is always something before a label.
3273              */
3274             op_prev = QTAILQ_PREV(op, link);
3275 
3276             /*
3277              * If we find two sequential labels, move all branches to
3278              * reference the second label and remove the first label.
3279              * Do this before branch to next optimization, so that the
3280              * middle label is out of the way.
3281              */
3282             if (op_prev->opc == INDEX_op_set_label) {
3283                 move_label_uses(label, arg_label(op_prev->args[0]));
3284                 tcg_op_remove(s, op_prev);
3285                 op_prev = QTAILQ_PREV(op, link);
3286             }
3287 
3288             /*
3289              * Optimization can fold conditional branches to unconditional.
3290              * If we find a label which is preceded by an unconditional
3291              * branch to next, remove the branch.  We couldn't do this when
3292              * processing the branch because any dead code between the branch
3293              * and label had not yet been removed.
3294              */
3295             if (op_prev->opc == INDEX_op_br &&
3296                 label == arg_label(op_prev->args[0])) {
3297                 tcg_op_remove(s, op_prev);
3298                 /* Fall through means insns become live again.  */
3299                 dead = false;
3300             }
3301 
3302             if (QSIMPLEQ_EMPTY(&label->branches)) {
3303                 /*
3304                  * While there is an occasional backward branch, virtually
3305                  * all branches generated by the translators are forward.
3306                  * This means that generally we will have already removed
3307                  * all references to this label, and there is
3308                  * little to be gained by iterating.
3309                  */
3310                 remove = true;
3311             } else {
3312                 /* Once we see a label, insns become live again.  */
3313                 dead = false;
3314                 remove = false;
3315             }
3316             break;
3317 
3318         case INDEX_op_br:
3319         case INDEX_op_exit_tb:
3320         case INDEX_op_goto_ptr:
3321             /* Unconditional branches; everything following is dead.  */
3322             dead = true;
3323             break;
3324 
3325         case INDEX_op_call:
3326             /* Notice noreturn helper calls, raising exceptions.  */
3327             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3328                 dead = true;
3329             }
3330             break;
3331 
3332         case INDEX_op_insn_start:
3333             /* Never remove -- we need to keep these for unwind.  */
3334             remove = false;
3335             break;
3336 
3337         default:
3338             break;
3339         }
3340 
3341         if (remove) {
3342             tcg_op_remove(s, op);
3343         }
3344     }
3345 }
3346 
3347 #define TS_DEAD  1
3348 #define TS_MEM   2
3349 
3350 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3351 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
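/*
 * Notes on the encoding: seen from the backward walk of liveness_pass_1,
 * TS_DEAD means the temp's current value has no later use, and TS_MEM
 * means the canonical memory slot must hold the value.  Each argument
 * index n of an op gets one dead bit and one sync bit in op->life, so
 * e.g. arg_life = (DEAD_ARG << 1) | (SYNC_ARG << 0) records that
 * argument 1 dies at this op and argument 0 must be synced to memory.
 */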
3352 
3353 /* For liveness_pass_1, the register preferences for a given temp.  */
3354 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3355 {
3356     return ts->state_ptr;
3357 }
3358 
3359 /* For liveness_pass_1, reset the preferences for a given temp to the
3360  * maximal regset for its type.
3361  */
3362 static inline void la_reset_pref(TCGTemp *ts)
3363 {
3364     *la_temp_pref(ts)
3365         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3366 }
3367 
3368 /* liveness analysis: end of function: all temps are dead, and globals
3369    should be in memory. */
3370 static void la_func_end(TCGContext *s, int ng, int nt)
3371 {
3372     int i;
3373 
3374     for (i = 0; i < ng; ++i) {
3375         s->temps[i].state = TS_DEAD | TS_MEM;
3376         la_reset_pref(&s->temps[i]);
3377     }
3378     for (i = ng; i < nt; ++i) {
3379         s->temps[i].state = TS_DEAD;
3380         la_reset_pref(&s->temps[i]);
3381     }
3382 }
3383 
3384 /* liveness analysis: end of basic block: all temps are dead, globals
3385    and local temps should be in memory. */
3386 static void la_bb_end(TCGContext *s, int ng, int nt)
3387 {
3388     int i;
3389 
3390     for (i = 0; i < nt; ++i) {
3391         TCGTemp *ts = &s->temps[i];
3392         int state;
3393 
3394         switch (ts->kind) {
3395         case TEMP_FIXED:
3396         case TEMP_GLOBAL:
3397         case TEMP_TB:
3398             state = TS_DEAD | TS_MEM;
3399             break;
3400         case TEMP_EBB:
3401         case TEMP_CONST:
3402             state = TS_DEAD;
3403             break;
3404         default:
3405             g_assert_not_reached();
3406         }
3407         ts->state = state;
3408         la_reset_pref(ts);
3409     }
3410 }
3411 
3412 /* liveness analysis: sync globals back to memory.  */
3413 static void la_global_sync(TCGContext *s, int ng)
3414 {
3415     int i;
3416 
3417     for (i = 0; i < ng; ++i) {
3418         int state = s->temps[i].state;
3419         s->temps[i].state = state | TS_MEM;
3420         if (state == TS_DEAD) {
3421             /* If the global was previously dead, reset prefs.  */
3422             la_reset_pref(&s->temps[i]);
3423         }
3424     }
3425 }
3426 
3427 /*
3428  * liveness analysis: conditional branch: all temps are dead unless
3429  * explicitly live-across-conditional-branch, globals and local temps
3430  * should be synced.
3431  */
3432 static void la_bb_sync(TCGContext *s, int ng, int nt)
3433 {
3434     la_global_sync(s, ng);
3435 
3436     for (int i = ng; i < nt; ++i) {
3437         TCGTemp *ts = &s->temps[i];
3438         int state;
3439 
3440         switch (ts->kind) {
3441         case TEMP_TB:
3442             state = ts->state;
3443             ts->state = state | TS_MEM;
3444             if (state != TS_DEAD) {
3445                 continue;
3446             }
3447             break;
3448         case TEMP_EBB:
3449         case TEMP_CONST:
3450             continue;
3451         default:
3452             g_assert_not_reached();
3453         }
3454         la_reset_pref(&s->temps[i]);
3455     }
3456 }
3457 
3458 /* liveness analysis: sync globals back to memory and kill.  */
3459 static void la_global_kill(TCGContext *s, int ng)
3460 {
3461     int i;
3462 
3463     for (i = 0; i < ng; i++) {
3464         s->temps[i].state = TS_DEAD | TS_MEM;
3465         la_reset_pref(&s->temps[i]);
3466     }
3467 }
3468 
3469 /* liveness analysis: note live temps crossing calls.  */
3470 static void la_cross_call(TCGContext *s, int nt)
3471 {
3472     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3473     int i;
3474 
3475     for (i = 0; i < nt; i++) {
3476         TCGTemp *ts = &s->temps[i];
3477         if (!(ts->state & TS_DEAD)) {
3478             TCGRegSet *pset = la_temp_pref(ts);
3479             TCGRegSet set = *pset;
3480 
3481             set &= mask;
3482             /* If the combination is not possible, restart.  */
3483             if (set == 0) {
3484                 set = tcg_target_available_regs[ts->type] & mask;
3485             }
3486             *pset = set;
3487         }
3488     }
3489 }
3490 
3491 /*
3492  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3493  * to TEMP_EBB, if possible.
3494  */
3495 static void __attribute__((noinline))
3496 liveness_pass_0(TCGContext *s)
3497 {
3498     void * const multiple_ebb = (void *)(uintptr_t)-1;
3499     int nb_temps = s->nb_temps;
3500     TCGOp *op, *ebb;
3501 
3502     for (int i = s->nb_globals; i < nb_temps; ++i) {
3503         s->temps[i].state_ptr = NULL;
3504     }
3505 
3506     /*
3507      * Represent each EBB by the op at which it begins.  In the case of
3508      * the first EBB, this is the first op, otherwise it is a label.
3509      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3510      * within a single EBB, else MULTIPLE_EBB.
3511      */
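    /*
     * The per-temp summary thus moves down a small lattice:
     * NULL -> (the single using EBB) -> MULTIPLE_EBB.  E.g. a temp
     * written in one EBB and read after an intervening label ends as
     * MULTIPLE_EBB and must remain TEMP_TB.
     */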
3512     ebb = QTAILQ_FIRST(&s->ops);
3513     QTAILQ_FOREACH(op, &s->ops, link) {
3514         const TCGOpDef *def;
3515         int nb_oargs, nb_iargs;
3516 
3517         switch (op->opc) {
3518         case INDEX_op_set_label:
3519             ebb = op;
3520             continue;
3521         case INDEX_op_discard:
3522             continue;
3523         case INDEX_op_call:
3524             nb_oargs = TCGOP_CALLO(op);
3525             nb_iargs = TCGOP_CALLI(op);
3526             break;
3527         default:
3528             def = &tcg_op_defs[op->opc];
3529             nb_oargs = def->nb_oargs;
3530             nb_iargs = def->nb_iargs;
3531             break;
3532         }
3533 
3534         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3535             TCGTemp *ts = arg_temp(op->args[i]);
3536 
3537             if (ts->kind != TEMP_TB) {
3538                 continue;
3539             }
3540             if (ts->state_ptr == NULL) {
3541                 ts->state_ptr = ebb;
3542             } else if (ts->state_ptr != ebb) {
3543                 ts->state_ptr = multiple_ebb;
3544             }
3545         }
3546     }
3547 
3548     /*
3549      * For TEMP_TB that turned out not to be used beyond one EBB,
3550      * reduce the liveness to TEMP_EBB.
3551      */
3552     for (int i = s->nb_globals; i < nb_temps; ++i) {
3553         TCGTemp *ts = &s->temps[i];
3554         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3555             ts->kind = TEMP_EBB;
3556         }
3557     }
3558 }
3559 
3560 /* Liveness analysis: update the opc_arg_life array to tell if a
3561    given input argument is dead. Instructions updating dead
3562    temporaries are removed. */
3563 static void __attribute__((noinline))
3564 liveness_pass_1(TCGContext *s)
3565 {
3566     int nb_globals = s->nb_globals;
3567     int nb_temps = s->nb_temps;
3568     TCGOp *op, *op_prev;
3569     TCGRegSet *prefs;
3570     int i;
3571 
3572     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3573     for (i = 0; i < nb_temps; ++i) {
3574         s->temps[i].state_ptr = prefs + i;
3575     }
3576 
3577     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3578     la_func_end(s, nb_globals, nb_temps);
3579 
3580     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3581         int nb_iargs, nb_oargs;
3582         TCGOpcode opc_new, opc_new2;
3583         bool have_opc_new2;
3584         TCGLifeData arg_life = 0;
3585         TCGTemp *ts;
3586         TCGOpcode opc = op->opc;
3587         const TCGOpDef *def = &tcg_op_defs[opc];
3588 
3589         switch (opc) {
3590         case INDEX_op_call:
3591             {
3592                 const TCGHelperInfo *info = tcg_call_info(op);
3593                 int call_flags = tcg_call_flags(op);
3594 
3595                 nb_oargs = TCGOP_CALLO(op);
3596                 nb_iargs = TCGOP_CALLI(op);
3597 
3598                 /* pure functions can be removed if their result is unused */
3599                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3600                     for (i = 0; i < nb_oargs; i++) {
3601                         ts = arg_temp(op->args[i]);
3602                         if (ts->state != TS_DEAD) {
3603                             goto do_not_remove_call;
3604                         }
3605                     }
3606                     goto do_remove;
3607                 }
3608             do_not_remove_call:
3609 
3610                 /* Output args are dead.  */
3611                 for (i = 0; i < nb_oargs; i++) {
3612                     ts = arg_temp(op->args[i]);
3613                     if (ts->state & TS_DEAD) {
3614                         arg_life |= DEAD_ARG << i;
3615                     }
3616                     if (ts->state & TS_MEM) {
3617                         arg_life |= SYNC_ARG << i;
3618                     }
3619                     ts->state = TS_DEAD;
3620                     la_reset_pref(ts);
3621                 }
3622 
3623                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3624                 memset(op->output_pref, 0, sizeof(op->output_pref));
3625 
3626                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3627                                     TCG_CALL_NO_READ_GLOBALS))) {
3628                     la_global_kill(s, nb_globals);
3629                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3630                     la_global_sync(s, nb_globals);
3631                 }
3632 
3633                 /* Record arguments that die in this helper.  */
3634                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3635                     ts = arg_temp(op->args[i]);
3636                     if (ts->state & TS_DEAD) {
3637                         arg_life |= DEAD_ARG << i;
3638                     }
3639                 }
3640 
3641                 /* For all live registers, remove call-clobbered prefs.  */
3642                 la_cross_call(s, nb_temps);
3643 
3644                 /*
3645                  * Input arguments are live for preceding opcodes.
3646                  *
3647                  * For those arguments that die, and will be allocated in
3648                  * registers, clear the register set for that arg, to be
3649                  * filled in below.  For args that will be on the stack,
3650                  * reset to any available reg.  Process arguments in reverse
3651                  * order so that if a temp is used more than once, the stack
3652                  * reset to max happens before the register reset to 0.
3653                  */
3654                 for (i = nb_iargs - 1; i >= 0; i--) {
3655                     const TCGCallArgumentLoc *loc = &info->in[i];
3656                     ts = arg_temp(op->args[nb_oargs + i]);
3657 
3658                     if (ts->state & TS_DEAD) {
3659                         switch (loc->kind) {
3660                         case TCG_CALL_ARG_NORMAL:
3661                         case TCG_CALL_ARG_EXTEND_U:
3662                         case TCG_CALL_ARG_EXTEND_S:
3663                             if (arg_slot_reg_p(loc->arg_slot)) {
3664                                 *la_temp_pref(ts) = 0;
3665                                 break;
3666                             }
3667                             /* fall through */
3668                         default:
3669                             *la_temp_pref(ts) =
3670                                 tcg_target_available_regs[ts->type];
3671                             break;
3672                         }
3673                         ts->state &= ~TS_DEAD;
3674                     }
3675                 }
3676 
3677                 /*
3678                  * For each input argument, add its input register to prefs.
3679                  * If a temp is used once, this produces a single set bit;
3680                  * if a temp is used multiple times, this produces a set.
3681                  */
3682                 for (i = 0; i < nb_iargs; i++) {
3683                     const TCGCallArgumentLoc *loc = &info->in[i];
3684                     ts = arg_temp(op->args[nb_oargs + i]);
3685 
3686                     switch (loc->kind) {
3687                     case TCG_CALL_ARG_NORMAL:
3688                     case TCG_CALL_ARG_EXTEND_U:
3689                     case TCG_CALL_ARG_EXTEND_S:
3690                         if (arg_slot_reg_p(loc->arg_slot)) {
3691                             tcg_regset_set_reg(*la_temp_pref(ts),
3692                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3693                         }
3694                         break;
3695                     default:
3696                         break;
3697                     }
3698                 }
3699             }
3700             break;
3701         case INDEX_op_insn_start:
3702             break;
3703         case INDEX_op_discard:
3704             /* mark the temporary as dead */
3705             ts = arg_temp(op->args[0]);
3706             ts->state = TS_DEAD;
3707             la_reset_pref(ts);
3708             break;
3709 
3710         case INDEX_op_add2_i32:
3711             opc_new = INDEX_op_add_i32;
3712             goto do_addsub2;
3713         case INDEX_op_sub2_i32:
3714             opc_new = INDEX_op_sub_i32;
3715             goto do_addsub2;
3716         case INDEX_op_add2_i64:
3717             opc_new = INDEX_op_add_i64;
3718             goto do_addsub2;
3719         case INDEX_op_sub2_i64:
3720             opc_new = INDEX_op_sub_i64;
3721         do_addsub2:
3722             nb_iargs = 4;
3723             nb_oargs = 2;
3724             /* Test if the high part of the operation is dead, but not
3725                the low part.  The result can be optimized to a simple
3726                add or sub.  This happens often for an x86_64 guest when
3727                the cpu mode is set to 32 bit.  */
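            /* E.g. add2_i32 t0,t1,a0,a1,b0,b1 with t1 dead reduces to
               add_i32 t0,a0,b0, pulling the low inputs from args[2]
               and args[4]. */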
3728             if (arg_temp(op->args[1])->state == TS_DEAD) {
3729                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3730                     goto do_remove;
3731                 }
3732                 /* Replace the opcode and adjust the args in place,
3733                    leaving 3 unused args at the end.  */
3734                 op->opc = opc = opc_new;
3735                 op->args[1] = op->args[2];
3736                 op->args[2] = op->args[4];
3737                 /* Fall through and mark the single-word operation live.  */
3738                 nb_iargs = 2;
3739                 nb_oargs = 1;
3740             }
3741             goto do_not_remove;
3742 
3743         case INDEX_op_mulu2_i32:
3744             opc_new = INDEX_op_mul_i32;
3745             opc_new2 = INDEX_op_muluh_i32;
3746             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3747             goto do_mul2;
3748         case INDEX_op_muls2_i32:
3749             opc_new = INDEX_op_mul_i32;
3750             opc_new2 = INDEX_op_mulsh_i32;
3751             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3752             goto do_mul2;
3753         case INDEX_op_mulu2_i64:
3754             opc_new = INDEX_op_mul_i64;
3755             opc_new2 = INDEX_op_muluh_i64;
3756             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3757             goto do_mul2;
3758         case INDEX_op_muls2_i64:
3759             opc_new = INDEX_op_mul_i64;
3760             opc_new2 = INDEX_op_mulsh_i64;
3761             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3762             goto do_mul2;
3763         do_mul2:
3764             nb_iargs = 2;
3765             nb_oargs = 2;
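            /* E.g. mulu2_i32 t0,t1,a,b with t1 dead becomes mul_i32 t0,a,b;
               with t0 dead and muluh available it becomes muluh_i32 t1,a,b. */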
3766             if (arg_temp(op->args[1])->state == TS_DEAD) {
3767                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3768                     /* Both parts of the operation are dead.  */
3769                     goto do_remove;
3770                 }
3771                 /* The high part of the operation is dead; generate the low. */
3772                 op->opc = opc = opc_new;
3773                 op->args[1] = op->args[2];
3774                 op->args[2] = op->args[3];
3775             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3776                 /* The low part of the operation is dead; generate the high. */
3777                 op->opc = opc = opc_new2;
3778                 op->args[0] = op->args[1];
3779                 op->args[1] = op->args[2];
3780                 op->args[2] = op->args[3];
3781             } else {
3782                 goto do_not_remove;
3783             }
3784             /* Mark the single-word operation live.  */
3785             nb_oargs = 1;
3786             goto do_not_remove;
3787 
3788         default:
3789             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3790             nb_iargs = def->nb_iargs;
3791             nb_oargs = def->nb_oargs;
3792 
3793             /* Test if the operation can be removed because all
3794                its outputs are dead. We assume that nb_oargs == 0
3795                implies side effects.  */
3796             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3797                 for (i = 0; i < nb_oargs; i++) {
3798                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3799                         goto do_not_remove;
3800                     }
3801                 }
3802                 goto do_remove;
3803             }
3804             goto do_not_remove;
3805 
3806         do_remove:
3807             tcg_op_remove(s, op);
3808             break;
3809 
3810         do_not_remove:
3811             for (i = 0; i < nb_oargs; i++) {
3812                 ts = arg_temp(op->args[i]);
3813 
3814                 /* Remember the preference of the uses that followed.  */
3815                 if (i < ARRAY_SIZE(op->output_pref)) {
3816                     op->output_pref[i] = *la_temp_pref(ts);
3817                 }
3818 
3819                 /* Output args are dead.  */
3820                 if (ts->state & TS_DEAD) {
3821                     arg_life |= DEAD_ARG << i;
3822                 }
3823                 if (ts->state & TS_MEM) {
3824                     arg_life |= SYNC_ARG << i;
3825                 }
3826                 ts->state = TS_DEAD;
3827                 la_reset_pref(ts);
3828             }
3829 
3830             /* If end of basic block, update.  */
3831             if (def->flags & TCG_OPF_BB_EXIT) {
3832                 la_func_end(s, nb_globals, nb_temps);
3833             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3834                 la_bb_sync(s, nb_globals, nb_temps);
3835             } else if (def->flags & TCG_OPF_BB_END) {
3836                 la_bb_end(s, nb_globals, nb_temps);
3837             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3838                 la_global_sync(s, nb_globals);
3839                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3840                     la_cross_call(s, nb_temps);
3841                 }
3842             }
3843 
3844             /* Record arguments that die in this opcode.  */
3845             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3846                 ts = arg_temp(op->args[i]);
3847                 if (ts->state & TS_DEAD) {
3848                     arg_life |= DEAD_ARG << i;
3849                 }
3850             }
3851 
3852             /* Input arguments are live for preceding opcodes.  */
3853             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3854                 ts = arg_temp(op->args[i]);
3855                 if (ts->state & TS_DEAD) {
3856                     /* For operands that were dead, initially allow
3857                        all regs for the type.  */
3858                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3859                     ts->state &= ~TS_DEAD;
3860                 }
3861             }
3862 
3863             /* Incorporate constraints for this operand.  */
3864             switch (opc) {
3865             case INDEX_op_mov_i32:
3866             case INDEX_op_mov_i64:
3867                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3868                    have proper constraints.  That said, special-case
3869                    moves to propagate preferences backward.  */
3870                 if (IS_DEAD_ARG(1)) {
3871                     *la_temp_pref(arg_temp(op->args[0]))
3872                         = *la_temp_pref(arg_temp(op->args[1]));
3873                 }
3874                 break;
3875 
3876             default:
3877                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3878                     const TCGArgConstraint *ct = &def->args_ct[i];
3879                     TCGRegSet set, *pset;
3880 
3881                     ts = arg_temp(op->args[i]);
3882                     pset = la_temp_pref(ts);
3883                     set = *pset;
3884 
3885                     set &= ct->regs;
3886                     if (ct->ialias) {
3887                         set &= output_pref(op, ct->alias_index);
3888                     }
3889                     /* If the combination is not possible, restart.  */
3890                     if (set == 0) {
3891                         set = ct->regs;
3892                     }
3893                     *pset = set;
3894                 }
3895                 break;
3896             }
3897             break;
3898         }
3899         op->life = arg_life;
3900     }
3901 }
3902 
3903 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3904 static bool __attribute__((noinline))
3905 liveness_pass_2(TCGContext *s)
3906 {
3907     int nb_globals = s->nb_globals;
3908     int nb_temps, i;
3909     bool changes = false;
3910     TCGOp *op, *op_next;
3911 
3912     /* Create a temporary for each indirect global.  */
3913     for (i = 0; i < nb_globals; ++i) {
3914         TCGTemp *its = &s->temps[i];
3915         if (its->indirect_reg) {
3916             TCGTemp *dts = tcg_temp_alloc(s);
3917             dts->type = its->type;
3918             dts->base_type = its->base_type;
3919             dts->temp_subindex = its->temp_subindex;
3920             dts->kind = TEMP_EBB;
3921             its->state_ptr = dts;
3922         } else {
3923             its->state_ptr = NULL;
3924         }
3925         /* All globals begin dead.  */
3926         its->state = TS_DEAD;
3927     }
3928     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3929         TCGTemp *its = &s->temps[i];
3930         its->state_ptr = NULL;
3931         its->state = TS_DEAD;
3932     }
3933 
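    /*
     * From here on, state_ptr links each indirect global to its shadow
     * direct temp, and state tracks that shadow: TS_DEAD means it must
     * be reloaded before use, TS_MEM means loaded and in sync with the
     * memory slot, and 0 means live and modified relative to memory.
     */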
3934     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3935         TCGOpcode opc = op->opc;
3936         const TCGOpDef *def = &tcg_op_defs[opc];
3937         TCGLifeData arg_life = op->life;
3938         int nb_iargs, nb_oargs, call_flags;
3939         TCGTemp *arg_ts, *dir_ts;
3940 
3941         if (opc == INDEX_op_call) {
3942             nb_oargs = TCGOP_CALLO(op);
3943             nb_iargs = TCGOP_CALLI(op);
3944             call_flags = tcg_call_flags(op);
3945         } else {
3946             nb_iargs = def->nb_iargs;
3947             nb_oargs = def->nb_oargs;
3948 
3949             /* Set flags similar to those that calls require.  */
3950             if (def->flags & TCG_OPF_COND_BRANCH) {
3951                 /* Like reading globals: sync_globals */
3952                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3953             } else if (def->flags & TCG_OPF_BB_END) {
3954                 /* Like writing globals: save_globals */
3955                 call_flags = 0;
3956             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3957                 /* Like reading globals: sync_globals */
3958                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3959             } else {
3960                 /* No effect on globals.  */
3961                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3962                               TCG_CALL_NO_WRITE_GLOBALS);
3963             }
3964         }
3965 
3966         /* Make sure that input arguments are available.  */
3967         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3968             arg_ts = arg_temp(op->args[i]);
3969             dir_ts = arg_ts->state_ptr;
3970             if (dir_ts && arg_ts->state == TS_DEAD) {
3971                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3972                                   ? INDEX_op_ld_i32
3973                                   : INDEX_op_ld_i64);
3974                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3975 
3976                 lop->args[0] = temp_arg(dir_ts);
3977                 lop->args[1] = temp_arg(arg_ts->mem_base);
3978                 lop->args[2] = arg_ts->mem_offset;
3979 
3980                 /* Loaded, but synced with memory.  */
3981                 arg_ts->state = TS_MEM;
3982             }
3983         }
3984 
3985         /* Perform input replacement, and mark inputs that became dead.
3986            No action is required except keeping temp_state up to date
3987            so that we reload when needed.  */
3988         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3989             arg_ts = arg_temp(op->args[i]);
3990             dir_ts = arg_ts->state_ptr;
3991             if (dir_ts) {
3992                 op->args[i] = temp_arg(dir_ts);
3993                 changes = true;
3994                 if (IS_DEAD_ARG(i)) {
3995                     arg_ts->state = TS_DEAD;
3996                 }
3997             }
3998         }
3999 
4000         /* Liveness analysis should ensure that the following are
4001            all correct, for call sites and basic block end points.  */
4002         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4003             /* Nothing to do */
4004         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4005             for (i = 0; i < nb_globals; ++i) {
4006                 /* Liveness should see that globals are synced back,
4007                    that is, either TS_DEAD or TS_MEM.  */
4008                 arg_ts = &s->temps[i];
4009                 tcg_debug_assert(arg_ts->state_ptr == 0
4010                                  || arg_ts->state != 0);
4011             }
4012         } else {
4013             for (i = 0; i < nb_globals; ++i) {
4014                 /* Liveness should see that globals are saved back,
4015                    that is, TS_DEAD, waiting to be reloaded.  */
4016                 arg_ts = &s->temps[i];
4017                 tcg_debug_assert(arg_ts->state_ptr == 0
4018                                  || arg_ts->state == TS_DEAD);
4019             }
4020         }
4021 
4022         /* Outputs become available.  */
4023         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4024             arg_ts = arg_temp(op->args[0]);
4025             dir_ts = arg_ts->state_ptr;
4026             if (dir_ts) {
4027                 op->args[0] = temp_arg(dir_ts);
4028                 changes = true;
4029 
4030                 /* The output is now live and modified.  */
4031                 arg_ts->state = 0;
4032 
4033                 if (NEED_SYNC_ARG(0)) {
4034                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4035                                       ? INDEX_op_st_i32
4036                                       : INDEX_op_st_i64);
4037                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4038                     TCGTemp *out_ts = dir_ts;
4039 
4040                     if (IS_DEAD_ARG(0)) {
4041                         out_ts = arg_temp(op->args[1]);
4042                         arg_ts->state = TS_DEAD;
4043                         tcg_op_remove(s, op);
4044                     } else {
4045                         arg_ts->state = TS_MEM;
4046                     }
4047 
4048                     sop->args[0] = temp_arg(out_ts);
4049                     sop->args[1] = temp_arg(arg_ts->mem_base);
4050                     sop->args[2] = arg_ts->mem_offset;
4051                 } else {
4052                     tcg_debug_assert(!IS_DEAD_ARG(0));
4053                 }
4054             }
4055         } else {
4056             for (i = 0; i < nb_oargs; i++) {
4057                 arg_ts = arg_temp(op->args[i]);
4058                 dir_ts = arg_ts->state_ptr;
4059                 if (!dir_ts) {
4060                     continue;
4061                 }
4062                 op->args[i] = temp_arg(dir_ts);
4063                 changes = true;
4064 
4065                 /* The output is now live and modified.  */
4066                 arg_ts->state = 0;
4067 
4068                 /* Sync outputs upon their last write.  */
4069                 if (NEED_SYNC_ARG(i)) {
4070                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4071                                       ? INDEX_op_st_i32
4072                                       : INDEX_op_st_i64);
4073                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4074 
4075                     sop->args[0] = temp_arg(dir_ts);
4076                     sop->args[1] = temp_arg(arg_ts->mem_base);
4077                     sop->args[2] = arg_ts->mem_offset;
4078 
4079                     arg_ts->state = TS_MEM;
4080                 }
4081                 /* Drop outputs that are dead.  */
4082                 if (IS_DEAD_ARG(i)) {
4083                     arg_ts->state = TS_DEAD;
4084                 }
4085             }
4086         }
4087     }
4088 
4089     return changes;
4090 }
4091 
4092 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4093 {
4094     intptr_t off;
4095     int size, align;
4096 
4097     /* When allocating an object, look at the full type. */
4098     size = tcg_type_size(ts->base_type);
4099     switch (ts->base_type) {
4100     case TCG_TYPE_I32:
4101         align = 4;
4102         break;
4103     case TCG_TYPE_I64:
4104     case TCG_TYPE_V64:
4105         align = 8;
4106         break;
4107     case TCG_TYPE_I128:
4108     case TCG_TYPE_V128:
4109     case TCG_TYPE_V256:
4110         /*
4111          * Note that we do not require aligned storage for V256,
4112          * and that we provide alignment for I128 to match V128,
4113          * even if that's above what the host ABI requires.
4114          */
4115         align = 16;
4116         break;
4117     default:
4118         g_assert_not_reached();
4119     }
4120 
4121     /*
4122      * Assume the stack is sufficiently aligned.
4123      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4124      * and do not require 16 byte vector alignment.  This seems slightly
4125      * easier than fully parameterizing the above switch statement.
4126      */
4127     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4128     off = ROUND_UP(s->current_frame_offset, align);
4129 
4130     /* If we've exhausted the stack frame, restart with a smaller TB. */
4131     if (off + size > s->frame_end) {
4132         tcg_raise_tb_overflow(s);
4133     }
4134     s->current_frame_offset = off + size;
4135 #if defined(__sparc__)
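    /* SPARC V9 frames are addressed through a biased stack pointer. */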
4136     off += TCG_TARGET_STACK_BIAS;
4137 #endif
4138 
4139     /* If the object was subdivided, assign memory to all the parts. */
4140     if (ts->base_type != ts->type) {
4141         int part_size = tcg_type_size(ts->type);
4142         int part_count = size / part_size;
4143 
4144         /*
4145          * Each part is allocated sequentially in tcg_temp_new_internal.
4146          * Jump back to the first part by subtracting the current index.
4147          */
4148         ts -= ts->temp_subindex;
4149         for (int i = 0; i < part_count; ++i) {
4150             ts[i].mem_offset = off + i * part_size;
4151             ts[i].mem_base = s->frame_temp;
4152             ts[i].mem_allocated = 1;
4153         }
4154     } else {
4155         ts->mem_offset = off;
4156         ts->mem_base = s->frame_temp;
4157         ts->mem_allocated = 1;
4158     }
4159 }
4160 
4161 /* Assign @reg to @ts, and update reg_to_temp[]. */
4162 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4163 {
4164     if (ts->val_type == TEMP_VAL_REG) {
4165         TCGReg old = ts->reg;
4166         tcg_debug_assert(s->reg_to_temp[old] == ts);
4167         if (old == reg) {
4168             return;
4169         }
4170         s->reg_to_temp[old] = NULL;
4171     }
4172     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4173     s->reg_to_temp[reg] = ts;
4174     ts->val_type = TEMP_VAL_REG;
4175     ts->reg = reg;
4176 }
4177 
4178 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4179 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4180 {
4181     tcg_debug_assert(type != TEMP_VAL_REG);
4182     if (ts->val_type == TEMP_VAL_REG) {
4183         TCGReg reg = ts->reg;
4184         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4185         s->reg_to_temp[reg] = NULL;
4186     }
4187     ts->val_type = type;
4188 }
4189 
4190 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4191 
4192 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4193    mark it free; otherwise mark it dead.  */
4194 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4195 {
4196     TCGTempVal new_type;
4197 
4198     switch (ts->kind) {
4199     case TEMP_FIXED:
4200         return;
4201     case TEMP_GLOBAL:
4202     case TEMP_TB:
4203         new_type = TEMP_VAL_MEM;
4204         break;
4205     case TEMP_EBB:
4206         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4207         break;
4208     case TEMP_CONST:
4209         new_type = TEMP_VAL_CONST;
4210         break;
4211     default:
4212         g_assert_not_reached();
4213     }
4214     set_temp_val_nonreg(s, ts, new_type);
4215 }
4216 
4217 /* Mark a temporary as dead.  */
4218 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4219 {
4220     temp_free_or_dead(s, ts, 1);
4221 }
4222 
4223 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4224    register needs to be allocated to store a constant.  If 'free_or_dead'
4225    is non-zero, subsequently release the temporary; if it is positive, the
4226    temp is dead; if it is negative, the temp is free.  */
4227 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4228                       TCGRegSet preferred_regs, int free_or_dead)
4229 {
4230     if (!temp_readonly(ts) && !ts->mem_coherent) {
4231         if (!ts->mem_allocated) {
4232             temp_allocate_frame(s, ts);
4233         }
4234         switch (ts->val_type) {
4235         case TEMP_VAL_CONST:
4236             /* If we're going to free the temp immediately, then we won't
4237                require it later in a register, so attempt to store the
4238                constant to memory directly.  */
4239             if (free_or_dead
4240                 && tcg_out_sti(s, ts->type, ts->val,
4241                                ts->mem_base->reg, ts->mem_offset)) {
4242                 break;
4243             }
4244             temp_load(s, ts, tcg_target_available_regs[ts->type],
4245                       allocated_regs, preferred_regs);
4246             /* fallthrough */
4247 
4248         case TEMP_VAL_REG:
4249             tcg_out_st(s, ts->type, ts->reg,
4250                        ts->mem_base->reg, ts->mem_offset);
4251             break;
4252 
4253         case TEMP_VAL_MEM:
4254             break;
4255 
4256         case TEMP_VAL_DEAD:
4257         default:
4258             g_assert_not_reached();
4259         }
4260         ts->mem_coherent = 1;
4261     }
4262     if (free_or_dead) {
4263         temp_free_or_dead(s, ts, free_or_dead);
4264     }
4265 }
4266 
4267 /* free register 'reg' by spilling the corresponding temporary if necessary */
4268 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4269 {
4270     TCGTemp *ts = s->reg_to_temp[reg];
4271     if (ts != NULL) {
4272         temp_sync(s, ts, allocated_regs, 0, -1);
4273     }
4274 }
4275 
4276 /**
4277  * tcg_reg_alloc:
4278  * @required_regs: Set of registers in which we must allocate.
4279  * @allocated_regs: Set of registers which must be avoided.
4280  * @preferred_regs: Set of registers we should prefer.
4281  * @rev: True if we search the registers in "indirect" order.
4282  *
4283  * The allocated register must be in @required_regs & ~@allocated_regs,
4284  * but if we can put it in @preferred_regs we may save a move later.
4285  */
4286 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4287                             TCGRegSet allocated_regs,
4288                             TCGRegSet preferred_regs, bool rev)
4289 {
4290     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4291     TCGRegSet reg_ct[2];
4292     const int *order;
4293 
4294     reg_ct[1] = required_regs & ~allocated_regs;
4295     tcg_debug_assert(reg_ct[1] != 0);
4296     reg_ct[0] = reg_ct[1] & preferred_regs;
4297 
4298     /* Skip the preferred_regs option if it cannot be satisfied,
4299        or if the preference made no difference.  */
4300     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4301 
4302     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4303 
4304     /* Try free registers, preferences first.  */
4305     for (j = f; j < 2; j++) {
4306         TCGRegSet set = reg_ct[j];
4307 
4308         if (tcg_regset_single(set)) {
4309             /* One register in the set.  */
4310             TCGReg reg = tcg_regset_first(set);
4311             if (s->reg_to_temp[reg] == NULL) {
4312                 return reg;
4313             }
4314         } else {
4315             for (i = 0; i < n; i++) {
4316                 TCGReg reg = order[i];
4317                 if (s->reg_to_temp[reg] == NULL &&
4318                     tcg_regset_test_reg(set, reg)) {
4319                     return reg;
4320                 }
4321             }
4322         }
4323     }
4324 
4325     /* We must spill something.  */
4326     for (j = f; j < 2; j++) {
4327         TCGRegSet set = reg_ct[j];
4328 
4329         if (tcg_regset_single(set)) {
4330             /* One register in the set.  */
4331             TCGReg reg = tcg_regset_first(set);
4332             tcg_reg_free(s, reg, allocated_regs);
4333             return reg;
4334         } else {
4335             for (i = 0; i < n; i++) {
4336                 TCGReg reg = order[i];
4337                 if (tcg_regset_test_reg(set, reg)) {
4338                     tcg_reg_free(s, reg, allocated_regs);
4339                     return reg;
4340                 }
4341             }
4342         }
4343     }
4344 
4345     g_assert_not_reached();
4346 }
4347 
4348 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4349                                  TCGRegSet allocated_regs,
4350                                  TCGRegSet preferred_regs, bool rev)
4351 {
4352     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4353     TCGRegSet reg_ct[2];
4354     const int *order;
4355 
4356     /* Exclude reg I if either I or I+1 is already in allocated_regs. */
4357     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4358     tcg_debug_assert(reg_ct[1] != 0);
4359     reg_ct[0] = reg_ct[1] & preferred_regs;
4360 
4361     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4362 
4363     /*
4364      * Skip the preferred_regs option if it cannot be satisfied,
4365      * or if the preference made no difference.
4366      */
4367     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4368 
4369     /*
4370      * Minimize the number of flushes by looking for 2 free registers first,
4371      * then a single flush, then two flushes.
4372      */
4373     for (fmin = 2; fmin >= 0; fmin--) {
4374         for (j = k; j < 2; j++) {
4375             TCGRegSet set = reg_ct[j];
4376 
4377             for (i = 0; i < n; i++) {
4378                 TCGReg reg = order[i];
4379 
4380                 if (tcg_regset_test_reg(set, reg)) {
4381                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4382                     if (f >= fmin) {
4383                         tcg_reg_free(s, reg, allocated_regs);
4384                         tcg_reg_free(s, reg + 1, allocated_regs);
4385                         return reg;
4386                     }
4387                 }
4388             }
4389         }
4390     }
4391     g_assert_not_reached();
4392 }
4393 
4394 /* Make sure the temporary is in a register.  If needed, allocate the register
4395    from DESIRED while avoiding ALLOCATED.  */
4396 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4397                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4398 {
4399     TCGReg reg;
4400 
4401     switch (ts->val_type) {
4402     case TEMP_VAL_REG:
4403         return;
4404     case TEMP_VAL_CONST:
4405         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4406                             preferred_regs, ts->indirect_base);
4407         if (ts->type <= TCG_TYPE_I64) {
4408             tcg_out_movi(s, ts->type, reg, ts->val);
4409         } else {
4410             uint64_t val = ts->val;
4411             MemOp vece = MO_64;
4412 
4413             /*
4414              * Find the minimal vector element that matches the constant.
4415              * The targets will, in general, have to do this search anyway,
4416              * so do it generically here.
4417              */
4418             if (val == dup_const(MO_8, val)) {
4419                 vece = MO_8;
4420             } else if (val == dup_const(MO_16, val)) {
4421                 vece = MO_16;
4422             } else if (val == dup_const(MO_32, val)) {
4423                 vece = MO_32;
4424             }
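            /* E.g. 0x6666666666666666 dups from MO_8, while
               0x1234567812345678 only dups from MO_32. */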
4425 
4426             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4427         }
4428         ts->mem_coherent = 0;
4429         break;
4430     case TEMP_VAL_MEM:
4431         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4432                             preferred_regs, ts->indirect_base);
4433         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4434         ts->mem_coherent = 1;
4435         break;
4436     case TEMP_VAL_DEAD:
4437     default:
4438         g_assert_not_reached();
4439     }
4440     set_temp_val_reg(s, ts, reg);
4441 }
4442 
4443 /* Save a temporary to memory. 'allocated_regs' is used in case a
4444    temporary register needs to be allocated to store a constant.  */
4445 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4446 {
4447     /* The liveness analysis already ensures that globals are back
4448        in memory. Keep a tcg_debug_assert for safety. */
4449     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4450 }
4451 
4452 /* save globals to their canonical location and assume they can be
4453    modified by the following code. 'allocated_regs' is used in case a
4454    temporary register needs to be allocated to store a constant. */
4455 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4456 {
4457     int i, n;
4458 
4459     for (i = 0, n = s->nb_globals; i < n; i++) {
4460         temp_save(s, &s->temps[i], allocated_regs);
4461     }
4462 }
4463 
4464 /* sync globals to their canonical location and assume they can be
4465    read by the following code. 'allocated_regs' is used in case a
4466    temporary register needs to be allocated to store a constant. */
4467 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4468 {
4469     int i, n;
4470 
4471     for (i = 0, n = s->nb_globals; i < n; i++) {
4472         TCGTemp *ts = &s->temps[i];
4473         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4474                          || ts->kind == TEMP_FIXED
4475                          || ts->mem_coherent);
4476     }
4477 }
4478 
4479 /* at the end of a basic block, we assume all temporaries are dead and
4480    all globals are stored at their canonical location. */
4481 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4482 {
4483     int i;
4484 
4485     for (i = s->nb_globals; i < s->nb_temps; i++) {
4486         TCGTemp *ts = &s->temps[i];
4487 
4488         switch (ts->kind) {
4489         case TEMP_TB:
4490             temp_save(s, ts, allocated_regs);
4491             break;
4492         case TEMP_EBB:
4493             /* The liveness analysis already ensures that temps are dead.
4494                Keep a tcg_debug_assert for safety. */
4495             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4496             break;
4497         case TEMP_CONST:
4498             /* Similarly, we should have freed any allocated register. */
4499             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4500             break;
4501         default:
4502             g_assert_not_reached();
4503         }
4504     }
4505 
4506     save_globals(s, allocated_regs);
4507 }
4508 
4509 /*
4510  * At a conditional branch, we assume all temporaries are dead unless
4511  * explicitly live-across-conditional-branch; all globals and local
4512  * temps are synced to their location.
4513  */
4514 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4515 {
4516     sync_globals(s, allocated_regs);
4517 
4518     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4519         TCGTemp *ts = &s->temps[i];
4520         /*
4521          * The liveness analysis already ensures that temps are dead.
4522          * Keep tcg_debug_asserts for safety.
4523          */
4524         switch (ts->kind) {
4525         case TEMP_TB:
4526             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4527             break;
4528         case TEMP_EBB:
4529         case TEMP_CONST:
4530             break;
4531         default:
4532             g_assert_not_reached();
4533         }
4534     }
4535 }
4536 
4537 /*
4538  * Specialized code generation for INDEX_op_mov_* with a constant.
4539  */
4540 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4541                                   tcg_target_ulong val, TCGLifeData arg_life,
4542                                   TCGRegSet preferred_regs)
4543 {
4544     /* ENV should not be modified.  */
4545     tcg_debug_assert(!temp_readonly(ots));
4546 
4547     /* The movi is not explicitly generated here.  */
4548     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4549     ots->val = val;
4550     ots->mem_coherent = 0;
4551     if (NEED_SYNC_ARG(0)) {
4552         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4553     } else if (IS_DEAD_ARG(0)) {
4554         temp_dead(s, ots);
4555     }
4556 }
4557 
4558 /*
4559  * Specialized code generation for INDEX_op_mov_*.
4560  */
4561 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4562 {
4563     const TCGLifeData arg_life = op->life;
4564     TCGRegSet allocated_regs, preferred_regs;
4565     TCGTemp *ts, *ots;
4566     TCGType otype, itype;
4567     TCGReg oreg, ireg;
4568 
4569     allocated_regs = s->reserved_regs;
4570     preferred_regs = output_pref(op, 0);
4571     ots = arg_temp(op->args[0]);
4572     ts = arg_temp(op->args[1]);
4573 
4574     /* ENV should not be modified.  */
4575     tcg_debug_assert(!temp_readonly(ots));
4576 
4577     /* Note that otype != itype for no-op truncation.  */
4578     otype = ots->type;
4579     itype = ts->type;
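    /* E.g. extracting the low half of an i64 temp into an i32 temp on a
       64-bit host arrives here with itype == TCG_TYPE_I64 and
       otype == TCG_TYPE_I32. */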
4580 
4581     if (ts->val_type == TEMP_VAL_CONST) {
4582         /* propagate constant or generate sti */
4583         tcg_target_ulong val = ts->val;
4584         if (IS_DEAD_ARG(1)) {
4585             temp_dead(s, ts);
4586         }
4587         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4588         return;
4589     }
4590 
4591     /* If the source value is in memory we're going to be forced
4592        to have it in a register in order to perform the copy.  Copy
4593        the SOURCE value into its own register first, so that we
4594        don't have to reload SOURCE the next time it is used. */
4595     if (ts->val_type == TEMP_VAL_MEM) {
4596         temp_load(s, ts, tcg_target_available_regs[itype],
4597                   allocated_regs, preferred_regs);
4598     }
4599     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4600     ireg = ts->reg;
4601 
4602     if (IS_DEAD_ARG(0)) {
4603         /* mov to a non-saved dead register makes no sense (even with
4604            liveness analysis disabled). */
4605         tcg_debug_assert(NEED_SYNC_ARG(0));
4606         if (!ots->mem_allocated) {
4607             temp_allocate_frame(s, ots);
4608         }
4609         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4610         if (IS_DEAD_ARG(1)) {
4611             temp_dead(s, ts);
4612         }
4613         temp_dead(s, ots);
4614         return;
4615     }
4616 
4617     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4618         /*
4619          * The mov can be suppressed.  Kill input first, so that it
4620          * is unlinked from reg_to_temp, then set the output to the
4621          * reg that we saved from the input.
4622          */
4623         temp_dead(s, ts);
4624         oreg = ireg;
4625     } else {
4626         if (ots->val_type == TEMP_VAL_REG) {
4627             oreg = ots->reg;
4628         } else {
4629             /* Make sure to not spill the input register during allocation. */
4630             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4631                                  allocated_regs | ((TCGRegSet)1 << ireg),
4632                                  preferred_regs, ots->indirect_base);
4633         }
4634         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4635             /*
4636              * Cross register class move not supported.
4637              * Store the source register into the destination slot
4638              * and leave the destination temp as TEMP_VAL_MEM.
4639              */
4640             assert(!temp_readonly(ots));
4641             if (!ots->mem_allocated) {
4642                 temp_allocate_frame(s, ots);
4643             }
4644             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4645             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4646             ots->mem_coherent = 1;
4647             return;
4648         }
4649     }
4650     set_temp_val_reg(s, ots, oreg);
4651     ots->mem_coherent = 0;
4652 
4653     if (NEED_SYNC_ARG(0)) {
4654         temp_sync(s, ots, allocated_regs, 0, 0);
4655     }
4656 }
4657 
4658 /*
4659  * Specialized code generation for INDEX_op_dup_vec.
4660  */
4661 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4662 {
4663     const TCGLifeData arg_life = op->life;
4664     TCGRegSet dup_out_regs, dup_in_regs;
4665     TCGTemp *its, *ots;
4666     TCGType itype, vtype;
4667     unsigned vece;
4668     int lowpart_ofs;
4669     bool ok;
4670 
4671     ots = arg_temp(op->args[0]);
4672     its = arg_temp(op->args[1]);
4673 
4674     /* ENV should not be modified.  */
4675     tcg_debug_assert(!temp_readonly(ots));
4676 
4677     itype = its->type;
4678     vece = TCGOP_VECE(op);
4679     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4680 
4681     if (its->val_type == TEMP_VAL_CONST) {
4682         /* Propagate constant via movi -> dupi.  */
4683         tcg_target_ulong val = its->val;
4684         if (IS_DEAD_ARG(1)) {
4685             temp_dead(s, its);
4686         }
4687         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4688         return;
4689     }
4690 
4691     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4692     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4693 
4694     /* Allocate the output register now.  */
4695     if (ots->val_type != TEMP_VAL_REG) {
4696         TCGRegSet allocated_regs = s->reserved_regs;
4697         TCGReg oreg;
4698 
4699         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4700             /* Make sure to not spill the input register. */
4701             tcg_regset_set_reg(allocated_regs, its->reg);
4702         }
4703         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4704                              output_pref(op, 0), ots->indirect_base);
4705         set_temp_val_reg(s, ots, oreg);
4706     }
4707 
4708     switch (its->val_type) {
4709     case TEMP_VAL_REG:
4710         /*
4711          * The dup constraints must be broad, covering all possible VECE.
4712          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4713          * to fail, indicating that extra moves are required for that case.
4714          */
4715         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4716             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4717                 goto done;
4718             }
4719             /* Try again from memory or a vector input register.  */
4720         }
4721         if (!its->mem_coherent) {
4722             /*
4723              * The input register is not synced, and so an extra store
4724              * would be required to use memory.  Attempt an integer-vector
4725              * register move first.  We do not have a TCGRegSet for this.
4726              */
4727             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4728                 break;
4729             }
4730             /* Sync the temp back to its slot and load from there.  */
4731             temp_sync(s, its, s->reserved_regs, 0, 0);
4732         }
4733         /* fall through */
4734 
4735     case TEMP_VAL_MEM:
4736         lowpart_ofs = 0;
4737         if (HOST_BIG_ENDIAN) {
4738             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4739         }
4740         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4741                              its->mem_offset + lowpart_ofs)) {
4742             goto done;
4743         }
4744         /* Load the input into the destination vector register. */
4745         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4746         break;
4747 
4748     default:
4749         g_assert_not_reached();
4750     }
4751 
4752     /* We now have a vector input register, so dup must succeed. */
4753     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4754     tcg_debug_assert(ok);
4755 
4756  done:
4757     ots->mem_coherent = 0;
4758     if (IS_DEAD_ARG(1)) {
4759         temp_dead(s, its);
4760     }
4761     if (NEED_SYNC_ARG(0)) {
4762         temp_sync(s, ots, s->reserved_regs, 0, 0);
4763     }
4764     if (IS_DEAD_ARG(0)) {
4765         temp_dead(s, ots);
4766     }
4767 }
4768 
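     /*
      * Generic register allocation for one op (summary added for
      * navigation; the step comments below are authoritative): satisfy
      * the input constraints, release dead inputs, allocate the
      * outputs, emit the instruction, then sync or discard the outputs.
      */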
4769 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4770 {
4771     const TCGLifeData arg_life = op->life;
4772     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4773     TCGRegSet i_allocated_regs;
4774     TCGRegSet o_allocated_regs;
4775     int i, k, nb_iargs, nb_oargs;
4776     TCGReg reg;
4777     TCGArg arg;
4778     const TCGArgConstraint *arg_ct;
4779     TCGTemp *ts;
4780     TCGArg new_args[TCG_MAX_OP_ARGS];
4781     int const_args[TCG_MAX_OP_ARGS];
4782 
4783     nb_oargs = def->nb_oargs;
4784     nb_iargs = def->nb_iargs;
4785 
4786     /* copy constants */
4787     memcpy(new_args + nb_oargs + nb_iargs,
4788            op->args + nb_oargs + nb_iargs,
4789            sizeof(TCGArg) * def->nb_cargs);
4790 
4791     i_allocated_regs = s->reserved_regs;
4792     o_allocated_regs = s->reserved_regs;
4793 
4794     /* satisfy input constraints */
4795     for (k = 0; k < nb_iargs; k++) {
4796         TCGRegSet i_preferred_regs, i_required_regs;
4797         bool allocate_new_reg, copyto_new_reg;
4798         TCGTemp *ts2;
4799         int i1, i2;
4800 
4801         i = def->args_ct[nb_oargs + k].sort_index;
4802         arg = op->args[i];
4803         arg_ct = &def->args_ct[i];
4804         ts = arg_temp(arg);
4805 
4806         if (ts->val_type == TEMP_VAL_CONST
4807             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4808             /* constant is OK for instruction */
4809             const_args[i] = 1;
4810             new_args[i] = ts->val;
4811             continue;
4812         }
4813 
4814         reg = ts->reg;
4815         i_preferred_regs = 0;
4816         i_required_regs = arg_ct->regs;
4817         allocate_new_reg = false;
4818         copyto_new_reg = false;
4819 
4820         switch (arg_ct->pair) {
4821         case 0: /* not paired */
4822             if (arg_ct->ialias) {
4823                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4824 
4825                 /*
4826                  * If the input is readonly, then it cannot also be an
4827                  * output and aliased to itself.  If the input is not
4828                  * dead after the instruction, we must allocate a new
4829                  * register and move it.
4830                  */
4831                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4832                     || def->args_ct[arg_ct->alias_index].newreg) {
4833                     allocate_new_reg = true;
4834                 } else if (ts->val_type == TEMP_VAL_REG) {
4835                     /*
4836                      * Check if the current register has already been
4837                      * allocated for another input.
4838                      */
4839                     allocate_new_reg =
4840                         tcg_regset_test_reg(i_allocated_regs, reg);
4841                 }
4842             }
4843             if (!allocate_new_reg) {
4844                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4845                           i_preferred_regs);
4846                 reg = ts->reg;
4847                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4848             }
4849             if (allocate_new_reg) {
4850                 /*
4851                  * Allocate a new register matching the constraint
4852                  * and move the temporary register into it.
4853                  */
4854                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4855                           i_allocated_regs, 0);
4856                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4857                                     i_preferred_regs, ts->indirect_base);
4858                 copyto_new_reg = true;
4859             }
4860             break;
4861 
4862         case 1:
4863             /* First of an input pair; if i1 == i2, the second is an output. */
4864             i1 = i;
4865             i2 = arg_ct->pair_index;
4866             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4867 
4868             /*
4869              * It is easier to default to allocating a new pair
4870              * and to identify a few cases where it's not required.
4871              */
4872             if (arg_ct->ialias) {
4873                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4874                 if (IS_DEAD_ARG(i1) &&
4875                     IS_DEAD_ARG(i2) &&
4876                     !temp_readonly(ts) &&
4877                     ts->val_type == TEMP_VAL_REG &&
4878                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4879                     tcg_regset_test_reg(i_required_regs, reg) &&
4880                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4881                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4882                     (ts2
4883                      ? ts2->val_type == TEMP_VAL_REG &&
4884                        ts2->reg == reg + 1 &&
4885                        !temp_readonly(ts2)
4886                      : s->reg_to_temp[reg + 1] == NULL)) {
4887                     break;
4888                 }
4889             } else {
4890                 /* Without aliasing, the pair must also be an input. */
4891                 tcg_debug_assert(ts2);
4892                 if (ts->val_type == TEMP_VAL_REG &&
4893                     ts2->val_type == TEMP_VAL_REG &&
4894                     ts2->reg == reg + 1 &&
4895                     tcg_regset_test_reg(i_required_regs, reg)) {
4896                     break;
4897                 }
4898             }
4899             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4900                                      0, ts->indirect_base);
4901             goto do_pair;
4902 
4903         case 2: /* pair second */
4904             reg = new_args[arg_ct->pair_index] + 1;
4905             goto do_pair;
4906 
4907         case 3: /* ialias with second output, no first input */
4908             tcg_debug_assert(arg_ct->ialias);
4909             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4910 
4911             if (IS_DEAD_ARG(i) &&
4912                 !temp_readonly(ts) &&
4913                 ts->val_type == TEMP_VAL_REG &&
4914                 reg > 0 &&
4915                 s->reg_to_temp[reg - 1] == NULL &&
4916                 tcg_regset_test_reg(i_required_regs, reg) &&
4917                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4918                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4919                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4920                 break;
4921             }
4922             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4923                                      i_allocated_regs, 0,
4924                                      ts->indirect_base);
4925             tcg_regset_set_reg(i_allocated_regs, reg);
4926             reg += 1;
4927             goto do_pair;
4928 
4929         do_pair:
4930             /*
4931              * If an aliased input is not dead after the instruction,
4932              * we must allocate a new register and move it.
4933              */
4934             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4935                 TCGRegSet t_allocated_regs = i_allocated_regs;
4936 
4937                 /*
4938                  * Because of the alias, and the continued life, make sure
4939                  * that the temp is somewhere *other* than the reg pair,
4940                  * and we get a copy in reg.
4941                  */
4942                 tcg_regset_set_reg(t_allocated_regs, reg);
4943                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4944                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4945                     /* If ts was already in reg, copy it somewhere else. */
4946                     TCGReg nr;
4947                     bool ok;
4948 
4949                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4950                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4951                                        t_allocated_regs, 0, ts->indirect_base);
4952                     ok = tcg_out_mov(s, ts->type, nr, reg);
4953                     tcg_debug_assert(ok);
4954 
4955                     set_temp_val_reg(s, ts, nr);
4956                 } else {
4957                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4958                               t_allocated_regs, 0);
4959                     copyto_new_reg = true;
4960                 }
4961             } else {
4962                 /* Preferably allocate to reg, otherwise copy. */
4963                 i_required_regs = (TCGRegSet)1 << reg;
4964                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4965                           i_preferred_regs);
4966                 copyto_new_reg = ts->reg != reg;
4967             }
4968             break;
4969 
4970         default:
4971             g_assert_not_reached();
4972         }
4973 
4974         if (copyto_new_reg) {
4975             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4976                 /*
4977                  * Cross register class move not supported.  Sync the
4978                  * temp back to its slot and load from there.
4979                  */
4980                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4981                 tcg_out_ld(s, ts->type, reg,
4982                            ts->mem_base->reg, ts->mem_offset);
4983             }
4984         }
4985         new_args[i] = reg;
4986         const_args[i] = 0;
4987         tcg_regset_set_reg(i_allocated_regs, reg);
4988     }
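         /*
          * Illustrative pair case: a constraint requiring two inputs in
          * adjacent registers marks the low member with pair == 1 and the
          * high member with pair == 2, the latter taken directly from
          * new_args[pair_index] + 1 above; pair == 3 is the variant in
          * which the adjacent register aliases a second output with no
          * matching first input.
          */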
4989 
4990     /* mark dead temporaries and free the associated registers */
4991     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4992         if (IS_DEAD_ARG(i)) {
4993             temp_dead(s, arg_temp(op->args[i]));
4994         }
4995     }
4996 
4997     if (def->flags & TCG_OPF_COND_BRANCH) {
4998         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4999     } else if (def->flags & TCG_OPF_BB_END) {
5000         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5001     } else {
5002         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5003             /* XXX: permit generic clobber register list ? */
5004             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5005                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5006                     tcg_reg_free(s, i, i_allocated_regs);
5007                 }
5008             }
5009         }
5010         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5011             /* sync globals if the op has side effects and might trigger
5012                an exception. */
5013             sync_globals(s, i_allocated_regs);
5014         }
5015 
5016         /* satisfy the output constraints */
5017         for (k = 0; k < nb_oargs; k++) {
5018             i = def->args_ct[k].sort_index;
5019             arg = op->args[i];
5020             arg_ct = &def->args_ct[i];
5021             ts = arg_temp(arg);
5022 
5023             /* ENV should not be modified.  */
5024             tcg_debug_assert(!temp_readonly(ts));
5025 
5026             switch (arg_ct->pair) {
5027             case 0: /* not paired */
5028                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5029                     reg = new_args[arg_ct->alias_index];
5030                 } else if (arg_ct->newreg) {
5031                     reg = tcg_reg_alloc(s, arg_ct->regs,
5032                                         i_allocated_regs | o_allocated_regs,
5033                                         output_pref(op, k), ts->indirect_base);
5034                 } else {
5035                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5036                                         output_pref(op, k), ts->indirect_base);
5037                 }
5038                 break;
5039 
5040             case 1: /* first of pair */
5041                 tcg_debug_assert(!arg_ct->newreg);
5042                 if (arg_ct->oalias) {
5043                     reg = new_args[arg_ct->alias_index];
5044                     break;
5045                 }
5046                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5047                                          output_pref(op, k), ts->indirect_base);
5048                 break;
5049 
5050             case 2: /* second of pair */
5051                 tcg_debug_assert(!arg_ct->newreg);
5052                 if (arg_ct->oalias) {
5053                     reg = new_args[arg_ct->alias_index];
5054                 } else {
5055                     reg = new_args[arg_ct->pair_index] + 1;
5056                 }
5057                 break;
5058 
5059             case 3: /* first of pair, aliasing with a second input */
5060                 tcg_debug_assert(!arg_ct->newreg);
5061                 reg = new_args[arg_ct->pair_index] - 1;
5062                 break;
5063 
5064             default:
5065                 g_assert_not_reached();
5066             }
5067             tcg_regset_set_reg(o_allocated_regs, reg);
5068             set_temp_val_reg(s, ts, reg);
5069             ts->mem_coherent = 0;
5070             new_args[i] = reg;
5071         }
5072     }
5073 
5074     /* emit instruction */
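         /*
          * The extension opcodes are expanded here through the dedicated
          * tcg_out_ext* hooks, which every backend provides; all remaining
          * opcodes dispatch to the backend's generic tcg_out_op() or
          * tcg_out_vec_op() entry points.
          */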
5075     switch (op->opc) {
5076     case INDEX_op_ext8s_i32:
5077         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5078         break;
5079     case INDEX_op_ext8s_i64:
5080         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5081         break;
5082     case INDEX_op_ext8u_i32:
5083     case INDEX_op_ext8u_i64:
5084         tcg_out_ext8u(s, new_args[0], new_args[1]);
5085         break;
5086     case INDEX_op_ext16s_i32:
5087         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5088         break;
5089     case INDEX_op_ext16s_i64:
5090         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5091         break;
5092     case INDEX_op_ext16u_i32:
5093     case INDEX_op_ext16u_i64:
5094         tcg_out_ext16u(s, new_args[0], new_args[1]);
5095         break;
5096     case INDEX_op_ext32s_i64:
5097         tcg_out_ext32s(s, new_args[0], new_args[1]);
5098         break;
5099     case INDEX_op_ext32u_i64:
5100         tcg_out_ext32u(s, new_args[0], new_args[1]);
5101         break;
5102     case INDEX_op_ext_i32_i64:
5103         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5104         break;
5105     case INDEX_op_extu_i32_i64:
5106         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5107         break;
5108     case INDEX_op_extrl_i64_i32:
5109         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5110         break;
5111     default:
5112         if (def->flags & TCG_OPF_VECTOR) {
5113             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5114                            new_args, const_args);
5115         } else {
5116             tcg_out_op(s, op->opc, new_args, const_args);
5117         }
5118         break;
5119     }
5120 
5121     /* move the outputs in the correct register if needed */
5122     for (i = 0; i < nb_oargs; i++) {
5123         ts = arg_temp(op->args[i]);
5124 
5125         /* ENV should not be modified.  */
5126         tcg_debug_assert(!temp_readonly(ts));
5127 
5128         if (NEED_SYNC_ARG(i)) {
5129             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5130         } else if (IS_DEAD_ARG(i)) {
5131             temp_dead(s, ts);
5132         }
5133     }
5134 }
5135 
5136 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5137 {
5138     const TCGLifeData arg_life = op->life;
5139     TCGTemp *ots, *itsl, *itsh;
5140     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5141 
5142     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5143     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5144     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5145 
5146     ots = arg_temp(op->args[0]);
5147     itsl = arg_temp(op->args[1]);
5148     itsh = arg_temp(op->args[2]);
5149 
5150     /* ENV should not be modified.  */
5151     tcg_debug_assert(!temp_readonly(ots));
5152 
5153     /* Allocate the output register now.  */
5154     if (ots->val_type != TEMP_VAL_REG) {
5155         TCGRegSet allocated_regs = s->reserved_regs;
5156         TCGRegSet dup_out_regs =
5157             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5158         TCGReg oreg;
5159 
5160         /* Make sure to not spill the input registers. */
5161         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5162             tcg_regset_set_reg(allocated_regs, itsl->reg);
5163         }
5164         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5165             tcg_regset_set_reg(allocated_regs, itsh->reg);
5166         }
5167 
5168         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5169                              output_pref(op, 0), ots->indirect_base);
5170         set_temp_val_reg(s, ots, oreg);
5171     }
5172 
5173     /* Promote dup2 of immediates to dupi_vec. */
5174     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5175         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5176         MemOp vece = MO_64;
5177 
5178         if (val == dup_const(MO_8, val)) {
5179             vece = MO_8;
5180         } else if (val == dup_const(MO_16, val)) {
5181             vece = MO_16;
5182         } else if (val == dup_const(MO_32, val)) {
5183             vece = MO_32;
5184         }
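             /*
              * Illustrative narrowing: itsl->val == itsh->val == 0x01010101
              * combine to val == 0x0101010101010101, which
              * dup_const(MO_8, val) reproduces, so a byte-replicated dupi
              * with vece == MO_8 suffices.
              */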
5185 
5186         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5187         goto done;
5188     }
5189 
5190     /* If the two inputs form one 64-bit value, try dupm_vec. */
5191     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5192         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5193         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5194         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5195 
5196         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5197         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5198 
5199         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5200                              its->mem_base->reg, its->mem_offset)) {
5201             goto done;
5202         }
5203     }
5204 
5205     /* Fall back to generic expansion. */
5206     return false;
5207 
5208  done:
5209     ots->mem_coherent = 0;
5210     if (IS_DEAD_ARG(1)) {
5211         temp_dead(s, itsl);
5212     }
5213     if (IS_DEAD_ARG(2)) {
5214         temp_dead(s, itsh);
5215     }
5216     if (NEED_SYNC_ARG(0)) {
5217         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5218     } else if (IS_DEAD_ARG(0)) {
5219         temp_dead(s, ots);
5220     }
5221     return true;
5222 }
5223 
5224 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5225                          TCGRegSet allocated_regs)
5226 {
5227     if (ts->val_type == TEMP_VAL_REG) {
5228         if (ts->reg != reg) {
5229             tcg_reg_free(s, reg, allocated_regs);
5230             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5231                 /*
5232                  * Cross register class move not supported.  Sync the
5233                  * temp back to its slot and load from there.
5234                  */
5235                 temp_sync(s, ts, allocated_regs, 0, 0);
5236                 tcg_out_ld(s, ts->type, reg,
5237                            ts->mem_base->reg, ts->mem_offset);
5238             }
5239         }
5240     } else {
5241         TCGRegSet arg_set = 0;
5242 
5243         tcg_reg_free(s, reg, allocated_regs);
5244         tcg_regset_set_reg(arg_set, reg);
5245         temp_load(s, ts, arg_set, allocated_regs, 0);
5246     }
5247 }
5248 
5249 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5250                          TCGRegSet allocated_regs)
5251 {
5252     /*
5253      * When the destination is on the stack, load up the temp and store.
5254      * If there are many call-saved registers, the temp might live to
5255      * see another use; otherwise it'll be discarded.
5256      */
5257     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5258     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5259                arg_slot_stk_ofs(arg_slot));
5260 }
5261 
5262 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5263                             TCGTemp *ts, TCGRegSet *allocated_regs)
5264 {
5265     if (arg_slot_reg_p(l->arg_slot)) {
5266         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5267         load_arg_reg(s, reg, ts, *allocated_regs);
5268         tcg_regset_set_reg(*allocated_regs, reg);
5269     } else {
5270         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5271     }
5272 }
5273 
5274 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5275                          intptr_t ref_off, TCGRegSet *allocated_regs)
5276 {
5277     TCGReg reg;
5278 
5279     if (arg_slot_reg_p(arg_slot)) {
5280         reg = tcg_target_call_iarg_regs[arg_slot];
5281         tcg_reg_free(s, reg, *allocated_regs);
5282         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5283         tcg_regset_set_reg(*allocated_regs, reg);
5284     } else {
5285         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5286                             *allocated_regs, 0, false);
5287         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5288         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5289                    arg_slot_stk_ofs(arg_slot));
5290     }
5291 }
5292 
5293 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5294 {
5295     const int nb_oargs = TCGOP_CALLO(op);
5296     const int nb_iargs = TCGOP_CALLI(op);
5297     const TCGLifeData arg_life = op->life;
5298     const TCGHelperInfo *info = tcg_call_info(op);
5299     TCGRegSet allocated_regs = s->reserved_regs;
5300     int i;
5301 
5302     /*
5303      * Move inputs into place in reverse order,
5304      * so that we place stacked arguments first.
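          * (The stacked arguments occupy the highest slot numbers, so the
          * reverse walk stores to the stack while the argument registers
          * are still free.)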
5305      */
5306     for (i = nb_iargs - 1; i >= 0; --i) {
5307         const TCGCallArgumentLoc *loc = &info->in[i];
5308         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5309 
5310         switch (loc->kind) {
5311         case TCG_CALL_ARG_NORMAL:
5312         case TCG_CALL_ARG_EXTEND_U:
5313         case TCG_CALL_ARG_EXTEND_S:
5314             load_arg_normal(s, loc, ts, &allocated_regs);
5315             break;
5316         case TCG_CALL_ARG_BY_REF:
5317             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5318             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5319                          arg_slot_stk_ofs(loc->ref_slot),
5320                          &allocated_regs);
5321             break;
5322         case TCG_CALL_ARG_BY_REF_N:
5323             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5324             break;
5325         default:
5326             g_assert_not_reached();
5327         }
5328     }
5329 
5330     /* Mark dead temporaries and free the associated registers.  */
5331     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5332         if (IS_DEAD_ARG(i)) {
5333             temp_dead(s, arg_temp(op->args[i]));
5334         }
5335     }
5336 
5337     /* Clobber call registers.  */
5338     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5339         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5340             tcg_reg_free(s, i, allocated_regs);
5341         }
5342     }
5343 
5344     /*
5345      * Save globals if they might be written by the helper,
5346      * sync them if they might be read.
5347      */
5348     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5349         /* Nothing to do */
5350     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5351         sync_globals(s, allocated_regs);
5352     } else {
5353         save_globals(s, allocated_regs);
5354     }
5355 
5356     /*
5357      * If the ABI passes a pointer to the returned struct as the first
5358      * argument, load that now.  Pass a pointer to the output home slot.
5359      */
5360     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5361         TCGTemp *ts = arg_temp(op->args[0]);
5362 
5363         if (!ts->mem_allocated) {
5364             temp_allocate_frame(s, ts);
5365         }
5366         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5367     }
5368 
5369     tcg_out_call(s, tcg_call_func(op), info);
5370 
5371     /* Assign output registers and emit moves if needed.  */
5372     switch (info->out_kind) {
5373     case TCG_CALL_RET_NORMAL:
5374         for (i = 0; i < nb_oargs; i++) {
5375             TCGTemp *ts = arg_temp(op->args[i]);
5376             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5377 
5378             /* ENV should not be modified.  */
5379             tcg_debug_assert(!temp_readonly(ts));
5380 
5381             set_temp_val_reg(s, ts, reg);
5382             ts->mem_coherent = 0;
5383         }
5384         break;
5385 
5386     case TCG_CALL_RET_BY_VEC:
5387         {
5388             TCGTemp *ts = arg_temp(op->args[0]);
5389 
5390             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5391             tcg_debug_assert(ts->temp_subindex == 0);
5392             if (!ts->mem_allocated) {
5393                 temp_allocate_frame(s, ts);
5394             }
5395             tcg_out_st(s, TCG_TYPE_V128,
5396                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5397                        ts->mem_base->reg, ts->mem_offset);
5398         }
5399         /* fall through to mark all parts in memory */
5400 
5401     case TCG_CALL_RET_BY_REF:
5402         /* The callee has performed a write through the reference. */
5403         for (i = 0; i < nb_oargs; i++) {
5404             TCGTemp *ts = arg_temp(op->args[i]);
5405             ts->val_type = TEMP_VAL_MEM;
5406         }
5407         break;
5408 
5409     default:
5410         g_assert_not_reached();
5411     }
5412 
5413     /* Flush or discard output registers as needed. */
5414     for (i = 0; i < nb_oargs; i++) {
5415         TCGTemp *ts = arg_temp(op->args[i]);
5416         if (NEED_SYNC_ARG(i)) {
5417             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5418         } else if (IS_DEAD_ARG(i)) {
5419             temp_dead(s, ts);
5420         }
5421     }
5422 }
5423 
5424 /**
5425  * atom_and_align_for_opc:
5426  * @s: tcg context
5427  * @opc: memory operation code
5428  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5429  * @allow_two_ops: true if we are prepared to issue two operations
5430  *
5431  * Return the alignment and atomicity to use for the inline fast path
5432  * for the given memory operation.  The alignment may be larger than
5433  * that specified in @opc, and the correct alignment will be diagnosed
5434  * by the slow path helper.
5435  *
5436  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5437  * and issue two loads or stores for subalignment.
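      *
      * For example (an illustrative reading of the switch below):
      * a 4-byte operation with MO_ATOM_IFALIGN yields
      * { .atom = MO_32, .align = as requested }, while MO_ATOM_NONE
      * yields .atom = MO_8, allowing the access to be split arbitrarily.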
5438  */
5439 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5440                                            MemOp host_atom, bool allow_two_ops)
5441 {
5442     MemOp align = get_alignment_bits(opc);
5443     MemOp size = opc & MO_SIZE;
5444     MemOp half = size ? size - 1 : 0;
5445     MemOp atmax;
5446     MemOp atom;
5447 
5448     /* When serialized, no further atomicity required.  */
5449     if (s->gen_tb->cflags & CF_PARALLEL) {
5450         atom = opc & MO_ATOM_MASK;
5451     } else {
5452         atom = MO_ATOM_NONE;
5453     }
5454 
5455     switch (atom) {
5456     case MO_ATOM_NONE:
5457         /* The operation requires no specific atomicity. */
5458         atmax = MO_8;
5459         break;
5460 
5461     case MO_ATOM_IFALIGN:
5462         atmax = size;
5463         break;
5464 
5465     case MO_ATOM_IFALIGN_PAIR:
5466         atmax = half;
5467         break;
5468 
5469     case MO_ATOM_WITHIN16:
5470         atmax = size;
5471         if (size == MO_128) {
5472             /* Misalignment implies !within16, and therefore no atomicity. */
5473         } else if (host_atom != MO_ATOM_WITHIN16) {
5474             /* The host does not implement within16, so require alignment. */
5475             align = MAX(align, size);
5476         }
5477         break;
5478 
5479     case MO_ATOM_WITHIN16_PAIR:
5480         atmax = size;
5481         /*
5482          * Misalignment implies !within16, and therefore half atomicity.
5483          * Any host prepared for two operations can implement this with
5484          * half alignment.
5485          */
5486         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5487             align = MAX(align, half);
5488         }
5489         break;
5490 
5491     case MO_ATOM_SUBALIGN:
5492         atmax = size;
5493         if (host_atom != MO_ATOM_SUBALIGN) {
5494             /* If unaligned but not odd, there are subobjects up to half. */
5495             if (allow_two_ops) {
5496                 align = MAX(align, half);
5497             } else {
5498                 align = MAX(align, size);
5499             }
5500         }
5501         break;
5502 
5503     default:
5504         g_assert_not_reached();
5505     }
5506 
5507     return (TCGAtomAlign){ .atom = atmax, .align = align };
5508 }
5509 
5510 /*
5511  * Similarly for qemu_ld/st slow path helpers.
5512  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5513  * using only the provided backend tcg_out_* functions.
5514  */
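     /*
      * (Note: the TCGLdstHelperParam used throughout supplies backend
      * scratch registers, parm->tmp[0 .. parm->ntmp - 1]; the asserts
      * below check that enough are provided for the worst case.)
      */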
5515 
5516 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5517 {
5518     int ofs = arg_slot_stk_ofs(slot);
5519 
5520     /*
5521      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5522      * require extension to uint64_t, adjust the address for uint32_t.
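          * (Illustrative: on a 64-bit big-endian host an I32 value
          * occupies the last four bytes of its 8-byte slot, hence the
          * "ofs += 4" below.)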
5523      */
5524     if (HOST_BIG_ENDIAN &&
5525         TCG_TARGET_REG_BITS == 64 &&
5526         type == TCG_TYPE_I32) {
5527         ofs += 4;
5528     }
5529     return ofs;
5530 }
5531 
5532 static void tcg_out_helper_load_slots(TCGContext *s,
5533                                       unsigned nmov, TCGMovExtend *mov,
5534                                       const TCGLdstHelperParam *parm)
5535 {
5536     unsigned i;
5537     TCGReg dst3;
5538 
5539     /*
5540      * Start from the end, storing to the stack first.
5541      * This frees those registers, so we need not consider overlap.
5542      */
5543     for (i = nmov; i-- > 0; ) {
5544         unsigned slot = mov[i].dst;
5545 
5546         if (arg_slot_reg_p(slot)) {
5547             goto found_reg;
5548         }
5549 
5550         TCGReg src = mov[i].src;
5551         TCGType dst_type = mov[i].dst_type;
5552         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5553 
5554         /* The argument is going onto the stack; extend into scratch. */
5555         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5556             tcg_debug_assert(parm->ntmp != 0);
5557             mov[i].dst = src = parm->tmp[0];
5558             tcg_out_movext1(s, &mov[i]);
5559         }
5560 
5561         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5562                    tcg_out_helper_stk_ofs(dst_type, slot));
5563     }
5564     return;
5565 
5566  found_reg:
5567     /*
5568      * The remaining arguments are in registers.
5569      * Convert slot numbers to argument registers.
5570      */
5571     nmov = i + 1;
5572     for (i = 0; i < nmov; ++i) {
5573         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5574     }
5575 
5576     switch (nmov) {
5577     case 4:
5578         /* The backend must have provided enough temps for the worst case. */
5579         tcg_debug_assert(parm->ntmp >= 2);
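             /*
              * Illustrative conflict: if mov[3].dst is still the source of
              * one of mov[0..2], emitting mov[3] first would clobber that
              * pending source; hence the detour through scratch below.
              */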
5580 
5581         dst3 = mov[3].dst;
5582         for (unsigned j = 0; j < 3; ++j) {
5583             if (dst3 == mov[j].src) {
5584                 /*
5585                  * Conflict. Copy the source to a temporary, perform the
5586                  * remaining moves, then the extension from our scratch
5587                  * on the way out.
5588                  */
5589                 TCGReg scratch = parm->tmp[1];
5590 
5591                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5592                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5593                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5594                 return;
5595             }
5596         }
5597 
5598         /* No conflicts: perform this move and continue. */
5599         tcg_out_movext1(s, &mov[3]);
5600         /* fall through */
5601 
5602     case 3:
5603         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5604                         parm->ntmp ? parm->tmp[0] : -1);
5605         break;
5606     case 2:
5607         tcg_out_movext2(s, mov, mov + 1,
5608                         parm->ntmp ? parm->tmp[0] : -1);
5609         break;
5610     case 1:
5611         tcg_out_movext1(s, mov);
5612         break;
5613     default:
5614         g_assert_not_reached();
5615     }
5616 }
5617 
5618 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5619                                     TCGType type, tcg_target_long imm,
5620                                     const TCGLdstHelperParam *parm)
5621 {
5622     if (arg_slot_reg_p(slot)) {
5623         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5624     } else {
5625         int ofs = tcg_out_helper_stk_ofs(type, slot);
5626         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5627             tcg_debug_assert(parm->ntmp != 0);
5628             tcg_out_movi(s, type, parm->tmp[0], imm);
5629             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5630         }
5631     }
5632 }
5633 
5634 static void tcg_out_helper_load_common_args(TCGContext *s,
5635                                             const TCGLabelQemuLdst *ldst,
5636                                             const TCGLdstHelperParam *parm,
5637                                             const TCGHelperInfo *info,
5638                                             unsigned next_arg)
5639 {
5640     TCGMovExtend ptr_mov = {
5641         .dst_type = TCG_TYPE_PTR,
5642         .src_type = TCG_TYPE_PTR,
5643         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5644     };
5645     const TCGCallArgumentLoc *loc = &info->in[0];
5646     TCGType type;
5647     unsigned slot;
5648     tcg_target_ulong imm;
5649 
5650     /*
5651      * Handle env, which is always first.
5652      */
5653     ptr_mov.dst = loc->arg_slot;
5654     ptr_mov.src = TCG_AREG0;
5655     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5656 
5657     /*
5658      * Handle oi.
5659      */
5660     imm = ldst->oi;
5661     loc = &info->in[next_arg];
5662     type = TCG_TYPE_I32;
5663     switch (loc->kind) {
5664     case TCG_CALL_ARG_NORMAL:
5665         break;
5666     case TCG_CALL_ARG_EXTEND_U:
5667     case TCG_CALL_ARG_EXTEND_S:
5668         /* No extension required for MemOpIdx. */
5669         tcg_debug_assert(imm <= INT32_MAX);
5670         type = TCG_TYPE_REG;
5671         break;
5672     default:
5673         g_assert_not_reached();
5674     }
5675     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5676     next_arg++;
5677 
5678     /*
5679      * Handle ra.
5680      */
5681     loc = &info->in[next_arg];
5682     slot = loc->arg_slot;
5683     if (parm->ra_gen) {
5684         int arg_reg = -1;
5685         TCGReg ra_reg;
5686 
5687         if (arg_slot_reg_p(slot)) {
5688             arg_reg = tcg_target_call_iarg_regs[slot];
5689         }
5690         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5691 
5692         ptr_mov.dst = slot;
5693         ptr_mov.src = ra_reg;
5694         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5695     } else {
5696         imm = (uintptr_t)ldst->raddr;
5697         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5698     }
5699 }
5700 
5701 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5702                                        const TCGCallArgumentLoc *loc,
5703                                        TCGType dst_type, TCGType src_type,
5704                                        TCGReg lo, TCGReg hi)
5705 {
5706     MemOp reg_mo;
5707 
5708     if (dst_type <= TCG_TYPE_REG) {
5709         MemOp src_ext;
5710 
5711         switch (loc->kind) {
5712         case TCG_CALL_ARG_NORMAL:
5713             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5714             break;
5715         case TCG_CALL_ARG_EXTEND_U:
5716             dst_type = TCG_TYPE_REG;
5717             src_ext = MO_UL;
5718             break;
5719         case TCG_CALL_ARG_EXTEND_S:
5720             dst_type = TCG_TYPE_REG;
5721             src_ext = MO_SL;
5722             break;
5723         default:
5724             g_assert_not_reached();
5725         }
5726 
5727         mov[0].dst = loc->arg_slot;
5728         mov[0].dst_type = dst_type;
5729         mov[0].src = lo;
5730         mov[0].src_type = src_type;
5731         mov[0].src_ext = src_ext;
5732         return 1;
5733     }
5734 
5735     if (TCG_TARGET_REG_BITS == 32) {
5736         assert(dst_type == TCG_TYPE_I64);
5737         reg_mo = MO_32;
5738     } else {
5739         assert(dst_type == TCG_TYPE_I128);
5740         reg_mo = MO_64;
5741     }
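         /*
          * Two-register value: the low part is directed to whichever slot
          * holds the low half in host memory order (illustrative: loc[0]
          * on a little-endian host, loc[1] on big-endian), the high part
          * to the other.
          */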
5742 
5743     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5744     mov[0].src = lo;
5745     mov[0].dst_type = TCG_TYPE_REG;
5746     mov[0].src_type = TCG_TYPE_REG;
5747     mov[0].src_ext = reg_mo;
5748 
5749     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5750     mov[1].src = hi;
5751     mov[1].dst_type = TCG_TYPE_REG;
5752     mov[1].src_type = TCG_TYPE_REG;
5753     mov[1].src_ext = reg_mo;
5754 
5755     return 2;
5756 }
5757 
5758 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5759                                    const TCGLdstHelperParam *parm)
5760 {
5761     const TCGHelperInfo *info;
5762     const TCGCallArgumentLoc *loc;
5763     TCGMovExtend mov[2];
5764     unsigned next_arg, nmov;
5765     MemOp mop = get_memop(ldst->oi);
5766 
5767     switch (mop & MO_SIZE) {
5768     case MO_8:
5769     case MO_16:
5770     case MO_32:
5771         info = &info_helper_ld32_mmu;
5772         break;
5773     case MO_64:
5774         info = &info_helper_ld64_mmu;
5775         break;
5776     case MO_128:
5777         info = &info_helper_ld128_mmu;
5778         break;
5779     default:
5780         g_assert_not_reached();
5781     }
5782 
5783     /* Defer env argument. */
5784     next_arg = 1;
5785 
5786     loc = &info->in[next_arg];
5787     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5788         /*
5789          * 32-bit host with 32-bit guest: zero-extend the guest address
5790          * to 64-bits for the helper by storing the low part, then
5791          * load a zero for the high part.
5792          */
5793         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5794                                TCG_TYPE_I32, TCG_TYPE_I32,
5795                                ldst->addrlo_reg, -1);
5796         tcg_out_helper_load_slots(s, 1, mov, parm);
5797 
5798         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5799                                 TCG_TYPE_I32, 0, parm);
5800         next_arg += 2;
5801     } else {
5802         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5803                                       ldst->addrlo_reg, ldst->addrhi_reg);
5804         tcg_out_helper_load_slots(s, nmov, mov, parm);
5805         next_arg += nmov;
5806     }
5807 
5808     switch (info->out_kind) {
5809     case TCG_CALL_RET_NORMAL:
5810     case TCG_CALL_RET_BY_VEC:
5811         break;
5812     case TCG_CALL_RET_BY_REF:
5813         /*
5814          * The return reference is in the first argument slot.
5815          * We need memory in which to return: re-use the top of stack.
5816          */
5817         {
5818             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5819 
5820             if (arg_slot_reg_p(0)) {
5821                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5822                                  TCG_REG_CALL_STACK, ofs_slot0);
5823             } else {
5824                 tcg_debug_assert(parm->ntmp != 0);
5825                 tcg_out_addi_ptr(s, parm->tmp[0],
5826                                  TCG_REG_CALL_STACK, ofs_slot0);
5827                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5828                            TCG_REG_CALL_STACK, ofs_slot0);
5829             }
5830         }
5831         break;
5832     default:
5833         g_assert_not_reached();
5834     }
5835 
5836     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5837 }
5838 
5839 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5840                                   bool load_sign,
5841                                   const TCGLdstHelperParam *parm)
5842 {
5843     MemOp mop = get_memop(ldst->oi);
5844     TCGMovExtend mov[2];
5845     int ofs_slot0;
5846 
5847     switch (ldst->type) {
5848     case TCG_TYPE_I64:
5849         if (TCG_TARGET_REG_BITS == 32) {
5850             break;
5851         }
5852         /* fall through */
5853 
5854     case TCG_TYPE_I32:
5855         mov[0].dst = ldst->datalo_reg;
5856         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5857         mov[0].dst_type = ldst->type;
5858         mov[0].src_type = TCG_TYPE_REG;
5859 
5860         /*
5861          * If load_sign, then we allowed the helper to perform the
5862          * appropriate sign extension to tcg_target_ulong, and all
5863          * we need now is a plain move.
5864          *
5865          * If not, then we expect the relevant extension
5866          * instruction to be no more expensive than a move, and
5867          * we thus save the icache etc by only using one of two
5868          * helper functions.
5869          */
5870         if (load_sign || !(mop & MO_SIGN)) {
5871             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5872                 mov[0].src_ext = MO_32;
5873             } else {
5874                 mov[0].src_ext = MO_64;
5875             }
5876         } else {
5877             mov[0].src_ext = mop & MO_SSIZE;
5878         }
5879         tcg_out_movext1(s, mov);
5880         return;
5881 
5882     case TCG_TYPE_I128:
5883         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5884         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5885         switch (TCG_TARGET_CALL_RET_I128) {
5886         case TCG_CALL_RET_NORMAL:
5887             break;
5888         case TCG_CALL_RET_BY_VEC:
5889             tcg_out_st(s, TCG_TYPE_V128,
5890                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5891                        TCG_REG_CALL_STACK, ofs_slot0);
5892             /* fall through */
5893         case TCG_CALL_RET_BY_REF:
5894             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5895                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5896             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5897                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5898             return;
5899         default:
5900             g_assert_not_reached();
5901         }
5902         break;
5903 
5904     default:
5905         g_assert_not_reached();
5906     }
5907 
5908     mov[0].dst = ldst->datalo_reg;
5909     mov[0].src =
5910         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5911     mov[0].dst_type = TCG_TYPE_REG;
5912     mov[0].src_type = TCG_TYPE_REG;
5913     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5914 
5915     mov[1].dst = ldst->datahi_reg;
5916     mov[1].src =
5917         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5918     mov[1].dst_type = TCG_TYPE_REG;
5919     mov[1].src_type = TCG_TYPE_REG;
5920     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5921 
5922     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5923 }
5924 
5925 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5926                                    const TCGLdstHelperParam *parm)
5927 {
5928     const TCGHelperInfo *info;
5929     const TCGCallArgumentLoc *loc;
5930     TCGMovExtend mov[4];
5931     TCGType data_type;
5932     unsigned next_arg, nmov, n;
5933     MemOp mop = get_memop(ldst->oi);
5934 
5935     switch (mop & MO_SIZE) {
5936     case MO_8:
5937     case MO_16:
5938     case MO_32:
5939         info = &info_helper_st32_mmu;
5940         data_type = TCG_TYPE_I32;
5941         break;
5942     case MO_64:
5943         info = &info_helper_st64_mmu;
5944         data_type = TCG_TYPE_I64;
5945         break;
5946     case MO_128:
5947         info = &info_helper_st128_mmu;
5948         data_type = TCG_TYPE_I128;
5949         break;
5950     default:
5951         g_assert_not_reached();
5952     }
5953 
5954     /* Defer env argument. */
5955     next_arg = 1;
5956     nmov = 0;
5957 
5958     /* Handle addr argument. */
5959     loc = &info->in[next_arg];
5960     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5961         /*
5962          * 32-bit host with 32-bit guest: zero-extend the guest address
5963          * to 64-bits for the helper by storing the low part.  Later,
5964          * after we have processed the register inputs, we will load a
5965          * zero for the high part.
5966          */
5967         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5968                                TCG_TYPE_I32, TCG_TYPE_I32,
5969                                ldst->addrlo_reg, -1);
5970         next_arg += 2;
5971         nmov += 1;
5972     } else {
5973         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5974                                    ldst->addrlo_reg, ldst->addrhi_reg);
5975         next_arg += n;
5976         nmov += n;
5977     }
5978 
5979     /* Handle data argument. */
5980     loc = &info->in[next_arg];
5981     switch (loc->kind) {
5982     case TCG_CALL_ARG_NORMAL:
5983     case TCG_CALL_ARG_EXTEND_U:
5984     case TCG_CALL_ARG_EXTEND_S:
5985         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5986                                    ldst->datalo_reg, ldst->datahi_reg);
5987         next_arg += n;
5988         nmov += n;
5989         tcg_out_helper_load_slots(s, nmov, mov, parm);
5990         break;
5991 
5992     case TCG_CALL_ARG_BY_REF:
5993         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5994         tcg_debug_assert(data_type == TCG_TYPE_I128);
5995         tcg_out_st(s, TCG_TYPE_I64,
5996                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5997                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5998         tcg_out_st(s, TCG_TYPE_I64,
5999                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6000                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6001 
6002         tcg_out_helper_load_slots(s, nmov, mov, parm);
6003 
6004         if (arg_slot_reg_p(loc->arg_slot)) {
6005             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6006                              TCG_REG_CALL_STACK,
6007                              arg_slot_stk_ofs(loc->ref_slot));
6008         } else {
6009             tcg_debug_assert(parm->ntmp != 0);
6010             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6011                              arg_slot_stk_ofs(loc->ref_slot));
6012             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6013                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6014         }
6015         next_arg += 2;
6016         break;
6017 
6018     default:
6019         g_assert_not_reached();
6020     }
6021 
6022     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6023         /* Zero extend the address by loading a zero for the high part. */
6024         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6025         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6026     }
6027 
6028     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6029 }
6030 
6031 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6032 {
6033     int i, start_words, num_insns;
6034     TCGOp *op;
6035 
6036     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6037                  && qemu_log_in_addr_range(pc_start))) {
6038         FILE *logfile = qemu_log_trylock();
6039         if (logfile) {
6040             fprintf(logfile, "OP:\n");
6041             tcg_dump_ops(s, logfile, false);
6042             fprintf(logfile, "\n");
6043             qemu_log_unlock(logfile);
6044         }
6045     }
6046 
6047 #ifdef CONFIG_DEBUG_TCG
6048     /* Ensure all labels referenced have been emitted.  */
6049     {
6050         TCGLabel *l;
6051         bool error = false;
6052 
6053         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6054             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6055                 qemu_log_mask(CPU_LOG_TB_OP,
6056                               "$L%d referenced but not present.\n", l->id);
6057                 error = true;
6058             }
6059         }
6060         assert(!error);
6061     }
6062 #endif
6063 
6064     tcg_optimize(s);
6065 
6066     reachable_code_pass(s);
6067     liveness_pass_0(s);
6068     liveness_pass_1(s);
6069 
6070     if (s->nb_indirects > 0) {
6071         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6072                      && qemu_log_in_addr_range(pc_start))) {
6073             FILE *logfile = qemu_log_trylock();
6074             if (logfile) {
6075                 fprintf(logfile, "OP before indirect lowering:\n");
6076                 tcg_dump_ops(s, logfile, false);
6077                 fprintf(logfile, "\n");
6078                 qemu_log_unlock(logfile);
6079             }
6080         }
6081 
6082         /* Replace indirect temps with direct temps.  */
6083         if (liveness_pass_2(s)) {
6084             /* If changes were made, re-run liveness.  */
6085             liveness_pass_1(s);
6086         }
6087     }
6088 
6089     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6090                  && qemu_log_in_addr_range(pc_start))) {
6091         FILE *logfile = qemu_log_trylock();
6092         if (logfile) {
6093             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6094             tcg_dump_ops(s, logfile, true);
6095             fprintf(logfile, "\n");
6096             qemu_log_unlock(logfile);
6097         }
6098     }
6099 
6100     /* Initialize goto_tb jump offsets. */
6101     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6102     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6103     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6104     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6105 
6106     tcg_reg_alloc_start(s);
6107 
6108     /*
6109      * Reset the buffer pointers when restarting after overflow.
6110      * TODO: Move this into translate-all.c with the rest of the
6111      * buffer management.  Having only this done here is confusing.
6112      */
6113     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6114     s->code_ptr = s->code_buf;
6115 
6116 #ifdef TCG_TARGET_NEED_LDST_LABELS
6117     QSIMPLEQ_INIT(&s->ldst_labels);
6118 #endif
6119 #ifdef TCG_TARGET_NEED_POOL_LABELS
6120     s->pool_labels = NULL;
6121 #endif
6122 
6123     start_words = s->insn_start_words;
6124     s->gen_insn_data =
6125         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6126 
6127     tcg_out_tb_start(s);
6128 
6129     num_insns = -1;
6130     QTAILQ_FOREACH(op, &s->ops, link) {
6131         TCGOpcode opc = op->opc;
6132 
6133         switch (opc) {
6134         case INDEX_op_mov_i32:
6135         case INDEX_op_mov_i64:
6136         case INDEX_op_mov_vec:
6137             tcg_reg_alloc_mov(s, op);
6138             break;
6139         case INDEX_op_dup_vec:
6140             tcg_reg_alloc_dup(s, op);
6141             break;
6142         case INDEX_op_insn_start:
6143             if (num_insns >= 0) {
6144                 size_t off = tcg_current_code_size(s);
6145                 s->gen_insn_end_off[num_insns] = off;
6146                 /* Assert that we do not overflow our stored offset.  */
6147                 assert(s->gen_insn_end_off[num_insns] == off);
6148             }
6149             num_insns++;
6150             for (i = 0; i < start_words; ++i) {
6151                 s->gen_insn_data[num_insns * start_words + i] =
6152                     tcg_get_insn_start_param(op, i);
6153             }
6154             break;
6155         case INDEX_op_discard:
6156             temp_dead(s, arg_temp(op->args[0]));
6157             break;
6158         case INDEX_op_set_label:
6159             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6160             tcg_out_label(s, arg_label(op->args[0]));
6161             break;
6162         case INDEX_op_call:
6163             tcg_reg_alloc_call(s, op);
6164             break;
6165         case INDEX_op_exit_tb:
6166             tcg_out_exit_tb(s, op->args[0]);
6167             break;
6168         case INDEX_op_goto_tb:
6169             tcg_out_goto_tb(s, op->args[0]);
6170             break;
6171         case INDEX_op_dup2_vec:
6172             if (tcg_reg_alloc_dup2(s, op)) {
6173                 break;
6174             }
6175             /* fall through */
6176         default:
6177             /* Sanity check that we've not introduced any unhandled opcodes. */
6178             tcg_debug_assert(tcg_op_supported(opc));
6179             /* Note: it would be much faster to have specialized
6180                register allocator functions for some common
6181                argument patterns.  */
6182             tcg_reg_alloc_op(s, op);
6183             break;
6184         }
6185         /* Test for (pending) buffer overflow.  The assumption is that any
6186            one operation beginning below the high water mark cannot overrun
6187            the buffer completely.  Thus we can test for overflow after
6188            generating code without having to check during generation.  */
6189         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6190             return -1;
6191         }
6192         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6193         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6194             return -2;
6195         }
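        /*
         * Both negative returns are restart requests: -1 reports
         * (pending) overflow of the code buffer, -2 a TB whose code
         * size no longer fits the uint16_t offsets in gen_insn_end_off.
         * The caller restarts translation, for -2 with half as many
         * guest instructions in the TB.
         */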
6196     }
6197     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6198     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6199 
6200     /* Generate TB finalization at the end of block */
6201 #ifdef TCG_TARGET_NEED_LDST_LABELS
6202     i = tcg_out_ldst_finalize(s);
6203     if (i < 0) {
6204         return i;
6205     }
6206 #endif
6207 #ifdef TCG_TARGET_NEED_POOL_LABELS
6208     i = tcg_out_pool_finalize(s);
6209     if (i < 0) {
6210         return i;
6211     }
6212 #endif
6213     if (!tcg_resolve_relocs(s)) {
6214         return -2;
6215     }
6216 
6217 #ifndef CONFIG_TCG_INTERPRETER
6218     /* flush instruction cache */
6219     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6220                         (uintptr_t)s->code_buf,
6221                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6222 #endif
6223 
6224     return tcg_current_code_size(s);
6225 }
6226 
6227 #ifdef ELF_HOST_MACHINE
6228 /* In order to use this feature, the backend needs to do three things:
6229 
6230    (1) Define ELF_HOST_MACHINE, which both supplies the value for the
6231        ELF image's e_machine field and signals support for the feature.
6232 
6233    (2) Define tcg_register_jit.  This should create a buffer containing
6234        the contents of a .debug_frame section that describes the post-
6235        prologue unwind info for the tcg machine.
6236 
6237    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6238 */
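
/* A minimal sketch of steps (2) and (3), modeled on the existing
   backends; the DebugFrame layout, register number and offsets below
   are hypothetical placeholders, not those of any real host:

       typedef struct {
           DebugFrameHeader h;
           uint8_t fde_def_cfa[4];
       } DebugFrame;

       static const DebugFrame debug_frame = {
           .h.cie.len = sizeof(DebugFrameCIE) - 4, // len excludes itself
           .h.cie.id = -1,                         // -1 marks a CIE
           .h.cie.version = 1,
           .h.cie.code_align = 1,
           .h.cie.data_align = 0x78,               // sleb128 -8
           .h.cie.return_column = 16,              // hypothetical ra regno
           .h.fde.len = sizeof(DebugFrame)
                      - offsetof(DebugFrame, h.fde.cie_offset),
           .fde_def_cfa = {
               12, 7, 0x90, 0x01,                  // DW_CFA_def_cfa r7, +144
           },
       };

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   The FDE's func_start/func_len are patched by tcg_register_jit_int
   itself, so they may be left zero in the template.  */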
6239 
6240 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6241 typedef enum {
6242     JIT_NOACTION = 0,
6243     JIT_REGISTER_FN,
6244     JIT_UNREGISTER_FN
6245 } jit_actions_t;
6246 
6247 struct jit_code_entry {
6248     struct jit_code_entry *next_entry;
6249     struct jit_code_entry *prev_entry;
6250     const void *symfile_addr;
6251     uint64_t symfile_size;
6252 };
6253 
6254 struct jit_descriptor {
6255     uint32_t version;
6256     uint32_t action_flag;
6257     struct jit_code_entry *relevant_entry;
6258     struct jit_code_entry *first_entry;
6259 };
6260 
6261 void __jit_debug_register_code(void) __attribute__((noinline));
6262 void __jit_debug_register_code(void)
6263 {
6264     asm("");
6265 }
6266 
6267 /* Must statically initialize the version, because GDB may check
6268    the version before we can set it.  */
6269 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6270 
6271 /* End GDB interface.  */
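/* An attached GDB places an internal breakpoint on
   __jit_debug_register_code.  Whenever we call it, GDB re-reads
   __jit_debug_descriptor, inspects action_flag and relevant_entry,
   and loads (or unloads) the in-memory object file described by
   symfile_addr/symfile_size -- here, the fake ELF image built below.  */
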
6272 
6273 static int find_string(const char *strtab, const char *str)
6274 {
6275     const char *p = strtab + 1;
6276 
6277     while (1) {
6278         if (strcmp(p, str) == 0) {
6279             return p - strtab;
6280         }
6281         p += strlen(p) + 1;
6282     }
6283 }
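
/* For example, with the string table in img_template below,
   find_string(img->str, ".text") returns 1 and
   find_string(img->str, ".debug_info") returns 7.  There is no failure
   case: callers must only ask for strings known to be present.  */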
6284 
6285 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6286                                  const void *debug_frame,
6287                                  size_t debug_frame_size)
6288 {
6289     struct __attribute__((packed)) DebugInfo {
6290         uint32_t  len;
6291         uint16_t  version;
6292         uint32_t  abbrev;
6293         uint8_t   ptr_size;
6294         uint8_t   cu_die;
6295         uint16_t  cu_lang;
6296         uintptr_t cu_low_pc;
6297         uintptr_t cu_high_pc;
6298         uint8_t   fn_die;
6299         char      fn_name[16];
6300         uintptr_t fn_low_pc;
6301         uintptr_t fn_high_pc;
6302         uint8_t   cu_eoc;
6303     };
6304 
6305     struct ElfImage {
6306         ElfW(Ehdr) ehdr;
6307         ElfW(Phdr) phdr;
6308         ElfW(Shdr) shdr[7];
6309         ElfW(Sym)  sym[2];
6310         struct DebugInfo di;
6311         uint8_t    da[24];
6312         char       str[80];
6313     };
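    /*
     * Every section's contents live at a fixed offsetof() within this
     * struct, so the section headers below can be filled in statically.
     * The one exception is the variable-sized .debug_frame, which is
     * appended directly after the struct (see shdr[4].sh_offset).
     */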
6314 
6315     struct ElfImage *img;
6316 
6317     static const struct ElfImage img_template = {
6318         .ehdr = {
6319             .e_ident[EI_MAG0] = ELFMAG0,
6320             .e_ident[EI_MAG1] = ELFMAG1,
6321             .e_ident[EI_MAG2] = ELFMAG2,
6322             .e_ident[EI_MAG3] = ELFMAG3,
6323             .e_ident[EI_CLASS] = ELF_CLASS,
6324             .e_ident[EI_DATA] = ELF_DATA,
6325             .e_ident[EI_VERSION] = EV_CURRENT,
6326             .e_type = ET_EXEC,
6327             .e_machine = ELF_HOST_MACHINE,
6328             .e_version = EV_CURRENT,
6329             .e_phoff = offsetof(struct ElfImage, phdr),
6330             .e_shoff = offsetof(struct ElfImage, shdr),
6331             .e_ehsize = sizeof(ElfW(Ehdr)),
6332             .e_phentsize = sizeof(ElfW(Phdr)),
6333             .e_phnum = 1,
6334             .e_shentsize = sizeof(ElfW(Shdr)),
6335             .e_shnum = ARRAY_SIZE(img->shdr),
6336             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6337 #ifdef ELF_HOST_FLAGS
6338             .e_flags = ELF_HOST_FLAGS,
6339 #endif
6340 #ifdef ELF_OSABI
6341             .e_ident[EI_OSABI] = ELF_OSABI,
6342 #endif
6343         },
6344         .phdr = {
6345             .p_type = PT_LOAD,
6346             .p_flags = PF_X,
6347         },
6348         .shdr = {
6349             [0] = { .sh_type = SHT_NULL },
6350             /* Trick: the contents of code_gen_buffer are not present in
6351                this fake ELF file; the buffer is allocated elsewhere.  We
6352                therefore mark .text as SHT_NOBITS (like .bss) so that
6353                readers will not look for contents.  We can record any address.  */
6354             [1] = { /* .text */
6355                 .sh_type = SHT_NOBITS,
6356                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6357             },
6358             [2] = { /* .debug_info */
6359                 .sh_type = SHT_PROGBITS,
6360                 .sh_offset = offsetof(struct ElfImage, di),
6361                 .sh_size = sizeof(struct DebugInfo),
6362             },
6363             [3] = { /* .debug_abbrev */
6364                 .sh_type = SHT_PROGBITS,
6365                 .sh_offset = offsetof(struct ElfImage, da),
6366                 .sh_size = sizeof(img->da),
6367             },
6368             [4] = { /* .debug_frame */
6369                 .sh_type = SHT_PROGBITS,
6370                 .sh_offset = sizeof(struct ElfImage),
6371             },
6372             [5] = { /* .symtab */
6373                 .sh_type = SHT_SYMTAB,
6374                 .sh_offset = offsetof(struct ElfImage, sym),
6375                 .sh_size = sizeof(img->sym),
6376                 .sh_info = 1,
6377                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6378                 .sh_entsize = sizeof(ElfW(Sym)),
6379             },
6380             [6] = { /* .strtab */
6381                 .sh_type = SHT_STRTAB,
6382                 .sh_offset = offsetof(struct ElfImage, str),
6383                 .sh_size = sizeof(img->str),
6384             }
6385         },
6386         .sym = {
6387             [1] = { /* code_gen_buffer */
6388                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6389                 .st_shndx = 1,
6390             }
6391         },
6392         .di = {
6393             .len = sizeof(struct DebugInfo) - 4,
6394             .version = 2,
6395             .ptr_size = sizeof(void *),
6396             .cu_die = 1,
6397             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6398             .fn_die = 2,
6399             .fn_name = "code_gen_buffer"
6400         },
6401         .da = {
6402             1,          /* abbrev number (the cu) */
6403             0x11, 1,    /* DW_TAG_compile_unit, has children */
6404             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6405             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6406             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6407             0, 0,       /* end of abbrev */
6408             2,          /* abbrev number (the fn) */
6409             0x2e, 0,    /* DW_TAG_subprogram, no children */
6410             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6411             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6412             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6413             0, 0,       /* end of abbrev */
6414             0           /* no more abbrev */
6415         },
6416         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6417                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6418     };
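    /*
     * How .di and .da above fit together: cu_die = 1 and fn_die = 2
     * select the two abbrevs declared in da[], and the struct members
     * following each die byte supply that abbrev's attribute values in
     * order -- cu_lang as DW_FORM_data2, the low/high pc pairs as
     * DW_FORM_addr, and fn_name as an inline DW_FORM_string.
     */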
6419 
6420     /* We only need a single jit entry; statically allocate it.  */
6421     static struct jit_code_entry one_entry;
6422 
6423     uintptr_t buf = (uintptr_t)buf_ptr;
6424     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6425     DebugFrameHeader *dfh;
6426 
6427     img = g_malloc(img_size);
6428     *img = img_template;
6429 
6430     img->phdr.p_vaddr = buf;
6431     img->phdr.p_paddr = buf;
6432     img->phdr.p_memsz = buf_size;
6433 
6434     img->shdr[1].sh_name = find_string(img->str, ".text");
6435     img->shdr[1].sh_addr = buf;
6436     img->shdr[1].sh_size = buf_size;
6437 
6438     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6439     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6440 
6441     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6442     img->shdr[4].sh_size = debug_frame_size;
6443 
6444     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6445     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6446 
6447     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6448     img->sym[1].st_value = buf;
6449     img->sym[1].st_size = buf_size;
6450 
6451     img->di.cu_low_pc = buf;
6452     img->di.cu_high_pc = buf + buf_size;
6453     img->di.fn_low_pc = buf;
6454     img->di.fn_high_pc = buf + buf_size;
6455 
6456     dfh = (DebugFrameHeader *)(img + 1);
6457     memcpy(dfh, debug_frame, debug_frame_size);
6458     dfh->fde.func_start = buf;
6459     dfh->fde.func_len = buf_size;
6460 
6461 #ifdef DEBUG_JIT
6462     /* Enable this block to debug creation of the ELF image file.
6463        The result can be inspected with readelf, objdump, etc.  */
6464     {
6465         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6466         FILE *f = fopen(jit, "w+b");
6467         if (f) {
6468             if (fwrite(img, img_size, 1, f) != 1) {
6469                 /* Nothing to do on error; the check silences fwrite's unused-result warning.  */
6470             }
6471             fclose(f);
6472         }
6473     }
6474 #endif
6475 
6476     one_entry.symfile_addr = img;
6477     one_entry.symfile_size = img_size;
6478 
6479     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6480     __jit_debug_descriptor.relevant_entry = &one_entry;
6481     __jit_debug_descriptor.first_entry = &one_entry;
6482     __jit_debug_register_code();
6483 }
6484 #else
6485 /* No support for the feature.  Provide the entry point expected by exec.c,
6486    and implement the internal function we declared earlier.  */
6487 
6488 static void tcg_register_jit_int(const void *buf, size_t size,
6489                                  const void *debug_frame,
6490                                  size_t debug_frame_size)
6491 {
6492 }
6493 
6494 void tcg_register_jit(const void *buf, size_t buf_size)
6495 {
6496 }
6497 #endif /* ELF_HOST_MACHINE */
6498 
6499 #if !TCG_TARGET_MAYBE_vec
6500 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6501 {
6502     g_assert_not_reached();
6503 }
6504 #endif
6505