xref: /openbmc/qemu/tcg/tcg.c (revision 74781c0888e819552538593c0932d98ea16c766b)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "user/guest-base.h"
61 #endif
62 
63 /* Forward declarations for functions declared in tcg-target.c.inc and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
68                         intptr_t value, intptr_t addend);
69 
70 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Leading fields of a CIE record, shared by every host backend.
 * NOTE(review): presumably the DWARF .debug_frame Common Information
 * Entry -- confirm against the per-host debug_frame tables.
 */
71 typedef struct {
72     uint32_t len __attribute__((aligned((sizeof(void *)))));  /* forced to pointer-size alignment */
73     uint32_t id;
74     uint8_t version;
75     char augmentation[1];
76     uint8_t code_align;
77     uint8_t data_align;
78     uint8_t return_column;
79 } DebugFrameCIE;
80 
/*
 * Packed FDE header: a length, the offset of the CIE it refers to, and the
 * start address and length of the described function.
 * NOTE(review): presumably the DWARF Frame Description Entry -- confirm.
 */
81 typedef struct QEMU_PACKED {
82     uint32_t len __attribute__((aligned((sizeof(void *)))));  /* forced to pointer-size alignment */
83     uint32_t cie_offset;
84     uintptr_t func_start;
85     uintptr_t func_len;
86 } DebugFrameFDEHeader;
87 
/* A CIE immediately followed by one FDE header, packed with no padding added. */
88 typedef struct QEMU_PACKED {
89     DebugFrameCIE cie;
90     DebugFrameFDEHeader fde;
91 } DebugFrameHeader;
92 
/*
 * Description of one guest memory access (qemu_ld or qemu_st): the
 * registers involved, the code addresses to patch, and the return address.
 * Records are chained through @next.
 * NOTE(review): presumably consumed when emitting the out-of-line slow
 * path -- confirm in tcg-target.c.inc.
 */
93 typedef struct TCGLabelQemuLdst {
94     bool is_ld;             /* qemu_ld: true, qemu_st: false */
95     MemOpIdx oi;
96     TCGType type;           /* result type of a load */
97     TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
98     TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
99     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
100     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
101     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
102     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
103     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
104 } TCGLabelQemuLdst;
105 
106 static void tcg_register_jit_int(const void *buf, size_t size,
107                                  const void *debug_frame,
108                                  size_t debug_frame_size)
109     __attribute__((unused));
110 
111 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
112 static void tcg_out_tb_start(TCGContext *s);
113 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
114                        intptr_t arg2);
115 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
116 static void tcg_out_movi(TCGContext *s, TCGType type,
117                          TCGReg ret, tcg_target_long arg);
118 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
128 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
129 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
130 static void tcg_out_goto_tb(TCGContext *s, int which);
131 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
132                        const TCGArg args[TCG_MAX_OP_ARGS],
133                        const int const_args[TCG_MAX_OP_ARGS]);
134 #if TCG_TARGET_MAYBE_vec
135 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
136                             TCGReg dst, TCGReg src);
137 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, TCGReg base, intptr_t offset);
139 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
140                              TCGReg dst, int64_t arg);
141 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
142                            unsigned vecl, unsigned vece,
143                            const TCGArg args[TCG_MAX_OP_ARGS],
144                            const int const_args[TCG_MAX_OP_ARGS]);
145 #else
/* Stub for hosts where TCG_TARGET_MAYBE_vec is false: must never be called. */
146 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
147                                    TCGReg dst, TCGReg src)
148 {
149     g_assert_not_reached();
150 }
/* Stub for hosts where TCG_TARGET_MAYBE_vec is false: must never be called. */
151 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
152                                     TCGReg dst, TCGReg base, intptr_t offset)
153 {
154     g_assert_not_reached();
155 }
/* Stub for hosts where TCG_TARGET_MAYBE_vec is false: must never be called. */
156 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, int64_t arg)
158 {
159     g_assert_not_reached();
160 }
/* Stub for hosts where TCG_TARGET_MAYBE_vec is false: must never be called. */
161 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
162                                   unsigned vecl, unsigned vece,
163                                   const TCGArg args[TCG_MAX_OP_ARGS],
164                                   const int const_args[TCG_MAX_OP_ARGS])
165 {
166     g_assert_not_reached();
167 }
168 #endif
169 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
170                        intptr_t arg2);
171 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
172                         TCGReg base, intptr_t ofs);
173 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
174                          const TCGHelperInfo *info);
175 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
176 static bool tcg_target_const_match(int64_t val, int ct,
177                                    TCGType type, TCGCond cond, int vece);
178 #ifdef TCG_TARGET_NEED_LDST_LABELS
179 static int tcg_out_ldst_finalize(TCGContext *s);
180 #endif
181 
182 #ifndef CONFIG_USER_ONLY
183 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
184 #endif
185 
/*
 * Backend-supplied parameters for the tcg_out_{ld,st}_helper_* routines
 * declared below.
 */
186 typedef struct TCGLdstHelperParam {
    /* Callback returning a TCGReg for label @l; NOTE(review): presumably
       materialises the return address argument -- confirm with callers. */
187     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
188     unsigned ntmp;  /* NOTE(review): presumably the number of valid tmp[] entries -- confirm */
189     int tmp[3];     /* NOTE(review): presumably scratch register numbers -- confirm */
190 } TCGLdstHelperParam;
191 
192 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
193                                    const TCGLdstHelperParam *p)
194     __attribute__((unused));
195 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
196                                   bool load_sign, const TCGLdstHelperParam *p)
197     __attribute__((unused));
198 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 
/*
 * Load helper entry points, indexed by the size and sign bits of a MemOp
 * (the table has MO_SSIZE + 1 slots).  The MO_SL and MO_128 slots are only
 * populated when the host register width is 64 bits.
 */
202 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
203     [MO_UB] = helper_ldub_mmu,
204     [MO_SB] = helper_ldsb_mmu,
205     [MO_UW] = helper_lduw_mmu,
206     [MO_SW] = helper_ldsw_mmu,
207     [MO_UL] = helper_ldul_mmu,
208     [MO_UQ] = helper_ldq_mmu,
209 #if TCG_TARGET_REG_BITS == 64
210     [MO_SL] = helper_ldsl_mmu,
211     [MO_128] = helper_ld16_mmu,
212 #endif
213 };
214 
/*
 * Store helper entry points, indexed by the size bits of a MemOp (the
 * table has MO_SIZE + 1 slots).  The MO_128 slot is only populated when
 * the host register width is 64 bits.
 */
215 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
216     [MO_8]  = helper_stb_mmu,
217     [MO_16] = helper_stw_mmu,
218     [MO_32] = helper_stl_mmu,
219     [MO_64] = helper_stq_mmu,
220 #if TCG_TARGET_REG_BITS == 64
221     [MO_128] = helper_st16_mmu,
222 #endif
223 };
224 
/* Pair of values returned by atom_and_align_for_opc(). */
225 typedef struct {
226     MemOp atom;   /* lg2 bits of atomicity required */
227     MemOp align;  /* lg2 bits of alignment to use */
228 } TCGAtomAlign;
229 
230 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
231                                            MemOp host_atom, bool allow_two_ops)
232     __attribute__((unused));
233 
234 #ifdef CONFIG_USER_ONLY
235 bool tcg_use_softmmu;
236 #endif
237 
238 TCGContext tcg_init_ctx;
239 __thread TCGContext *tcg_ctx;
240 
241 TCGContext **tcg_ctxs;
242 unsigned int tcg_cur_ctxs;
243 unsigned int tcg_max_ctxs;
244 TCGv_env tcg_env;
245 const void *tcg_code_gen_epilogue;
246 uintptr_t tcg_splitwx_diff;
247 
248 #ifndef CONFIG_TCG_INTERPRETER
249 tcg_prologue_fn *tcg_qemu_tb_exec;
250 #endif
251 
252 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
253 static TCGRegSet tcg_target_call_clobber_regs;
254 
255 #if TCG_TARGET_INSN_UNIT_SIZE == 1
256 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
257 {
258     *s->code_ptr++ = v;
259 }
260 
261 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
262                                                       uint8_t v)
263 {
264     *p = v;
265 }
266 #endif
267 
268 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
269 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
270 {
271     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
272         *s->code_ptr++ = v;
273     } else {
274         tcg_insn_unit *p = s->code_ptr;
275         memcpy(p, &v, sizeof(v));
276         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
277     }
278 }
279 
280 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
281                                                        uint16_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
284         *p = v;
285     } else {
286         memcpy(p, &v, sizeof(v));
287     }
288 }
289 #endif
290 
291 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
292 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
293 {
294     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
295         *s->code_ptr++ = v;
296     } else {
297         tcg_insn_unit *p = s->code_ptr;
298         memcpy(p, &v, sizeof(v));
299         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
300     }
301 }
302 
303 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
304                                                        uint32_t v)
305 {
306     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
307         *p = v;
308     } else {
309         memcpy(p, &v, sizeof(v));
310     }
311 }
312 #endif
313 
314 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
315 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
316 {
317     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
318         *s->code_ptr++ = v;
319     } else {
320         tcg_insn_unit *p = s->code_ptr;
321         memcpy(p, &v, sizeof(v));
322         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
323     }
324 }
325 
326 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
327                                                        uint64_t v)
328 {
329     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
330         *p = v;
331     } else {
332         memcpy(p, &v, sizeof(v));
333     }
334 }
335 #endif
336 
337 /* label relocation processing */
338 
339 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
340                           TCGLabel *l, intptr_t addend)
341 {
342     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
343 
344     r->type = type;
345     r->ptr = code_ptr;
346     r->addend = addend;
347     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
348 }
349 
350 static void tcg_out_label(TCGContext *s, TCGLabel *l)
351 {
352     tcg_debug_assert(!l->has_value);
353     l->has_value = 1;
354     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
355 }
356 
357 TCGLabel *gen_new_label(void)
358 {
359     TCGContext *s = tcg_ctx;
360     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
361 
362     memset(l, 0, sizeof(TCGLabel));
363     l->id = s->nb_labels++;
364     QSIMPLEQ_INIT(&l->branches);
365     QSIMPLEQ_INIT(&l->relocs);
366 
367     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
368 
369     return l;
370 }
371 
372 static bool tcg_resolve_relocs(TCGContext *s)
373 {
374     TCGLabel *l;
375 
376     QSIMPLEQ_FOREACH(l, &s->labels, next) {
377         TCGRelocation *r;
378         uintptr_t value = l->u.value;
379 
380         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
381             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
382                 return false;
383             }
384         }
385     }
386     return true;
387 }
388 
389 static void set_jmp_reset_offset(TCGContext *s, int which)
390 {
391     /*
392      * We will check for overflow at the end of the opcode loop in
393      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
394      */
395     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
396 }
397 
398 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
399 {
400     /*
401      * We will check for overflow at the end of the opcode loop in
402      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
403      */
404     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
405 }
406 
407 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
408 {
409     /*
410      * Return the read-execute version of the pointer, for the benefit
411      * of any pc-relative addressing mode.
412      */
413     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
414 }
415 
416 static int __attribute__((unused))
417 tlb_mask_table_ofs(TCGContext *s, int which)
418 {
419     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
420             sizeof(CPUNegativeOffsetState));
421 }
422 
423 /* Signal overflow, starting over with fewer guest insns. */
424 static G_NORETURN
425 void tcg_raise_tb_overflow(TCGContext *s)
426 {
427     siglongjmp(s->jmp_trans, -2);
428 }
429 
430 /*
431  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
432  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
433  *
434  * However, tcg_out_helper_load_slots reuses this field to hold an
435  * argument slot number (which may designate an argument register or an
436  * argument stack slot), converting to TCGReg once all arguments that
437  * are destined for the stack are processed.
438  */
439 typedef struct TCGMovExtend {
440     unsigned dst;       /* destination register, or an argument slot number (see above) */
441     TCGReg src;         /* source register */
442     TCGType dst_type;   /* integral type of the destination */
443     TCGType src_type;   /* integral type of the source */
444     MemOp src_ext;      /* extension to apply to the source */
445 } TCGMovExtend;
446 
447 /**
448  * tcg_out_movext -- move and extend
449  * @s: tcg context
450  * @dst_type: integral type for destination
451  * @dst: destination register
452  * @src_type: integral type for source
453  * @src_ext: extension to apply to source
454  * @src: source register
455  *
456  * Move or extend @src into @dst, depending on @src_ext and the types.
457  */
458 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
459                            TCGType src_type, MemOp src_ext, TCGReg src)
460 {
461     switch (src_ext) {
462     case MO_UB:
463         tcg_out_ext8u(s, dst, src);
464         break;
465     case MO_SB:
466         tcg_out_ext8s(s, dst_type, dst, src);
467         break;
468     case MO_UW:
469         tcg_out_ext16u(s, dst, src);
470         break;
471     case MO_SW:
472         tcg_out_ext16s(s, dst_type, dst, src);
473         break;
474     case MO_UL:
475     case MO_SL:
476         if (dst_type == TCG_TYPE_I32) {
477             if (src_type == TCG_TYPE_I32) {
478                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
479             } else {
480                 tcg_out_extrl_i64_i32(s, dst, src);
481             }
482         } else if (src_type == TCG_TYPE_I32) {
483             if (src_ext & MO_SIGN) {
484                 tcg_out_exts_i32_i64(s, dst, src);
485             } else {
486                 tcg_out_extu_i32_i64(s, dst, src);
487             }
488         } else {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_ext32s(s, dst, src);
491             } else {
492                 tcg_out_ext32u(s, dst, src);
493             }
494         }
495         break;
496     case MO_UQ:
497         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
498         if (dst_type == TCG_TYPE_I32) {
499             tcg_out_extrl_i64_i32(s, dst, src);
500         } else {
501             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
502         }
503         break;
504     default:
505         g_assert_not_reached();
506     }
507 }
508 
509 /* Minor variations on a theme, using a structure. */
510 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
511                                     TCGReg src)
512 {
513     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
514 }
515 
516 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
517 {
518     tcg_out_movext1_new_src(s, i, i->src);
519 }
520 
521 /**
522  * tcg_out_movext2 -- move and extend two pair
523  * @s: tcg context
524  * @i1: first move description
525  * @i2: second move description
526  * @scratch: temporary register, or -1 for none
527  *
528  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
529  * between the sources and destinations.
530  */
531 
532 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
533                             const TCGMovExtend *i2, int scratch)
534 {
535     TCGReg src1 = i1->src;
536     TCGReg src2 = i2->src;
537 
538     if (i1->dst != src2) {
539         tcg_out_movext1(s, i1);
540         tcg_out_movext1(s, i2);
541         return;
542     }
543     if (i2->dst == src1) {
544         TCGType src1_type = i1->src_type;
545         TCGType src2_type = i2->src_type;
546 
547         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
548             /* The data is now in the correct registers, now extend. */
549             src1 = i2->src;
550             src2 = i1->src;
551         } else {
552             tcg_debug_assert(scratch >= 0);
553             tcg_out_mov(s, src1_type, scratch, src1);
554             src1 = scratch;
555         }
556     }
557     tcg_out_movext1_new_src(s, i2, src2);
558     tcg_out_movext1_new_src(s, i1, src1);
559 }
560 
561 /**
562  * tcg_out_movext3 -- move and extend three pair
563  * @s: tcg context
564  * @i1: first move description
565  * @i2: second move description
566  * @i3: third move description
567  * @scratch: temporary register, or -1 for none
568  *
569  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
570  * between the sources and destinations.
571  */
572 
573 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
574                             const TCGMovExtend *i2, const TCGMovExtend *i3,
575                             int scratch)
576 {
577     TCGReg src1 = i1->src;
578     TCGReg src2 = i2->src;
579     TCGReg src3 = i3->src;
580 
581     if (i1->dst != src2 && i1->dst != src3) {
582         tcg_out_movext1(s, i1);
583         tcg_out_movext2(s, i2, i3, scratch);
584         return;
585     }
586     if (i2->dst != src1 && i2->dst != src3) {
587         tcg_out_movext1(s, i2);
588         tcg_out_movext2(s, i1, i3, scratch);
589         return;
590     }
591     if (i3->dst != src1 && i3->dst != src2) {
592         tcg_out_movext1(s, i3);
593         tcg_out_movext2(s, i1, i2, scratch);
594         return;
595     }
596 
597     /*
598      * There is a cycle.  Since there are only 3 nodes, the cycle is
599      * either "clockwise" or "anti-clockwise", and can be solved with
600      * a single scratch or two xchg.
601      */
602     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
603         /* "Clockwise" */
604         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
605             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
606             /* The data is now in the correct registers, now extend. */
607             tcg_out_movext1_new_src(s, i1, i1->dst);
608             tcg_out_movext1_new_src(s, i2, i2->dst);
609             tcg_out_movext1_new_src(s, i3, i3->dst);
610         } else {
611             tcg_debug_assert(scratch >= 0);
612             tcg_out_mov(s, i1->src_type, scratch, src1);
613             tcg_out_movext1(s, i3);
614             tcg_out_movext1(s, i2);
615             tcg_out_movext1_new_src(s, i1, scratch);
616         }
617     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
618         /* "Anti-clockwise" */
619         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
620             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
621             /* The data is now in the correct registers, now extend. */
622             tcg_out_movext1_new_src(s, i1, i1->dst);
623             tcg_out_movext1_new_src(s, i2, i2->dst);
624             tcg_out_movext1_new_src(s, i3, i3->dst);
625         } else {
626             tcg_debug_assert(scratch >= 0);
627             tcg_out_mov(s, i1->src_type, scratch, src1);
628             tcg_out_movext1(s, i2);
629             tcg_out_movext1(s, i3);
630             tcg_out_movext1_new_src(s, i1, scratch);
631         }
632     } else {
633         g_assert_not_reached();
634     }
635 }
636 
637 #define C_PFX1(P, A)                    P##A
638 #define C_PFX2(P, A, B)                 P##A##_##B
639 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
640 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
641 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
642 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
643 
644 /* Define an enumeration for the various combinations. */
645 
646 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
647 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
648 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
649 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
650 
651 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
652 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
653 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
654 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
655 
656 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
657 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
658 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
659 
660 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
661 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
662 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
663 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
664 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
665 
666 typedef enum {
667 #include "tcg-target-con-set.h"
668 } TCGConstraintSetIndex;
669 
670 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
671 
672 #undef C_O0_I1
673 #undef C_O0_I2
674 #undef C_O0_I3
675 #undef C_O0_I4
676 #undef C_O1_I1
677 #undef C_O1_I2
678 #undef C_O1_I3
679 #undef C_O1_I4
680 #undef C_N1_I2
681 #undef C_N1O1_I1
682 #undef C_N2_I1
683 #undef C_O2_I1
684 #undef C_O2_I2
685 #undef C_O2_I3
686 #undef C_O2_I4
687 #undef C_N1_O1_I4
688 
689 /* Put all of the constraint sets into an array, indexed by the enum. */
690 
691 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
692 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
693 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
694 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
695 
696 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
697 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
698 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
699 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
700 
701 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
702 #define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
703 #define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
704 
705 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
706 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
707 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
708 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
709 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
710 
711 static const TCGTargetOpDef constraint_sets[] = {
712 #include "tcg-target-con-set.h"
713 };
714 
715 
716 #undef C_O0_I1
717 #undef C_O0_I2
718 #undef C_O0_I3
719 #undef C_O0_I4
720 #undef C_O1_I1
721 #undef C_O1_I2
722 #undef C_O1_I3
723 #undef C_O1_I4
724 #undef C_N1_I2
725 #undef C_N1O1_I1
726 #undef C_N2_I1
727 #undef C_O2_I1
728 #undef C_O2_I2
729 #undef C_O2_I3
730 #undef C_O2_I4
731 #undef C_N1_O1_I4
732 
733 /* Expand the enumerator to be returned from tcg_target_op_def(). */
734 
735 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
736 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
737 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
738 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
739 
740 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
741 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
742 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
743 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
744 
745 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
746 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
747 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
748 
749 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
750 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
751 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
752 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
753 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
754 
755 #include "tcg-target.c.inc"
756 
757 #ifndef CONFIG_TCG_INTERPRETER
758 /* Validate CPUTLBDescFast placement. */
759 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
760                         sizeof(CPUNegativeOffsetState))
761                   < MIN_TLB_MASK_TABLE_OFS);
762 #endif
763 
764 static void alloc_tcg_plugin_context(TCGContext *s)
765 {
766 #ifdef CONFIG_PLUGIN
767     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
768     s->plugin_tb->insns =
769         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
770 #endif
771 }
772 
773 /*
774  * All TCG threads except the parent (i.e. the one that called tcg_context_init
775  * and registered the target's TCG globals) must register with this function
776  * before initiating translation.
777  *
778  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
779  * of tcg_region_init() for the reasoning behind this.
780  *
781  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
782  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
783  * is not used anymore for translation once this function is called.
784  *
785  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
786  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
787  * modes.
788  */
789 #ifdef CONFIG_USER_ONLY
/* User-mode variant: point this thread's tcg_ctx at the shared tcg_init_ctx. */
790 void tcg_register_thread(void)
791 {
792     tcg_ctx = &tcg_init_ctx;
793 }
794 #else
795 void tcg_register_thread(void)
796 {
797     TCGContext *s = g_malloc(sizeof(*s));
798     unsigned int i, n;
799 
800     *s = tcg_init_ctx;
801 
802     /* Relink mem_base.  */
803     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
804         if (tcg_init_ctx.temps[i].mem_base) {
805             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
806             tcg_debug_assert(b >= 0 && b < n);
807             s->temps[i].mem_base = &s->temps[b];
808         }
809     }
810 
811     /* Claim an entry in tcg_ctxs */
812     n = qatomic_fetch_inc(&tcg_cur_ctxs);
813     g_assert(n < tcg_max_ctxs);
814     qatomic_set(&tcg_ctxs[n], s);
815 
816     if (n > 0) {
817         alloc_tcg_plugin_context(s);
818         tcg_region_initial_alloc(s);
819     }
820 
821     tcg_ctx = s;
822 }
823 #endif /* !CONFIG_USER_ONLY */
824 
825 /* pool based memory allocation */
826 void *tcg_malloc_internal(TCGContext *s, int size)
827 {
828     TCGPool *p;
829     int pool_size;
830 
831     if (size > TCG_POOL_CHUNK_SIZE) {
832         /* big malloc: insert a new pool (XXX: could optimize) */
833         p = g_malloc(sizeof(TCGPool) + size);
834         p->size = size;
835         p->next = s->pool_first_large;
836         s->pool_first_large = p;
837         return p->data;
838     } else {
839         p = s->pool_current;
840         if (!p) {
841             p = s->pool_first;
842             if (!p)
843                 goto new_pool;
844         } else {
845             if (!p->next) {
846             new_pool:
847                 pool_size = TCG_POOL_CHUNK_SIZE;
848                 p = g_malloc(sizeof(TCGPool) + pool_size);
849                 p->size = pool_size;
850                 p->next = NULL;
851                 if (s->pool_current) {
852                     s->pool_current->next = p;
853                 } else {
854                     s->pool_first = p;
855                 }
856             } else {
857                 p = p->next;
858             }
859         }
860     }
861     s->pool_current = p;
862     s->pool_cur = p->data + size;
863     s->pool_end = p->data + p->size;
864     return p->data;
865 }
866 
867 void tcg_pool_reset(TCGContext *s)
868 {
869     TCGPool *p, *t;
870     for (p = s->pool_first_large; p; p = t) {
871         t = p->next;
872         g_free(p);
873     }
874     s->pool_first_large = NULL;
875     s->pool_cur = s->pool_end = NULL;
876     s->pool_current = NULL;
877 }
878 
879 /*
880  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
881  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
882  * We only use these for layout in tcg_out_ld_helper_ret and
883  * tcg_out_st_helper_args, and share them between several of
884  * the helpers, with the end result that it's easier to build manually.
885  */
886 
887 #if TCG_TARGET_REG_BITS == 32
888 # define dh_typecode_ttl  dh_typecode_i32
889 #else
890 # define dh_typecode_ttl  dh_typecode_i64
891 #endif
892 
893 static TCGHelperInfo info_helper_ld32_mmu = {
894     .flags = TCG_CALL_NO_WG,
895     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
896               | dh_typemask(env, 1)
897               | dh_typemask(i64, 2)  /* uint64_t addr */
898               | dh_typemask(i32, 3)  /* unsigned oi */
899               | dh_typemask(ptr, 4)  /* uintptr_t ra */
900 };
901 
902 static TCGHelperInfo info_helper_ld64_mmu = {
903     .flags = TCG_CALL_NO_WG,
904     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
905               | dh_typemask(env, 1)
906               | dh_typemask(i64, 2)  /* uint64_t addr */
907               | dh_typemask(i32, 3)  /* unsigned oi */
908               | dh_typemask(ptr, 4)  /* uintptr_t ra */
909 };
910 
911 static TCGHelperInfo info_helper_ld128_mmu = {
912     .flags = TCG_CALL_NO_WG,
913     .typemask = dh_typemask(i128, 0) /* return Int128 */
914               | dh_typemask(env, 1)
915               | dh_typemask(i64, 2)  /* uint64_t addr */
916               | dh_typemask(i32, 3)  /* unsigned oi */
917               | dh_typemask(ptr, 4)  /* uintptr_t ra */
918 };
919 
920 static TCGHelperInfo info_helper_st32_mmu = {
921     .flags = TCG_CALL_NO_WG,
922     .typemask = dh_typemask(void, 0)
923               | dh_typemask(env, 1)
924               | dh_typemask(i64, 2)  /* uint64_t addr */
925               | dh_typemask(i32, 3)  /* uint32_t data */
926               | dh_typemask(i32, 4)  /* unsigned oi */
927               | dh_typemask(ptr, 5)  /* uintptr_t ra */
928 };
929 
930 static TCGHelperInfo info_helper_st64_mmu = {
931     .flags = TCG_CALL_NO_WG,
932     .typemask = dh_typemask(void, 0)
933               | dh_typemask(env, 1)
934               | dh_typemask(i64, 2)  /* uint64_t addr */
935               | dh_typemask(i64, 3)  /* uint64_t data */
936               | dh_typemask(i32, 4)  /* unsigned oi */
937               | dh_typemask(ptr, 5)  /* uintptr_t ra */
938 };
939 
940 static TCGHelperInfo info_helper_st128_mmu = {
941     .flags = TCG_CALL_NO_WG,
942     .typemask = dh_typemask(void, 0)
943               | dh_typemask(env, 1)
944               | dh_typemask(i64, 2)  /* uint64_t addr */
945               | dh_typemask(i128, 3) /* Int128 data */
946               | dh_typemask(i32, 4)  /* unsigned oi */
947               | dh_typemask(ptr, 5)  /* uintptr_t ra */
948 };
949 
950 #ifdef CONFIG_TCG_INTERPRETER
951 static ffi_type *typecode_to_ffi(int argmask)
952 {
953     /*
954      * libffi does not support __int128_t, so we have forced Int128
955      * to use the structure definition instead of the builtin type.
956      */
957     static ffi_type *ffi_type_i128_elements[3] = {
958         &ffi_type_uint64,
959         &ffi_type_uint64,
960         NULL
961     };
962     static ffi_type ffi_type_i128 = {
963         .size = 16,
964         .alignment = __alignof__(Int128),
965         .type = FFI_TYPE_STRUCT,
966         .elements = ffi_type_i128_elements,
967     };
968 
969     switch (argmask) {
970     case dh_typecode_void:
971         return &ffi_type_void;
972     case dh_typecode_i32:
973         return &ffi_type_uint32;
974     case dh_typecode_s32:
975         return &ffi_type_sint32;
976     case dh_typecode_i64:
977         return &ffi_type_uint64;
978     case dh_typecode_s64:
979         return &ffi_type_sint64;
980     case dh_typecode_ptr:
981         return &ffi_type_pointer;
982     case dh_typecode_i128:
983         return &ffi_type_i128;
984     }
985     g_assert_not_reached();
986 }
987 
988 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
989 {
990     unsigned typemask = info->typemask;
991     struct {
992         ffi_cif cif;
993         ffi_type *args[];
994     } *ca;
995     ffi_status status;
996     int nargs;
997 
998     /* Ignoring the return type, find the last non-zero field. */
999     nargs = 32 - clz32(typemask >> 3);
1000     nargs = DIV_ROUND_UP(nargs, 3);
1001     assert(nargs <= MAX_CALL_IARGS);
1002 
1003     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1004     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1005     ca->cif.nargs = nargs;
1006 
1007     if (nargs != 0) {
1008         ca->cif.arg_types = ca->args;
1009         for (int j = 0; j < nargs; ++j) {
1010             int typecode = extract32(typemask, (j + 1) * 3, 3);
1011             ca->args[j] = typecode_to_ffi(typecode);
1012         }
1013     }
1014 
1015     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1016                           ca->cif.rtype, ca->cif.arg_types);
1017     assert(status == FFI_OK);
1018 
1019     return &ca->cif;
1020 }
1021 
1022 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1023 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1024 #else
1025 #define HELPER_INFO_INIT(I)      (&(I)->init)
1026 #define HELPER_INFO_INIT_VAL(I)  1
1027 #endif /* CONFIG_TCG_INTERPRETER */
1028 
1029 static inline bool arg_slot_reg_p(unsigned arg_slot)
1030 {
1031     /*
1032      * Split the sizeof away from the comparison to avoid Werror from
1033      * "unsigned < 0 is always false", when iarg_regs is empty.
1034      */
1035     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1036     return arg_slot < nreg;
1037 }
1038 
1039 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1040 {
1041     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1042     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1043 
1044     tcg_debug_assert(stk_slot < max);
1045     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1046 }
1047 
/* Running cursors used while laying out the arguments of one helper. */
typedef struct TCGCumulativeArgs {
    /* Index into tcg_gen_callN args[] */
    int arg_idx;
    /* Index into TCGHelperInfo in[] */
    int info_in_idx;
    /* Next regs+stack slot */
    int arg_slot;
    /* Next stack slot for references */
    int ref_slot;
} TCGCumulativeArgs;
1054 
1055 static void layout_arg_even(TCGCumulativeArgs *cum)
1056 {
1057     cum->arg_slot += cum->arg_slot & 1;
1058 }
1059 
1060 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1061                          TCGCallArgumentKind kind)
1062 {
1063     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1064 
1065     *loc = (TCGCallArgumentLoc){
1066         .kind = kind,
1067         .arg_idx = cum->arg_idx,
1068         .arg_slot = cum->arg_slot,
1069     };
1070     cum->info_in_idx++;
1071     cum->arg_slot++;
1072 }
1073 
1074 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1075                                 TCGHelperInfo *info, int n)
1076 {
1077     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1078 
1079     for (int i = 0; i < n; ++i) {
1080         /* Layout all using the same arg_idx, adjusting the subindex. */
1081         loc[i] = (TCGCallArgumentLoc){
1082             .kind = TCG_CALL_ARG_NORMAL,
1083             .arg_idx = cum->arg_idx,
1084             .tmp_subindex = i,
1085             .arg_slot = cum->arg_slot + i,
1086         };
1087     }
1088     cum->info_in_idx += n;
1089     cum->arg_slot += n;
1090 }
1091 
1092 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1093 {
1094     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1095     int n = 128 / TCG_TARGET_REG_BITS;
1096 
1097     /* The first subindex carries the pointer. */
1098     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1099 
1100     /*
1101      * The callee is allowed to clobber memory associated with
1102      * structure pass by-reference.  Therefore we must make copies.
1103      * Allocate space from "ref_slot", which will be adjusted to
1104      * follow the parameters on the stack.
1105      */
1106     loc[0].ref_slot = cum->ref_slot;
1107 
1108     /*
1109      * Subsequent words also go into the reference slot, but
1110      * do not accumulate into the regular arguments.
1111      */
1112     for (int i = 1; i < n; ++i) {
1113         loc[i] = (TCGCallArgumentLoc){
1114             .kind = TCG_CALL_ARG_BY_REF_N,
1115             .arg_idx = cum->arg_idx,
1116             .tmp_subindex = i,
1117             .ref_slot = cum->ref_slot + i,
1118         };
1119     }
1120     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1121     cum->ref_slot += n;
1122 }
1123 
1124 static void init_call_layout(TCGHelperInfo *info)
1125 {
1126     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1127     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1128     unsigned typemask = info->typemask;
1129     unsigned typecode;
1130     TCGCumulativeArgs cum = { };
1131 
1132     /*
1133      * Parse and place any function return value.
1134      */
1135     typecode = typemask & 7;
1136     switch (typecode) {
1137     case dh_typecode_void:
1138         info->nr_out = 0;
1139         break;
1140     case dh_typecode_i32:
1141     case dh_typecode_s32:
1142     case dh_typecode_ptr:
1143         info->nr_out = 1;
1144         info->out_kind = TCG_CALL_RET_NORMAL;
1145         break;
1146     case dh_typecode_i64:
1147     case dh_typecode_s64:
1148         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1149         info->out_kind = TCG_CALL_RET_NORMAL;
1150         /* Query the last register now to trigger any assert early. */
1151         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1152         break;
1153     case dh_typecode_i128:
1154         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1155         info->out_kind = TCG_TARGET_CALL_RET_I128;
1156         switch (TCG_TARGET_CALL_RET_I128) {
1157         case TCG_CALL_RET_NORMAL:
1158             /* Query the last register now to trigger any assert early. */
1159             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1160             break;
1161         case TCG_CALL_RET_BY_VEC:
1162             /* Query the single register now to trigger any assert early. */
1163             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1164             break;
1165         case TCG_CALL_RET_BY_REF:
1166             /*
1167              * Allocate the first argument to the output.
1168              * We don't need to store this anywhere, just make it
1169              * unavailable for use in the input loop below.
1170              */
1171             cum.arg_slot = 1;
1172             break;
1173         default:
1174             qemu_build_not_reached();
1175         }
1176         break;
1177     default:
1178         g_assert_not_reached();
1179     }
1180 
1181     /*
1182      * Parse and place function arguments.
1183      */
1184     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1185         TCGCallArgumentKind kind;
1186         TCGType type;
1187 
1188         typecode = typemask & 7;
1189         switch (typecode) {
1190         case dh_typecode_i32:
1191         case dh_typecode_s32:
1192             type = TCG_TYPE_I32;
1193             break;
1194         case dh_typecode_i64:
1195         case dh_typecode_s64:
1196             type = TCG_TYPE_I64;
1197             break;
1198         case dh_typecode_ptr:
1199             type = TCG_TYPE_PTR;
1200             break;
1201         case dh_typecode_i128:
1202             type = TCG_TYPE_I128;
1203             break;
1204         default:
1205             g_assert_not_reached();
1206         }
1207 
1208         switch (type) {
1209         case TCG_TYPE_I32:
1210             switch (TCG_TARGET_CALL_ARG_I32) {
1211             case TCG_CALL_ARG_EVEN:
1212                 layout_arg_even(&cum);
1213                 /* fall through */
1214             case TCG_CALL_ARG_NORMAL:
1215                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1216                 break;
1217             case TCG_CALL_ARG_EXTEND:
1218                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1219                 layout_arg_1(&cum, info, kind);
1220                 break;
1221             default:
1222                 qemu_build_not_reached();
1223             }
1224             break;
1225 
1226         case TCG_TYPE_I64:
1227             switch (TCG_TARGET_CALL_ARG_I64) {
1228             case TCG_CALL_ARG_EVEN:
1229                 layout_arg_even(&cum);
1230                 /* fall through */
1231             case TCG_CALL_ARG_NORMAL:
1232                 if (TCG_TARGET_REG_BITS == 32) {
1233                     layout_arg_normal_n(&cum, info, 2);
1234                 } else {
1235                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1236                 }
1237                 break;
1238             default:
1239                 qemu_build_not_reached();
1240             }
1241             break;
1242 
1243         case TCG_TYPE_I128:
1244             switch (TCG_TARGET_CALL_ARG_I128) {
1245             case TCG_CALL_ARG_EVEN:
1246                 layout_arg_even(&cum);
1247                 /* fall through */
1248             case TCG_CALL_ARG_NORMAL:
1249                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1250                 break;
1251             case TCG_CALL_ARG_BY_REF:
1252                 layout_arg_by_ref(&cum, info);
1253                 break;
1254             default:
1255                 qemu_build_not_reached();
1256             }
1257             break;
1258 
1259         default:
1260             g_assert_not_reached();
1261         }
1262     }
1263     info->nr_in = cum.info_in_idx;
1264 
1265     /* Validate that we didn't overrun the input array. */
1266     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1267     /* Validate the backend has enough argument space. */
1268     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1269 
1270     /*
1271      * Relocate the "ref_slot" area to the end of the parameters.
1272      * Minimizing this stack offset helps code size for x86,
1273      * which has a signed 8-bit offset encoding.
1274      */
1275     if (cum.ref_slot != 0) {
1276         int ref_base = 0;
1277 
1278         if (cum.arg_slot > max_reg_slots) {
1279             int align = __alignof(Int128) / sizeof(tcg_target_long);
1280 
1281             ref_base = cum.arg_slot - max_reg_slots;
1282             if (align > 1) {
1283                 ref_base = ROUND_UP(ref_base, align);
1284             }
1285         }
1286         assert(ref_base + cum.ref_slot <= max_stk_slots);
1287         ref_base += max_reg_slots;
1288 
1289         if (ref_base != 0) {
1290             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1291                 TCGCallArgumentLoc *loc = &info->in[i];
1292                 switch (loc->kind) {
1293                 case TCG_CALL_ARG_BY_REF:
1294                 case TCG_CALL_ARG_BY_REF_N:
1295                     loc->ref_slot += ref_base;
1296                     break;
1297                 default:
1298                     break;
1299                 }
1300             }
1301         }
1302     }
1303 }
1304 
1305 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1306 static void process_op_defs(TCGContext *s);
1307 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1308                                             TCGReg reg, const char *name);
1309 
1310 static void tcg_context_init(unsigned max_cpus)
1311 {
1312     TCGContext *s = &tcg_init_ctx;
1313     int op, total_args, n, i;
1314     TCGOpDef *def;
1315     TCGArgConstraint *args_ct;
1316     TCGTemp *ts;
1317 
1318     memset(s, 0, sizeof(*s));
1319     s->nb_globals = 0;
1320 
1321     /* Count total number of arguments and allocate the corresponding
1322        space */
1323     total_args = 0;
1324     for(op = 0; op < NB_OPS; op++) {
1325         def = &tcg_op_defs[op];
1326         n = def->nb_iargs + def->nb_oargs;
1327         total_args += n;
1328     }
1329 
1330     args_ct = g_new0(TCGArgConstraint, total_args);
1331 
1332     for(op = 0; op < NB_OPS; op++) {
1333         def = &tcg_op_defs[op];
1334         def->args_ct = args_ct;
1335         n = def->nb_iargs + def->nb_oargs;
1336         args_ct += n;
1337     }
1338 
1339     init_call_layout(&info_helper_ld32_mmu);
1340     init_call_layout(&info_helper_ld64_mmu);
1341     init_call_layout(&info_helper_ld128_mmu);
1342     init_call_layout(&info_helper_st32_mmu);
1343     init_call_layout(&info_helper_st64_mmu);
1344     init_call_layout(&info_helper_st128_mmu);
1345 
1346     tcg_target_init(s);
1347     process_op_defs(s);
1348 
1349     /* Reverse the order of the saved registers, assuming they're all at
1350        the start of tcg_target_reg_alloc_order.  */
1351     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1352         int r = tcg_target_reg_alloc_order[n];
1353         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1354             break;
1355         }
1356     }
1357     for (i = 0; i < n; ++i) {
1358         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1359     }
1360     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1361         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1362     }
1363 
1364     alloc_tcg_plugin_context(s);
1365 
1366     tcg_ctx = s;
1367     /*
1368      * In user-mode we simply share the init context among threads, since we
1369      * use a single region. See the documentation tcg_region_init() for the
1370      * reasoning behind this.
1371      * In system-mode we will have at most max_cpus TCG threads.
1372      */
1373 #ifdef CONFIG_USER_ONLY
1374     tcg_ctxs = &tcg_ctx;
1375     tcg_cur_ctxs = 1;
1376     tcg_max_ctxs = 1;
1377 #else
1378     tcg_max_ctxs = max_cpus;
1379     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1380 #endif
1381 
1382     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1383     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1384     tcg_env = temp_tcgv_ptr(ts);
1385 }
1386 
/*
 * Public TCG start-up entry point: initialise the context state first,
 * then the code-generation regions.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    /* Context state must exist before regions are carved out. */
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1392 
1393 /*
1394  * Allocate TBs right before their corresponding translated code, making
1395  * sure that TBs and code are on different cache lines.
1396  */
1397 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1398 {
1399     uintptr_t align = qemu_icache_linesize;
1400     TranslationBlock *tb;
1401     void *next;
1402 
1403  retry:
1404     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1405     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1406 
1407     if (unlikely(next > s->code_gen_highwater)) {
1408         if (tcg_region_alloc(s)) {
1409             return NULL;
1410         }
1411         goto retry;
1412     }
1413     qatomic_set(&s->code_gen_ptr, next);
1414     s->data_gen_ptr = NULL;
1415     return tb;
1416 }
1417 
1418 void tcg_prologue_init(void)
1419 {
1420     TCGContext *s = tcg_ctx;
1421     size_t prologue_size;
1422 
1423     s->code_ptr = s->code_gen_ptr;
1424     s->code_buf = s->code_gen_ptr;
1425     s->data_gen_ptr = NULL;
1426 
1427 #ifndef CONFIG_TCG_INTERPRETER
1428     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1429 #endif
1430 
1431 #ifdef TCG_TARGET_NEED_POOL_LABELS
1432     s->pool_labels = NULL;
1433 #endif
1434 
1435     qemu_thread_jit_write();
1436     /* Generate the prologue.  */
1437     tcg_target_qemu_prologue(s);
1438 
1439 #ifdef TCG_TARGET_NEED_POOL_LABELS
1440     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1441     {
1442         int result = tcg_out_pool_finalize(s);
1443         tcg_debug_assert(result == 0);
1444     }
1445 #endif
1446 
1447     prologue_size = tcg_current_code_size(s);
1448     perf_report_prologue(s->code_gen_ptr, prologue_size);
1449 
1450 #ifndef CONFIG_TCG_INTERPRETER
1451     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1452                         (uintptr_t)s->code_buf, prologue_size);
1453 #endif
1454 
1455     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1456         FILE *logfile = qemu_log_trylock();
1457         if (logfile) {
1458             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1459             if (s->data_gen_ptr) {
1460                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1461                 size_t data_size = prologue_size - code_size;
1462                 size_t i;
1463 
1464                 disas(logfile, s->code_gen_ptr, code_size);
1465 
1466                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1467                     if (sizeof(tcg_target_ulong) == 8) {
1468                         fprintf(logfile,
1469                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1470                                 (uintptr_t)s->data_gen_ptr + i,
1471                                 *(uint64_t *)(s->data_gen_ptr + i));
1472                     } else {
1473                         fprintf(logfile,
1474                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1475                                 (uintptr_t)s->data_gen_ptr + i,
1476                                 *(uint32_t *)(s->data_gen_ptr + i));
1477                     }
1478                 }
1479             } else {
1480                 disas(logfile, s->code_gen_ptr, prologue_size);
1481             }
1482             fprintf(logfile, "\n");
1483             qemu_log_unlock(logfile);
1484         }
1485     }
1486 
1487 #ifndef CONFIG_TCG_INTERPRETER
1488     /*
1489      * Assert that goto_ptr is implemented completely, setting an epilogue.
1490      * For tci, we use NULL as the signal to return from the interpreter,
1491      * so skip this check.
1492      */
1493     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1494 #endif
1495 
1496     tcg_region_prologue_set(s);
1497 }
1498 
1499 void tcg_func_start(TCGContext *s)
1500 {
1501     tcg_pool_reset(s);
1502     s->nb_temps = s->nb_globals;
1503 
1504     /* No temps have been previously allocated for size or locality.  */
1505     memset(s->free_temps, 0, sizeof(s->free_temps));
1506 
1507     /* No constant temps have been previously allocated. */
1508     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1509         if (s->const_table[i]) {
1510             g_hash_table_remove_all(s->const_table[i]);
1511         }
1512     }
1513 
1514     s->nb_ops = 0;
1515     s->nb_labels = 0;
1516     s->current_frame_offset = s->frame_start;
1517 
1518 #ifdef CONFIG_DEBUG_TCG
1519     s->goto_tb_issue_mask = 0;
1520 #endif
1521 
1522     QTAILQ_INIT(&s->ops);
1523     QTAILQ_INIT(&s->free_ops);
1524     s->emit_before_op = NULL;
1525     QSIMPLEQ_INIT(&s->labels);
1526 
1527     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1528                      s->addr_type == TCG_TYPE_I64);
1529 
1530     tcg_debug_assert(s->insn_start_words > 0);
1531 }
1532 
1533 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1534 {
1535     int n = s->nb_temps++;
1536 
1537     if (n >= TCG_MAX_TEMPS) {
1538         tcg_raise_tb_overflow(s);
1539     }
1540     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1541 }
1542 
1543 static TCGTemp *tcg_global_alloc(TCGContext *s)
1544 {
1545     TCGTemp *ts;
1546 
1547     tcg_debug_assert(s->nb_globals == s->nb_temps);
1548     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1549     s->nb_globals++;
1550     ts = tcg_temp_alloc(s);
1551     ts->kind = TEMP_GLOBAL;
1552 
1553     return ts;
1554 }
1555 
1556 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1557                                             TCGReg reg, const char *name)
1558 {
1559     TCGTemp *ts;
1560 
1561     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1562 
1563     ts = tcg_global_alloc(s);
1564     ts->base_type = type;
1565     ts->type = type;
1566     ts->kind = TEMP_FIXED;
1567     ts->reg = reg;
1568     ts->name = name;
1569     tcg_regset_set_reg(s->reserved_regs, reg);
1570 
1571     return ts;
1572 }
1573 
1574 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1575 {
1576     s->frame_start = start;
1577     s->frame_end = start + size;
1578     s->frame_temp
1579         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1580 }
1581 
1582 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1583                                             const char *name, TCGType type)
1584 {
1585     TCGContext *s = tcg_ctx;
1586     TCGTemp *base_ts = tcgv_ptr_temp(base);
1587     TCGTemp *ts = tcg_global_alloc(s);
1588     int indirect_reg = 0;
1589 
1590     switch (base_ts->kind) {
1591     case TEMP_FIXED:
1592         break;
1593     case TEMP_GLOBAL:
1594         /* We do not support double-indirect registers.  */
1595         tcg_debug_assert(!base_ts->indirect_reg);
1596         base_ts->indirect_base = 1;
1597         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1598                             ? 2 : 1);
1599         indirect_reg = 1;
1600         break;
1601     default:
1602         g_assert_not_reached();
1603     }
1604 
1605     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1606         TCGTemp *ts2 = tcg_global_alloc(s);
1607         char buf[64];
1608 
1609         ts->base_type = TCG_TYPE_I64;
1610         ts->type = TCG_TYPE_I32;
1611         ts->indirect_reg = indirect_reg;
1612         ts->mem_allocated = 1;
1613         ts->mem_base = base_ts;
1614         ts->mem_offset = offset;
1615         pstrcpy(buf, sizeof(buf), name);
1616         pstrcat(buf, sizeof(buf), "_0");
1617         ts->name = strdup(buf);
1618 
1619         tcg_debug_assert(ts2 == ts + 1);
1620         ts2->base_type = TCG_TYPE_I64;
1621         ts2->type = TCG_TYPE_I32;
1622         ts2->indirect_reg = indirect_reg;
1623         ts2->mem_allocated = 1;
1624         ts2->mem_base = base_ts;
1625         ts2->mem_offset = offset + 4;
1626         ts2->temp_subindex = 1;
1627         pstrcpy(buf, sizeof(buf), name);
1628         pstrcat(buf, sizeof(buf), "_1");
1629         ts2->name = strdup(buf);
1630     } else {
1631         ts->base_type = type;
1632         ts->type = type;
1633         ts->indirect_reg = indirect_reg;
1634         ts->mem_allocated = 1;
1635         ts->mem_base = base_ts;
1636         ts->mem_offset = offset;
1637         ts->name = name;
1638     }
1639     return ts;
1640 }
1641 
1642 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1643 {
1644     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1645     return temp_tcgv_i32(ts);
1646 }
1647 
1648 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1649 {
1650     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1651     return temp_tcgv_i64(ts);
1652 }
1653 
1654 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1655 {
1656     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1657     return temp_tcgv_ptr(ts);
1658 }
1659 
1660 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1661 {
1662     TCGContext *s = tcg_ctx;
1663     TCGTemp *ts;
1664     int n;
1665 
1666     if (kind == TEMP_EBB) {
1667         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1668 
1669         if (idx < TCG_MAX_TEMPS) {
1670             /* There is already an available temp with the right type.  */
1671             clear_bit(idx, s->free_temps[type].l);
1672 
1673             ts = &s->temps[idx];
1674             ts->temp_allocated = 1;
1675             tcg_debug_assert(ts->base_type == type);
1676             tcg_debug_assert(ts->kind == kind);
1677             return ts;
1678         }
1679     } else {
1680         tcg_debug_assert(kind == TEMP_TB);
1681     }
1682 
1683     switch (type) {
1684     case TCG_TYPE_I32:
1685     case TCG_TYPE_V64:
1686     case TCG_TYPE_V128:
1687     case TCG_TYPE_V256:
1688         n = 1;
1689         break;
1690     case TCG_TYPE_I64:
1691         n = 64 / TCG_TARGET_REG_BITS;
1692         break;
1693     case TCG_TYPE_I128:
1694         n = 128 / TCG_TARGET_REG_BITS;
1695         break;
1696     default:
1697         g_assert_not_reached();
1698     }
1699 
1700     ts = tcg_temp_alloc(s);
1701     ts->base_type = type;
1702     ts->temp_allocated = 1;
1703     ts->kind = kind;
1704 
1705     if (n == 1) {
1706         ts->type = type;
1707     } else {
1708         ts->type = TCG_TYPE_REG;
1709 
1710         for (int i = 1; i < n; ++i) {
1711             TCGTemp *ts2 = tcg_temp_alloc(s);
1712 
1713             tcg_debug_assert(ts2 == ts + i);
1714             ts2->base_type = type;
1715             ts2->type = TCG_TYPE_REG;
1716             ts2->temp_allocated = 1;
1717             ts2->temp_subindex = i;
1718             ts2->kind = kind;
1719         }
1720     }
1721     return ts;
1722 }
1723 
1724 TCGv_i32 tcg_temp_new_i32(void)
1725 {
1726     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1727 }
1728 
1729 TCGv_i32 tcg_temp_ebb_new_i32(void)
1730 {
1731     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1732 }
1733 
1734 TCGv_i64 tcg_temp_new_i64(void)
1735 {
1736     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1737 }
1738 
1739 TCGv_i64 tcg_temp_ebb_new_i64(void)
1740 {
1741     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1742 }
1743 
1744 TCGv_ptr tcg_temp_new_ptr(void)
1745 {
1746     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1747 }
1748 
1749 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1750 {
1751     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1752 }
1753 
1754 TCGv_i128 tcg_temp_new_i128(void)
1755 {
1756     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1757 }
1758 
1759 TCGv_i128 tcg_temp_ebb_new_i128(void)
1760 {
1761     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1762 }
1763 
1764 TCGv_vec tcg_temp_new_vec(TCGType type)
1765 {
1766     TCGTemp *t;
1767 
1768 #ifdef CONFIG_DEBUG_TCG
1769     switch (type) {
1770     case TCG_TYPE_V64:
1771         assert(TCG_TARGET_HAS_v64);
1772         break;
1773     case TCG_TYPE_V128:
1774         assert(TCG_TARGET_HAS_v128);
1775         break;
1776     case TCG_TYPE_V256:
1777         assert(TCG_TARGET_HAS_v256);
1778         break;
1779     default:
1780         g_assert_not_reached();
1781     }
1782 #endif
1783 
1784     t = tcg_temp_new_internal(type, TEMP_EBB);
1785     return temp_tcgv_vec(t);
1786 }
1787 
1788 /* Create a new temp of the same type as an existing temp.  */
1789 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1790 {
1791     TCGTemp *t = tcgv_vec_temp(match);
1792 
1793     tcg_debug_assert(t->temp_allocated != 0);
1794 
1795     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1796     return temp_tcgv_vec(t);
1797 }
1798 
1799 void tcg_temp_free_internal(TCGTemp *ts)
1800 {
1801     TCGContext *s = tcg_ctx;
1802 
1803     switch (ts->kind) {
1804     case TEMP_CONST:
1805     case TEMP_TB:
1806         /* Silently ignore free. */
1807         break;
1808     case TEMP_EBB:
1809         tcg_debug_assert(ts->temp_allocated != 0);
1810         ts->temp_allocated = 0;
1811         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1812         break;
1813     default:
1814         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1815         g_assert_not_reached();
1816     }
1817 }
1818 
1819 void tcg_temp_free_i32(TCGv_i32 arg)
1820 {
1821     tcg_temp_free_internal(tcgv_i32_temp(arg));
1822 }
1823 
1824 void tcg_temp_free_i64(TCGv_i64 arg)
1825 {
1826     tcg_temp_free_internal(tcgv_i64_temp(arg));
1827 }
1828 
1829 void tcg_temp_free_i128(TCGv_i128 arg)
1830 {
1831     tcg_temp_free_internal(tcgv_i128_temp(arg));
1832 }
1833 
1834 void tcg_temp_free_ptr(TCGv_ptr arg)
1835 {
1836     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1837 }
1838 
1839 void tcg_temp_free_vec(TCGv_vec arg)
1840 {
1841     tcg_temp_free_internal(tcgv_vec_temp(arg));
1842 }
1843 
1844 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1845 {
1846     TCGContext *s = tcg_ctx;
1847     GHashTable *h = s->const_table[type];
1848     TCGTemp *ts;
1849 
1850     if (h == NULL) {
1851         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1852         s->const_table[type] = h;
1853     }
1854 
1855     ts = g_hash_table_lookup(h, &val);
1856     if (ts == NULL) {
1857         int64_t *val_ptr;
1858 
1859         ts = tcg_temp_alloc(s);
1860 
1861         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1862             TCGTemp *ts2 = tcg_temp_alloc(s);
1863 
1864             tcg_debug_assert(ts2 == ts + 1);
1865 
1866             ts->base_type = TCG_TYPE_I64;
1867             ts->type = TCG_TYPE_I32;
1868             ts->kind = TEMP_CONST;
1869             ts->temp_allocated = 1;
1870 
1871             ts2->base_type = TCG_TYPE_I64;
1872             ts2->type = TCG_TYPE_I32;
1873             ts2->kind = TEMP_CONST;
1874             ts2->temp_allocated = 1;
1875             ts2->temp_subindex = 1;
1876 
1877             /*
1878              * Retain the full value of the 64-bit constant in the low
1879              * part, so that the hash table works.  Actual uses will
1880              * truncate the value to the low part.
1881              */
1882             ts[HOST_BIG_ENDIAN].val = val;
1883             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1884             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1885         } else {
1886             ts->base_type = type;
1887             ts->type = type;
1888             ts->kind = TEMP_CONST;
1889             ts->temp_allocated = 1;
1890             ts->val = val;
1891             val_ptr = &ts->val;
1892         }
1893         g_hash_table_insert(h, val_ptr, ts);
1894     }
1895 
1896     return ts;
1897 }
1898 
1899 TCGv_i32 tcg_constant_i32(int32_t val)
1900 {
1901     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1902 }
1903 
1904 TCGv_i64 tcg_constant_i64(int64_t val)
1905 {
1906     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1907 }
1908 
1909 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1910 {
1911     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1912 }
1913 
1914 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1915 {
1916     val = dup_const(vece, val);
1917     return temp_tcgv_vec(tcg_constant_internal(type, val));
1918 }
1919 
1920 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1921 {
1922     TCGTemp *t = tcgv_vec_temp(match);
1923 
1924     tcg_debug_assert(t->temp_allocated != 0);
1925     return tcg_constant_vec(t->base_type, vece, val);
1926 }
1927 
1928 #ifdef CONFIG_DEBUG_TCG
/*
 * Debug build variant: return the index of TS within tcg_ctx->temps,
 * asserting that TS really points inside the currently used part of
 * that array (0 <= index < nb_temps).
 */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
1935 
/*
 * Debug build variant: convert the handle V back into a TCGTemp pointer.
 *
 * The handle is used as a byte offset from tcg_ctx.  Before converting,
 * check that the offset lands inside the used part of the temps array
 * and on a TCGTemp boundary.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    /* Offset of the temp relative to the start of the temps array. */
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
1945 #endif /* CONFIG_DEBUG_TCG */
1946 
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 *
 * Opcodes fall into three groups:
 *  - always available (return true unconditionally);
 *  - tied to the host register width (TCG_TARGET_REG_BITS);
 *  - optional, reported by the matching TCG_TARGET_HAS_* value.
 * Vector opcodes additionally require that at least one vector width
 * is available (have_vec).
 *
 * Any opcode not listed must lie strictly between INDEX_op_last_generic
 * and NB_OPS (checked by the debug assertion in the default case) and
 * is reported as supported.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend provides any vector width at all. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and guest memory access: always present. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* 32-bit integer opcodes that are always present. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer opcodes. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons: only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit integer opcodes: only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer opcodes. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes available whenever any vector width is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector opcodes. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Everything else must be an opcode past the generic range. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2251 
2252 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2253 
2254 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2255                           TCGTemp *ret, TCGTemp **args)
2256 {
2257     TCGv_i64 extend_free[MAX_CALL_IARGS];
2258     int n_extend = 0;
2259     TCGOp *op;
2260     int i, n, pi = 0, total_args;
2261 
2262     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2263         init_call_layout(info);
2264         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2265     }
2266 
2267     total_args = info->nr_out + info->nr_in + 2;
2268     op = tcg_op_alloc(INDEX_op_call, total_args);
2269 
2270 #ifdef CONFIG_PLUGIN
2271     /* Flag helpers that may affect guest state */
2272     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2273         tcg_ctx->plugin_insn->calls_helpers = true;
2274     }
2275 #endif
2276 
2277     TCGOP_CALLO(op) = n = info->nr_out;
2278     switch (n) {
2279     case 0:
2280         tcg_debug_assert(ret == NULL);
2281         break;
2282     case 1:
2283         tcg_debug_assert(ret != NULL);
2284         op->args[pi++] = temp_arg(ret);
2285         break;
2286     case 2:
2287     case 4:
2288         tcg_debug_assert(ret != NULL);
2289         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2290         tcg_debug_assert(ret->temp_subindex == 0);
2291         for (i = 0; i < n; ++i) {
2292             op->args[pi++] = temp_arg(ret + i);
2293         }
2294         break;
2295     default:
2296         g_assert_not_reached();
2297     }
2298 
2299     TCGOP_CALLI(op) = n = info->nr_in;
2300     for (i = 0; i < n; i++) {
2301         const TCGCallArgumentLoc *loc = &info->in[i];
2302         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2303 
2304         switch (loc->kind) {
2305         case TCG_CALL_ARG_NORMAL:
2306         case TCG_CALL_ARG_BY_REF:
2307         case TCG_CALL_ARG_BY_REF_N:
2308             op->args[pi++] = temp_arg(ts);
2309             break;
2310 
2311         case TCG_CALL_ARG_EXTEND_U:
2312         case TCG_CALL_ARG_EXTEND_S:
2313             {
2314                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2315                 TCGv_i32 orig = temp_tcgv_i32(ts);
2316 
2317                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2318                     tcg_gen_ext_i32_i64(temp, orig);
2319                 } else {
2320                     tcg_gen_extu_i32_i64(temp, orig);
2321                 }
2322                 op->args[pi++] = tcgv_i64_arg(temp);
2323                 extend_free[n_extend++] = temp;
2324             }
2325             break;
2326 
2327         default:
2328             g_assert_not_reached();
2329         }
2330     }
2331     op->args[pi++] = (uintptr_t)func;
2332     op->args[pi++] = (uintptr_t)info;
2333     tcg_debug_assert(pi == total_args);
2334 
2335     if (tcg_ctx->emit_before_op) {
2336         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2337     } else {
2338         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2339     }
2340 
2341     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2342     for (i = 0; i < n_extend; ++i) {
2343         tcg_temp_free_i64(extend_free[i]);
2344     }
2345 }
2346 
2347 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2348 {
2349     tcg_gen_callN(func, info, ret, NULL);
2350 }
2351 
2352 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2353 {
2354     tcg_gen_callN(func, info, ret, &t1);
2355 }
2356 
2357 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2358                    TCGTemp *t1, TCGTemp *t2)
2359 {
2360     TCGTemp *args[2] = { t1, t2 };
2361     tcg_gen_callN(func, info, ret, args);
2362 }
2363 
2364 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2365                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2366 {
2367     TCGTemp *args[3] = { t1, t2, t3 };
2368     tcg_gen_callN(func, info, ret, args);
2369 }
2370 
2371 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2372                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2373 {
2374     TCGTemp *args[4] = { t1, t2, t3, t4 };
2375     tcg_gen_callN(func, info, ret, args);
2376 }
2377 
2378 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2379                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2380 {
2381     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2382     tcg_gen_callN(func, info, ret, args);
2383 }
2384 
2385 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2386                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2387                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2388 {
2389     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2390     tcg_gen_callN(func, info, ret, args);
2391 }
2392 
2393 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2394                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2395                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2396 {
2397     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2398     tcg_gen_callN(func, info, ret, args);
2399 }
2400 
2401 static void tcg_reg_alloc_start(TCGContext *s)
2402 {
2403     int i, n;
2404 
2405     for (i = 0, n = s->nb_temps; i < n; i++) {
2406         TCGTemp *ts = &s->temps[i];
2407         TCGTempVal val = TEMP_VAL_MEM;
2408 
2409         switch (ts->kind) {
2410         case TEMP_CONST:
2411             val = TEMP_VAL_CONST;
2412             break;
2413         case TEMP_FIXED:
2414             val = TEMP_VAL_REG;
2415             break;
2416         case TEMP_GLOBAL:
2417             break;
2418         case TEMP_EBB:
2419             val = TEMP_VAL_DEAD;
2420             /* fall through */
2421         case TEMP_TB:
2422             ts->mem_allocated = 0;
2423             break;
2424         default:
2425             g_assert_not_reached();
2426         }
2427         ts->val_type = val;
2428     }
2429 
2430     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2431 }
2432 
2433 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2434                                  TCGTemp *ts)
2435 {
2436     int idx = temp_idx(ts);
2437 
2438     switch (ts->kind) {
2439     case TEMP_FIXED:
2440     case TEMP_GLOBAL:
2441         pstrcpy(buf, buf_size, ts->name);
2442         break;
2443     case TEMP_TB:
2444         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2445         break;
2446     case TEMP_EBB:
2447         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2448         break;
2449     case TEMP_CONST:
2450         switch (ts->type) {
2451         case TCG_TYPE_I32:
2452             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2453             break;
2454 #if TCG_TARGET_REG_BITS > 32
2455         case TCG_TYPE_I64:
2456             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2457             break;
2458 #endif
2459         case TCG_TYPE_V64:
2460         case TCG_TYPE_V128:
2461         case TCG_TYPE_V256:
2462             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2463                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2464             break;
2465         default:
2466             g_assert_not_reached();
2467         }
2468         break;
2469     }
2470     return buf;
2471 }
2472 
2473 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2474                              int buf_size, TCGArg arg)
2475 {
2476     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2477 }
2478 
/*
 * Printable names for comparison conditions, indexed by TCGCond value.
 * Indices without an initializer are NULL; tcg_dump_ops() prints such
 * values (and out-of-range ones) in hex instead.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2496 
/*
 * Printable names for the size/sign/byte-order part of a MemOp, indexed
 * by (mop & (MO_BSWAP | MO_SSIZE)).  Combinations without an entry are
 * NULL, in which case tcg_dump_ops() prints the MemOp numerically.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2514 
/*
 * Printable names for the alignment field of a MemOp, indexed by
 * ((mop & MO_AMASK) >> MO_ASHIFT).  Each name ends in '+' because
 * tcg_dump_ops() prints it immediately in front of the ldst_name entry.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2525 
/*
 * Printable names for the atomicity field of a MemOp, indexed by
 * ((mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT).  MO_ATOM_IFALIGN maps to the
 * empty string, so it adds nothing to the dump; the other names end in
 * '+' because further MemOp fields are printed right after them.
 */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2534 
/*
 * Printable names for the flags argument of the bswap opcodes, indexed
 * by the TCG_BSWAP_* flag combination.
 *
 * Unlike the pointer tables above, this is an array of fixed-size
 * char[6] strings ("iz,oz" plus its terminator is the longest entry).
 * A combination without an initializer is therefore an empty string,
 * not NULL.
 * NOTE(review): the "if (name)" test in tcg_dump_ops() consequently
 * always succeeds for in-range flags; confirm whether an empty name was
 * meant to fall back to the hex form.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2542 
2543 #ifdef CONFIG_PLUGIN
/*
 * Printable names for the constant argument of INDEX_op_plugin_cb,
 * indexed by that argument's value; tcg_dump_ops() prints out-of-range
 * values in hex.
 */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
2550 #endif
2551 
2552 static inline bool tcg_regset_single(TCGRegSet d)
2553 {
2554     return (d & (d - 1)) == 0;
2555 }
2556 
2557 static inline TCGReg tcg_regset_first(TCGRegSet d)
2558 {
2559     if (TCG_TARGET_NB_REGS <= 32) {
2560         return ctz32(d);
2561     } else {
2562         return ctz64(d);
2563     }
2564 }
2565 
/*
 * "No-error" fprintf: evaluates to the number of characters written, or
 * to 0 when fprintf reports an error (negative return), so the result
 * can be added directly to a running column count.
 * Implemented as a GNU statement expression.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2569 
2570 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2571 {
2572     char buf[128];
2573     TCGOp *op;
2574 
2575     QTAILQ_FOREACH(op, &s->ops, link) {
2576         int i, k, nb_oargs, nb_iargs, nb_cargs;
2577         const TCGOpDef *def;
2578         TCGOpcode c;
2579         int col = 0;
2580 
2581         c = op->opc;
2582         def = &tcg_op_defs[c];
2583 
2584         if (c == INDEX_op_insn_start) {
2585             nb_oargs = 0;
2586             col += ne_fprintf(f, "\n ----");
2587 
2588             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2589                 col += ne_fprintf(f, " %016" PRIx64,
2590                                   tcg_get_insn_start_param(op, i));
2591             }
2592         } else if (c == INDEX_op_call) {
2593             const TCGHelperInfo *info = tcg_call_info(op);
2594             void *func = tcg_call_func(op);
2595 
2596             /* variable number of arguments */
2597             nb_oargs = TCGOP_CALLO(op);
2598             nb_iargs = TCGOP_CALLI(op);
2599             nb_cargs = def->nb_cargs;
2600 
2601             col += ne_fprintf(f, " %s ", def->name);
2602 
2603             /*
2604              * Print the function name from TCGHelperInfo, if available.
2605              * Note that plugins have a template function for the info,
2606              * but the actual function pointer comes from the plugin.
2607              */
2608             if (func == info->func) {
2609                 col += ne_fprintf(f, "%s", info->name);
2610             } else {
2611                 col += ne_fprintf(f, "plugin(%p)", func);
2612             }
2613 
2614             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2615             for (i = 0; i < nb_oargs; i++) {
2616                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2617                                                             op->args[i]));
2618             }
2619             for (i = 0; i < nb_iargs; i++) {
2620                 TCGArg arg = op->args[nb_oargs + i];
2621                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2622                 col += ne_fprintf(f, ",%s", t);
2623             }
2624         } else {
2625             col += ne_fprintf(f, " %s ", def->name);
2626 
2627             nb_oargs = def->nb_oargs;
2628             nb_iargs = def->nb_iargs;
2629             nb_cargs = def->nb_cargs;
2630 
2631             if (def->flags & TCG_OPF_VECTOR) {
2632                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2633                                   8 << TCGOP_VECE(op));
2634             }
2635 
2636             k = 0;
2637             for (i = 0; i < nb_oargs; i++) {
2638                 const char *sep =  k ? "," : "";
2639                 col += ne_fprintf(f, "%s%s", sep,
2640                                   tcg_get_arg_str(s, buf, sizeof(buf),
2641                                                   op->args[k++]));
2642             }
2643             for (i = 0; i < nb_iargs; i++) {
2644                 const char *sep =  k ? "," : "";
2645                 col += ne_fprintf(f, "%s%s", sep,
2646                                   tcg_get_arg_str(s, buf, sizeof(buf),
2647                                                   op->args[k++]));
2648             }
2649             switch (c) {
2650             case INDEX_op_brcond_i32:
2651             case INDEX_op_setcond_i32:
2652             case INDEX_op_negsetcond_i32:
2653             case INDEX_op_movcond_i32:
2654             case INDEX_op_brcond2_i32:
2655             case INDEX_op_setcond2_i32:
2656             case INDEX_op_brcond_i64:
2657             case INDEX_op_setcond_i64:
2658             case INDEX_op_negsetcond_i64:
2659             case INDEX_op_movcond_i64:
2660             case INDEX_op_cmp_vec:
2661             case INDEX_op_cmpsel_vec:
2662                 if (op->args[k] < ARRAY_SIZE(cond_name)
2663                     && cond_name[op->args[k]]) {
2664                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2665                 } else {
2666                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2667                 }
2668                 i = 1;
2669                 break;
2670             case INDEX_op_qemu_ld_a32_i32:
2671             case INDEX_op_qemu_ld_a64_i32:
2672             case INDEX_op_qemu_st_a32_i32:
2673             case INDEX_op_qemu_st_a64_i32:
2674             case INDEX_op_qemu_st8_a32_i32:
2675             case INDEX_op_qemu_st8_a64_i32:
2676             case INDEX_op_qemu_ld_a32_i64:
2677             case INDEX_op_qemu_ld_a64_i64:
2678             case INDEX_op_qemu_st_a32_i64:
2679             case INDEX_op_qemu_st_a64_i64:
2680             case INDEX_op_qemu_ld_a32_i128:
2681             case INDEX_op_qemu_ld_a64_i128:
2682             case INDEX_op_qemu_st_a32_i128:
2683             case INDEX_op_qemu_st_a64_i128:
2684                 {
2685                     const char *s_al, *s_op, *s_at;
2686                     MemOpIdx oi = op->args[k++];
2687                     MemOp mop = get_memop(oi);
2688                     unsigned ix = get_mmuidx(oi);
2689 
2690                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2691                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2692                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2693                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2694 
2695                     /* If all fields are accounted for, print symbolically. */
2696                     if (!mop && s_al && s_op && s_at) {
2697                         col += ne_fprintf(f, ",%s%s%s,%u",
2698                                           s_at, s_al, s_op, ix);
2699                     } else {
2700                         mop = get_memop(oi);
2701                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2702                     }
2703                     i = 1;
2704                 }
2705                 break;
2706             case INDEX_op_bswap16_i32:
2707             case INDEX_op_bswap16_i64:
2708             case INDEX_op_bswap32_i32:
2709             case INDEX_op_bswap32_i64:
2710             case INDEX_op_bswap64_i64:
2711                 {
2712                     TCGArg flags = op->args[k];
2713                     const char *name = NULL;
2714 
2715                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2716                         name = bswap_flag_name[flags];
2717                     }
2718                     if (name) {
2719                         col += ne_fprintf(f, ",%s", name);
2720                     } else {
2721                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2722                     }
2723                     i = k = 1;
2724                 }
2725                 break;
2726 #ifdef CONFIG_PLUGIN
2727             case INDEX_op_plugin_cb:
2728                 {
2729                     TCGArg from = op->args[k++];
2730                     const char *name = NULL;
2731 
2732                     if (from < ARRAY_SIZE(plugin_from_name)) {
2733                         name = plugin_from_name[from];
2734                     }
2735                     if (name) {
2736                         col += ne_fprintf(f, "%s", name);
2737                     } else {
2738                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2739                     }
2740                     i = 1;
2741                 }
2742                 break;
2743 #endif
2744             default:
2745                 i = 0;
2746                 break;
2747             }
2748             switch (c) {
2749             case INDEX_op_set_label:
2750             case INDEX_op_br:
2751             case INDEX_op_brcond_i32:
2752             case INDEX_op_brcond_i64:
2753             case INDEX_op_brcond2_i32:
2754                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2755                                   arg_label(op->args[k])->id);
2756                 i++, k++;
2757                 break;
2758             case INDEX_op_mb:
2759                 {
2760                     TCGBar membar = op->args[k];
2761                     const char *b_op, *m_op;
2762 
2763                     switch (membar & TCG_BAR_SC) {
2764                     case 0:
2765                         b_op = "none";
2766                         break;
2767                     case TCG_BAR_LDAQ:
2768                         b_op = "acq";
2769                         break;
2770                     case TCG_BAR_STRL:
2771                         b_op = "rel";
2772                         break;
2773                     case TCG_BAR_SC:
2774                         b_op = "seq";
2775                         break;
2776                     default:
2777                         g_assert_not_reached();
2778                     }
2779 
2780                     switch (membar & TCG_MO_ALL) {
2781                     case 0:
2782                         m_op = "none";
2783                         break;
2784                     case TCG_MO_LD_LD:
2785                         m_op = "rr";
2786                         break;
2787                     case TCG_MO_LD_ST:
2788                         m_op = "rw";
2789                         break;
2790                     case TCG_MO_ST_LD:
2791                         m_op = "wr";
2792                         break;
2793                     case TCG_MO_ST_ST:
2794                         m_op = "ww";
2795                         break;
2796                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2797                         m_op = "rr+rw";
2798                         break;
2799                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2800                         m_op = "rr+wr";
2801                         break;
2802                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2803                         m_op = "rr+ww";
2804                         break;
2805                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2806                         m_op = "rw+wr";
2807                         break;
2808                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2809                         m_op = "rw+ww";
2810                         break;
2811                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2812                         m_op = "wr+ww";
2813                         break;
2814                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2815                         m_op = "rr+rw+wr";
2816                         break;
2817                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2818                         m_op = "rr+rw+ww";
2819                         break;
2820                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2821                         m_op = "rr+wr+ww";
2822                         break;
2823                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2824                         m_op = "rw+wr+ww";
2825                         break;
2826                     case TCG_MO_ALL:
2827                         m_op = "all";
2828                         break;
2829                     default:
2830                         g_assert_not_reached();
2831                     }
2832 
2833                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2834                     i++, k++;
2835                 }
2836                 break;
2837             default:
2838                 break;
2839             }
2840             for (; i < nb_cargs; i++, k++) {
2841                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2842                                   op->args[k]);
2843             }
2844         }
2845 
2846         if (have_prefs || op->life) {
2847             for (; col < 40; ++col) {
2848                 putc(' ', f);
2849             }
2850         }
2851 
2852         if (op->life) {
2853             unsigned life = op->life;
2854 
2855             if (life & (SYNC_ARG * 3)) {
2856                 ne_fprintf(f, "  sync:");
2857                 for (i = 0; i < 2; ++i) {
2858                     if (life & (SYNC_ARG << i)) {
2859                         ne_fprintf(f, " %d", i);
2860                     }
2861                 }
2862             }
2863             life /= DEAD_ARG;
2864             if (life) {
2865                 ne_fprintf(f, "  dead:");
2866                 for (i = 0; life; ++i, life >>= 1) {
2867                     if (life & 1) {
2868                         ne_fprintf(f, " %d", i);
2869                     }
2870                 }
2871             }
2872         }
2873 
2874         if (have_prefs) {
2875             for (i = 0; i < nb_oargs; ++i) {
2876                 TCGRegSet set = output_pref(op, i);
2877 
2878                 if (i == 0) {
2879                     ne_fprintf(f, "  pref=");
2880                 } else {
2881                     ne_fprintf(f, ",");
2882                 }
2883                 if (set == 0) {
2884                     ne_fprintf(f, "none");
2885                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2886                     ne_fprintf(f, "all");
2887 #ifdef CONFIG_DEBUG_TCG
2888                 } else if (tcg_regset_single(set)) {
2889                     TCGReg reg = tcg_regset_first(set);
2890                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2891 #endif
2892                 } else if (TCG_TARGET_NB_REGS <= 32) {
2893                     ne_fprintf(f, "0x%x", (uint32_t)set);
2894                 } else {
2895                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2896                 }
2897             }
2898         }
2899 
2900         putc('\n', f);
2901     }
2902 }
2903 
2904 /* we give more priority to constraints with less registers */
2905 static int get_constraint_priority(const TCGOpDef *def, int k)
2906 {
2907     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2908     int n = ctpop64(arg_ct->regs);
2909 
2910     /*
2911      * Sort constraints of a single register first, which includes output
2912      * aliases (which must exactly match the input already allocated).
2913      */
2914     if (n == 1 || arg_ct->oalias) {
2915         return INT_MAX;
2916     }
2917 
2918     /*
2919      * Sort register pairs next, first then second immediately after.
2920      * Arbitrarily sort multiple pairs by the index of the first reg;
2921      * there shouldn't be many pairs.
2922      */
2923     switch (arg_ct->pair) {
2924     case 1:
2925     case 3:
2926         return (k + 1) * 2;
2927     case 2:
2928         return (arg_ct->pair_index + 1) * 2 - 1;
2929     }
2930 
2931     /* Finally, sort by decreasing register count. */
2932     assert(n > 1);
2933     return -n;
2934 }
2935 
2936 /* sort from highest priority to lowest */
2937 static void sort_constraints(TCGOpDef *def, int start, int n)
2938 {
2939     int i, j;
2940     TCGArgConstraint *a = def->args_ct;
2941 
2942     for (i = 0; i < n; i++) {
2943         a[start + i].sort_index = start + i;
2944     }
2945     if (n <= 1) {
2946         return;
2947     }
2948     for (i = 0; i < n - 1; i++) {
2949         for (j = i + 1; j < n; j++) {
2950             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2951             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2952             if (p1 < p2) {
2953                 int tmp = a[start + i].sort_index;
2954                 a[start + i].sort_index = a[start + j].sort_index;
2955                 a[start + j].sort_index = tmp;
2956             }
2957         }
2958     }
2959 }
2960 
2961 static void process_op_defs(TCGContext *s)
2962 {
2963     TCGOpcode op;
2964 
2965     for (op = 0; op < NB_OPS; op++) {
2966         TCGOpDef *def = &tcg_op_defs[op];
2967         const TCGTargetOpDef *tdefs;
2968         bool saw_alias_pair = false;
2969         int i, o, i2, o2, nb_args;
2970 
2971         if (def->flags & TCG_OPF_NOT_PRESENT) {
2972             continue;
2973         }
2974 
2975         nb_args = def->nb_iargs + def->nb_oargs;
2976         if (nb_args == 0) {
2977             continue;
2978         }
2979 
2980         /*
2981          * Macro magic should make it impossible, but double-check that
2982          * the array index is in range.  Since the signness of an enum
2983          * is implementation defined, force the result to unsigned.
2984          */
2985         unsigned con_set = tcg_target_op_def(op);
2986         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2987         tdefs = &constraint_sets[con_set];
2988 
2989         for (i = 0; i < nb_args; i++) {
2990             const char *ct_str = tdefs->args_ct_str[i];
2991             bool input_p = i >= def->nb_oargs;
2992 
2993             /* Incomplete TCGTargetOpDef entry. */
2994             tcg_debug_assert(ct_str != NULL);
2995 
2996             switch (*ct_str) {
2997             case '0' ... '9':
2998                 o = *ct_str - '0';
2999                 tcg_debug_assert(input_p);
3000                 tcg_debug_assert(o < def->nb_oargs);
3001                 tcg_debug_assert(def->args_ct[o].regs != 0);
3002                 tcg_debug_assert(!def->args_ct[o].oalias);
3003                 def->args_ct[i] = def->args_ct[o];
3004                 /* The output sets oalias.  */
3005                 def->args_ct[o].oalias = 1;
3006                 def->args_ct[o].alias_index = i;
3007                 /* The input sets ialias. */
3008                 def->args_ct[i].ialias = 1;
3009                 def->args_ct[i].alias_index = o;
3010                 if (def->args_ct[i].pair) {
3011                     saw_alias_pair = true;
3012                 }
3013                 tcg_debug_assert(ct_str[1] == '\0');
3014                 continue;
3015 
3016             case '&':
3017                 tcg_debug_assert(!input_p);
3018                 def->args_ct[i].newreg = true;
3019                 ct_str++;
3020                 break;
3021 
3022             case 'p': /* plus */
3023                 /* Allocate to the register after the previous. */
3024                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3025                 o = i - 1;
3026                 tcg_debug_assert(!def->args_ct[o].pair);
3027                 tcg_debug_assert(!def->args_ct[o].ct);
3028                 def->args_ct[i] = (TCGArgConstraint){
3029                     .pair = 2,
3030                     .pair_index = o,
3031                     .regs = def->args_ct[o].regs << 1,
3032                     .newreg = def->args_ct[o].newreg,
3033                 };
3034                 def->args_ct[o].pair = 1;
3035                 def->args_ct[o].pair_index = i;
3036                 tcg_debug_assert(ct_str[1] == '\0');
3037                 continue;
3038 
3039             case 'm': /* minus */
3040                 /* Allocate to the register before the previous. */
3041                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3042                 o = i - 1;
3043                 tcg_debug_assert(!def->args_ct[o].pair);
3044                 tcg_debug_assert(!def->args_ct[o].ct);
3045                 def->args_ct[i] = (TCGArgConstraint){
3046                     .pair = 1,
3047                     .pair_index = o,
3048                     .regs = def->args_ct[o].regs >> 1,
3049                     .newreg = def->args_ct[o].newreg,
3050                 };
3051                 def->args_ct[o].pair = 2;
3052                 def->args_ct[o].pair_index = i;
3053                 tcg_debug_assert(ct_str[1] == '\0');
3054                 continue;
3055             }
3056 
3057             do {
3058                 switch (*ct_str) {
3059                 case 'i':
3060                     def->args_ct[i].ct |= TCG_CT_CONST;
3061                     break;
3062 
3063                 /* Include all of the target-specific constraints. */
3064 
3065 #undef CONST
3066 #define CONST(CASE, MASK) \
3067     case CASE: def->args_ct[i].ct |= MASK; break;
3068 #define REGS(CASE, MASK) \
3069     case CASE: def->args_ct[i].regs |= MASK; break;
3070 
3071 #include "tcg-target-con-str.h"
3072 
3073 #undef REGS
3074 #undef CONST
3075                 default:
3076                 case '0' ... '9':
3077                 case '&':
3078                 case 'p':
3079                 case 'm':
3080                     /* Typo in TCGTargetOpDef constraint. */
3081                     g_assert_not_reached();
3082                 }
3083             } while (*++ct_str != '\0');
3084         }
3085 
3086         /* TCGTargetOpDef entry with too much information? */
3087         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3088 
3089         /*
3090          * Fix up output pairs that are aliased with inputs.
3091          * When we created the alias, we copied pair from the output.
3092          * There are three cases:
3093          *    (1a) Pairs of inputs alias pairs of outputs.
3094          *    (1b) One input aliases the first of a pair of outputs.
3095          *    (2)  One input aliases the second of a pair of outputs.
3096          *
3097          * Case 1a is handled by making sure that the pair_index'es are
3098          * properly updated so that they appear the same as a pair of inputs.
3099          *
3100          * Case 1b is handled by setting the pair_index of the input to
3101          * itself, simply so it doesn't point to an unrelated argument.
3102          * Since we don't encounter the "second" during the input allocation
3103          * phase, nothing happens with the second half of the input pair.
3104          *
3105          * Case 2 is handled by setting the second input to pair=3, the
3106          * first output to pair=3, and the pair_index'es to match.
3107          */
3108         if (saw_alias_pair) {
3109             for (i = def->nb_oargs; i < nb_args; i++) {
3110                 /*
3111                  * Since [0-9pm] must be alone in the constraint string,
3112                  * the only way they can both be set is if the pair comes
3113                  * from the output alias.
3114                  */
3115                 if (!def->args_ct[i].ialias) {
3116                     continue;
3117                 }
3118                 switch (def->args_ct[i].pair) {
3119                 case 0:
3120                     break;
3121                 case 1:
3122                     o = def->args_ct[i].alias_index;
3123                     o2 = def->args_ct[o].pair_index;
3124                     tcg_debug_assert(def->args_ct[o].pair == 1);
3125                     tcg_debug_assert(def->args_ct[o2].pair == 2);
3126                     if (def->args_ct[o2].oalias) {
3127                         /* Case 1a */
3128                         i2 = def->args_ct[o2].alias_index;
3129                         tcg_debug_assert(def->args_ct[i2].pair == 2);
3130                         def->args_ct[i2].pair_index = i;
3131                         def->args_ct[i].pair_index = i2;
3132                     } else {
3133                         /* Case 1b */
3134                         def->args_ct[i].pair_index = i;
3135                     }
3136                     break;
3137                 case 2:
3138                     o = def->args_ct[i].alias_index;
3139                     o2 = def->args_ct[o].pair_index;
3140                     tcg_debug_assert(def->args_ct[o].pair == 2);
3141                     tcg_debug_assert(def->args_ct[o2].pair == 1);
3142                     if (def->args_ct[o2].oalias) {
3143                         /* Case 1a */
3144                         i2 = def->args_ct[o2].alias_index;
3145                         tcg_debug_assert(def->args_ct[i2].pair == 1);
3146                         def->args_ct[i2].pair_index = i;
3147                         def->args_ct[i].pair_index = i2;
3148                     } else {
3149                         /* Case 2 */
3150                         def->args_ct[i].pair = 3;
3151                         def->args_ct[o2].pair = 3;
3152                         def->args_ct[i].pair_index = o2;
3153                         def->args_ct[o2].pair_index = i;
3154                     }
3155                     break;
3156                 default:
3157                     g_assert_not_reached();
3158                 }
3159             }
3160         }
3161 
3162         /* sort the constraints (XXX: this is just an heuristic) */
3163         sort_constraints(def, 0, def->nb_oargs);
3164         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3165     }
3166 }
3167 
3168 static void remove_label_use(TCGOp *op, int idx)
3169 {
3170     TCGLabel *label = arg_label(op->args[idx]);
3171     TCGLabelUse *use;
3172 
3173     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3174         if (use->op == op) {
3175             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3176             return;
3177         }
3178     }
3179     g_assert_not_reached();
3180 }
3181 
3182 void tcg_op_remove(TCGContext *s, TCGOp *op)
3183 {
3184     switch (op->opc) {
3185     case INDEX_op_br:
3186         remove_label_use(op, 0);
3187         break;
3188     case INDEX_op_brcond_i32:
3189     case INDEX_op_brcond_i64:
3190         remove_label_use(op, 3);
3191         break;
3192     case INDEX_op_brcond2_i32:
3193         remove_label_use(op, 5);
3194         break;
3195     default:
3196         break;
3197     }
3198 
3199     QTAILQ_REMOVE(&s->ops, op, link);
3200     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3201     s->nb_ops--;
3202 }
3203 
3204 void tcg_remove_ops_after(TCGOp *op)
3205 {
3206     TCGContext *s = tcg_ctx;
3207 
3208     while (true) {
3209         TCGOp *last = tcg_last_op();
3210         if (last == op) {
3211             return;
3212         }
3213         tcg_op_remove(s, last);
3214     }
3215 }
3216 
3217 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3218 {
3219     TCGContext *s = tcg_ctx;
3220     TCGOp *op = NULL;
3221 
3222     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3223         QTAILQ_FOREACH(op, &s->free_ops, link) {
3224             if (nargs <= op->nargs) {
3225                 QTAILQ_REMOVE(&s->free_ops, op, link);
3226                 nargs = op->nargs;
3227                 goto found;
3228             }
3229         }
3230     }
3231 
3232     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3233     nargs = MAX(4, nargs);
3234     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3235 
3236  found:
3237     memset(op, 0, offsetof(TCGOp, link));
3238     op->opc = opc;
3239     op->nargs = nargs;
3240 
3241     /* Check for bitfield overflow. */
3242     tcg_debug_assert(op->nargs == nargs);
3243 
3244     s->nb_ops++;
3245     return op;
3246 }
3247 
3248 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3249 {
3250     TCGOp *op = tcg_op_alloc(opc, nargs);
3251 
3252     if (tcg_ctx->emit_before_op) {
3253         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3254     } else {
3255         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3256     }
3257     return op;
3258 }
3259 
3260 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3261                             TCGOpcode opc, unsigned nargs)
3262 {
3263     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3264     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3265     return new_op;
3266 }
3267 
3268 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3269                            TCGOpcode opc, unsigned nargs)
3270 {
3271     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3272     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3273     return new_op;
3274 }
3275 
3276 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3277 {
3278     TCGLabelUse *u;
3279 
3280     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3281         TCGOp *op = u->op;
3282         switch (op->opc) {
3283         case INDEX_op_br:
3284             op->args[0] = label_arg(to);
3285             break;
3286         case INDEX_op_brcond_i32:
3287         case INDEX_op_brcond_i64:
3288             op->args[3] = label_arg(to);
3289             break;
3290         case INDEX_op_brcond2_i32:
3291             op->args[5] = label_arg(to);
3292             break;
3293         default:
3294             g_assert_not_reached();
3295         }
3296     }
3297 
3298     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3299 }
3300 
3301 /* Reachable analysis : remove unreachable code.  */
3302 static void __attribute__((noinline))
3303 reachable_code_pass(TCGContext *s)
3304 {
3305     TCGOp *op, *op_next, *op_prev;
3306     bool dead = false;
3307 
3308     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3309         bool remove = dead;
3310         TCGLabel *label;
3311 
3312         switch (op->opc) {
3313         case INDEX_op_set_label:
3314             label = arg_label(op->args[0]);
3315 
3316             /*
3317              * Note that the first op in the TB is always a load,
3318              * so there is always something before a label.
3319              */
3320             op_prev = QTAILQ_PREV(op, link);
3321 
3322             /*
3323              * If we find two sequential labels, move all branches to
3324              * reference the second label and remove the first label.
3325              * Do this before branch to next optimization, so that the
3326              * middle label is out of the way.
3327              */
3328             if (op_prev->opc == INDEX_op_set_label) {
3329                 move_label_uses(label, arg_label(op_prev->args[0]));
3330                 tcg_op_remove(s, op_prev);
3331                 op_prev = QTAILQ_PREV(op, link);
3332             }
3333 
3334             /*
3335              * Optimization can fold conditional branches to unconditional.
3336              * If we find a label which is preceded by an unconditional
3337              * branch to next, remove the branch.  We couldn't do this when
3338              * processing the branch because any dead code between the branch
3339              * and label had not yet been removed.
3340              */
3341             if (op_prev->opc == INDEX_op_br &&
3342                 label == arg_label(op_prev->args[0])) {
3343                 tcg_op_remove(s, op_prev);
3344                 /* Fall through means insns become live again.  */
3345                 dead = false;
3346             }
3347 
3348             if (QSIMPLEQ_EMPTY(&label->branches)) {
3349                 /*
3350                  * While there is an occasional backward branch, virtually
3351                  * all branches generated by the translators are forward.
3352                  * Which means that generally we will have already removed
3353                  * all references to the label that will be, and there is
3354                  * little to be gained by iterating.
3355                  */
3356                 remove = true;
3357             } else {
3358                 /* Once we see a label, insns become live again.  */
3359                 dead = false;
3360                 remove = false;
3361             }
3362             break;
3363 
3364         case INDEX_op_br:
3365         case INDEX_op_exit_tb:
3366         case INDEX_op_goto_ptr:
3367             /* Unconditional branches; everything following is dead.  */
3368             dead = true;
3369             break;
3370 
3371         case INDEX_op_call:
3372             /* Notice noreturn helper calls, raising exceptions.  */
3373             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3374                 dead = true;
3375             }
3376             break;
3377 
3378         case INDEX_op_insn_start:
3379             /* Never remove -- we need to keep these for unwind.  */
3380             remove = false;
3381             break;
3382 
3383         default:
3384             break;
3385         }
3386 
3387         if (remove) {
3388             tcg_op_remove(s, op);
3389         }
3390     }
3391 }
3392 
/*
 * Bits stored in TCGTemp.state by the liveness helpers below:
 * TS_DEAD marks a temp as dead, TS_MEM marks one that should be
 * in memory.
 */
#define TS_DEAD  0x1
#define TS_MEM   0x2

/*
 * Test the dead / sync bit of argument N.  Both macros read a variable
 * named arg_life that must be in scope where they are expanded.
 */
#define IS_DEAD_ARG(n)   ((DEAD_ARG << (n)) & arg_life)
#define NEED_SYNC_ARG(n) ((SYNC_ARG << (n)) & arg_life)
3398 
3399 /* For liveness_pass_1, the register preferences for a given temp.  */
3400 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3401 {
3402     return ts->state_ptr;
3403 }
3404 
3405 /* For liveness_pass_1, reset the preferences for a given temp to the
3406  * maximal regset for its type.
3407  */
3408 static inline void la_reset_pref(TCGTemp *ts)
3409 {
3410     *la_temp_pref(ts)
3411         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3412 }
3413 
3414 /* liveness analysis: end of function: all temps are dead, and globals
3415    should be in memory. */
3416 static void la_func_end(TCGContext *s, int ng, int nt)
3417 {
3418     int i;
3419 
3420     for (i = 0; i < ng; ++i) {
3421         s->temps[i].state = TS_DEAD | TS_MEM;
3422         la_reset_pref(&s->temps[i]);
3423     }
3424     for (i = ng; i < nt; ++i) {
3425         s->temps[i].state = TS_DEAD;
3426         la_reset_pref(&s->temps[i]);
3427     }
3428 }
3429 
3430 /* liveness analysis: end of basic block: all temps are dead, globals
3431    and local temps should be in memory. */
3432 static void la_bb_end(TCGContext *s, int ng, int nt)
3433 {
3434     int i;
3435 
3436     for (i = 0; i < nt; ++i) {
3437         TCGTemp *ts = &s->temps[i];
3438         int state;
3439 
3440         switch (ts->kind) {
3441         case TEMP_FIXED:
3442         case TEMP_GLOBAL:
3443         case TEMP_TB:
3444             state = TS_DEAD | TS_MEM;
3445             break;
3446         case TEMP_EBB:
3447         case TEMP_CONST:
3448             state = TS_DEAD;
3449             break;
3450         default:
3451             g_assert_not_reached();
3452         }
3453         ts->state = state;
3454         la_reset_pref(ts);
3455     }
3456 }
3457 
3458 /* liveness analysis: sync globals back to memory.  */
3459 static void la_global_sync(TCGContext *s, int ng)
3460 {
3461     int i;
3462 
3463     for (i = 0; i < ng; ++i) {
3464         int state = s->temps[i].state;
3465         s->temps[i].state = state | TS_MEM;
3466         if (state == TS_DEAD) {
3467             /* If the global was previously dead, reset prefs.  */
3468             la_reset_pref(&s->temps[i]);
3469         }
3470     }
3471 }
3472 
3473 /*
3474  * liveness analysis: conditional branch: all temps are dead unless
3475  * explicitly live-across-conditional-branch, globals and local temps
3476  * should be synced.
3477  */
3478 static void la_bb_sync(TCGContext *s, int ng, int nt)
3479 {
3480     la_global_sync(s, ng);
3481 
3482     for (int i = ng; i < nt; ++i) {
3483         TCGTemp *ts = &s->temps[i];
3484         int state;
3485 
3486         switch (ts->kind) {
3487         case TEMP_TB:
3488             state = ts->state;
3489             ts->state = state | TS_MEM;
3490             if (state != TS_DEAD) {
3491                 continue;
3492             }
3493             break;
3494         case TEMP_EBB:
3495         case TEMP_CONST:
3496             continue;
3497         default:
3498             g_assert_not_reached();
3499         }
3500         la_reset_pref(&s->temps[i]);
3501     }
3502 }
3503 
3504 /* liveness analysis: sync globals back to memory and kill.  */
3505 static void la_global_kill(TCGContext *s, int ng)
3506 {
3507     int i;
3508 
3509     for (i = 0; i < ng; i++) {
3510         s->temps[i].state = TS_DEAD | TS_MEM;
3511         la_reset_pref(&s->temps[i]);
3512     }
3513 }
3514 
3515 /* liveness analysis: note live globals crossing calls.  */
3516 static void la_cross_call(TCGContext *s, int nt)
3517 {
3518     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3519     int i;
3520 
3521     for (i = 0; i < nt; i++) {
3522         TCGTemp *ts = &s->temps[i];
3523         if (!(ts->state & TS_DEAD)) {
3524             TCGRegSet *pset = la_temp_pref(ts);
3525             TCGRegSet set = *pset;
3526 
3527             set &= mask;
3528             /* If the combination is not possible, restart.  */
3529             if (set == 0) {
3530                 set = tcg_target_available_regs[ts->type] & mask;
3531             }
3532             *pset = set;
3533         }
3534     }
3535 }
3536 
3537 /*
3538  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3539  * to TEMP_EBB, if possible.
3540  */
3541 static void __attribute__((noinline))
3542 liveness_pass_0(TCGContext *s)
3543 {
3544     void * const multiple_ebb = (void *)(uintptr_t)-1;
3545     int nb_temps = s->nb_temps;
3546     TCGOp *op, *ebb;
3547 
3548     for (int i = s->nb_globals; i < nb_temps; ++i) {
3549         s->temps[i].state_ptr = NULL;
3550     }
3551 
3552     /*
3553      * Represent each EBB by the op at which it begins.  In the case of
3554      * the first EBB, this is the first op, otherwise it is a label.
3555      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3556      * within a single EBB, else MULTIPLE_EBB.
3557      */
3558     ebb = QTAILQ_FIRST(&s->ops);
3559     QTAILQ_FOREACH(op, &s->ops, link) {
3560         const TCGOpDef *def;
3561         int nb_oargs, nb_iargs;
3562 
3563         switch (op->opc) {
3564         case INDEX_op_set_label:
3565             ebb = op;
3566             continue;
3567         case INDEX_op_discard:
3568             continue;
3569         case INDEX_op_call:
3570             nb_oargs = TCGOP_CALLO(op);
3571             nb_iargs = TCGOP_CALLI(op);
3572             break;
3573         default:
3574             def = &tcg_op_defs[op->opc];
3575             nb_oargs = def->nb_oargs;
3576             nb_iargs = def->nb_iargs;
3577             break;
3578         }
3579 
3580         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3581             TCGTemp *ts = arg_temp(op->args[i]);
3582 
3583             if (ts->kind != TEMP_TB) {
3584                 continue;
3585             }
3586             if (ts->state_ptr == NULL) {
3587                 ts->state_ptr = ebb;
3588             } else if (ts->state_ptr != ebb) {
3589                 ts->state_ptr = multiple_ebb;
3590             }
3591         }
3592     }
3593 
3594     /*
3595      * For TEMP_TB that turned out not to be used beyond one EBB,
3596      * reduce the liveness to TEMP_EBB.
3597      */
3598     for (int i = s->nb_globals; i < nb_temps; ++i) {
3599         TCGTemp *ts = &s->temps[i];
3600         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3601             ts->kind = TEMP_EBB;
3602         }
3603     }
3604 }
3605 
3606 /* Liveness analysis : update the opc_arg_life array to tell if a
3607    given input arguments is dead. Instructions updating dead
3608    temporaries are removed. */
3609 static void __attribute__((noinline))
3610 liveness_pass_1(TCGContext *s)
3611 {
3612     int nb_globals = s->nb_globals;
3613     int nb_temps = s->nb_temps;
3614     TCGOp *op, *op_prev;
3615     TCGRegSet *prefs;
3616     int i;
3617 
3618     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3619     for (i = 0; i < nb_temps; ++i) {
3620         s->temps[i].state_ptr = prefs + i;
3621     }
3622 
3623     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3624     la_func_end(s, nb_globals, nb_temps);
3625 
3626     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3627         int nb_iargs, nb_oargs;
3628         TCGOpcode opc_new, opc_new2;
3629         bool have_opc_new2;
3630         TCGLifeData arg_life = 0;
3631         TCGTemp *ts;
3632         TCGOpcode opc = op->opc;
3633         const TCGOpDef *def = &tcg_op_defs[opc];
3634 
3635         switch (opc) {
3636         case INDEX_op_call:
3637             {
3638                 const TCGHelperInfo *info = tcg_call_info(op);
3639                 int call_flags = tcg_call_flags(op);
3640 
3641                 nb_oargs = TCGOP_CALLO(op);
3642                 nb_iargs = TCGOP_CALLI(op);
3643 
3644                 /* pure functions can be removed if their result is unused */
3645                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3646                     for (i = 0; i < nb_oargs; i++) {
3647                         ts = arg_temp(op->args[i]);
3648                         if (ts->state != TS_DEAD) {
3649                             goto do_not_remove_call;
3650                         }
3651                     }
3652                     goto do_remove;
3653                 }
3654             do_not_remove_call:
3655 
3656                 /* Output args are dead.  */
3657                 for (i = 0; i < nb_oargs; i++) {
3658                     ts = arg_temp(op->args[i]);
3659                     if (ts->state & TS_DEAD) {
3660                         arg_life |= DEAD_ARG << i;
3661                     }
3662                     if (ts->state & TS_MEM) {
3663                         arg_life |= SYNC_ARG << i;
3664                     }
3665                     ts->state = TS_DEAD;
3666                     la_reset_pref(ts);
3667                 }
3668 
3669                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3670                 memset(op->output_pref, 0, sizeof(op->output_pref));
3671 
3672                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3673                                     TCG_CALL_NO_READ_GLOBALS))) {
3674                     la_global_kill(s, nb_globals);
3675                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3676                     la_global_sync(s, nb_globals);
3677                 }
3678 
3679                 /* Record arguments that die in this helper.  */
3680                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3681                     ts = arg_temp(op->args[i]);
3682                     if (ts->state & TS_DEAD) {
3683                         arg_life |= DEAD_ARG << i;
3684                     }
3685                 }
3686 
3687                 /* For all live registers, remove call-clobbered prefs.  */
3688                 la_cross_call(s, nb_temps);
3689 
3690                 /*
3691                  * Input arguments are live for preceding opcodes.
3692                  *
3693                  * For those arguments that die, and will be allocated in
3694                  * registers, clear the register set for that arg, to be
3695                  * filled in below.  For args that will be on the stack,
3696                  * reset to any available reg.  Process arguments in reverse
3697                  * order so that if a temp is used more than once, the stack
3698                  * reset to max happens before the register reset to 0.
3699                  */
3700                 for (i = nb_iargs - 1; i >= 0; i--) {
3701                     const TCGCallArgumentLoc *loc = &info->in[i];
3702                     ts = arg_temp(op->args[nb_oargs + i]);
3703 
3704                     if (ts->state & TS_DEAD) {
3705                         switch (loc->kind) {
3706                         case TCG_CALL_ARG_NORMAL:
3707                         case TCG_CALL_ARG_EXTEND_U:
3708                         case TCG_CALL_ARG_EXTEND_S:
3709                             if (arg_slot_reg_p(loc->arg_slot)) {
3710                                 *la_temp_pref(ts) = 0;
3711                                 break;
3712                             }
3713                             /* fall through */
3714                         default:
3715                             *la_temp_pref(ts) =
3716                                 tcg_target_available_regs[ts->type];
3717                             break;
3718                         }
3719                         ts->state &= ~TS_DEAD;
3720                     }
3721                 }
3722 
3723                 /*
3724                  * For each input argument, add its input register to prefs.
3725                  * If a temp is used once, this produces a single set bit;
3726                  * if a temp is used multiple times, this produces a set.
3727                  */
3728                 for (i = 0; i < nb_iargs; i++) {
3729                     const TCGCallArgumentLoc *loc = &info->in[i];
3730                     ts = arg_temp(op->args[nb_oargs + i]);
3731 
3732                     switch (loc->kind) {
3733                     case TCG_CALL_ARG_NORMAL:
3734                     case TCG_CALL_ARG_EXTEND_U:
3735                     case TCG_CALL_ARG_EXTEND_S:
3736                         if (arg_slot_reg_p(loc->arg_slot)) {
3737                             tcg_regset_set_reg(*la_temp_pref(ts),
3738                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3739                         }
3740                         break;
3741                     default:
3742                         break;
3743                     }
3744                 }
3745             }
3746             break;
3747         case INDEX_op_insn_start:
3748             break;
3749         case INDEX_op_discard:
3750             /* mark the temporary as dead */
3751             ts = arg_temp(op->args[0]);
3752             ts->state = TS_DEAD;
3753             la_reset_pref(ts);
3754             break;
3755 
3756         case INDEX_op_add2_i32:
3757             opc_new = INDEX_op_add_i32;
3758             goto do_addsub2;
3759         case INDEX_op_sub2_i32:
3760             opc_new = INDEX_op_sub_i32;
3761             goto do_addsub2;
3762         case INDEX_op_add2_i64:
3763             opc_new = INDEX_op_add_i64;
3764             goto do_addsub2;
3765         case INDEX_op_sub2_i64:
3766             opc_new = INDEX_op_sub_i64;
3767         do_addsub2:
3768             nb_iargs = 4;
3769             nb_oargs = 2;
3770             /* Test if the high part of the operation is dead, but not
3771                the low part.  The result can be optimized to a simple
3772                add or sub.  This happens often for x86_64 guest when the
3773                cpu mode is set to 32 bit.  */
3774             if (arg_temp(op->args[1])->state == TS_DEAD) {
3775                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3776                     goto do_remove;
3777                 }
3778                 /* Replace the opcode and adjust the args in place,
3779                    leaving 3 unused args at the end.  */
3780                 op->opc = opc = opc_new;
3781                 op->args[1] = op->args[2];
3782                 op->args[2] = op->args[4];
3783                 /* Fall through and mark the single-word operation live.  */
3784                 nb_iargs = 2;
3785                 nb_oargs = 1;
3786             }
3787             goto do_not_remove;
3788 
3789         case INDEX_op_mulu2_i32:
3790             opc_new = INDEX_op_mul_i32;
3791             opc_new2 = INDEX_op_muluh_i32;
3792             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3793             goto do_mul2;
3794         case INDEX_op_muls2_i32:
3795             opc_new = INDEX_op_mul_i32;
3796             opc_new2 = INDEX_op_mulsh_i32;
3797             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3798             goto do_mul2;
3799         case INDEX_op_mulu2_i64:
3800             opc_new = INDEX_op_mul_i64;
3801             opc_new2 = INDEX_op_muluh_i64;
3802             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3803             goto do_mul2;
3804         case INDEX_op_muls2_i64:
3805             opc_new = INDEX_op_mul_i64;
3806             opc_new2 = INDEX_op_mulsh_i64;
3807             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3808             goto do_mul2;
3809         do_mul2:
3810             nb_iargs = 2;
3811             nb_oargs = 2;
3812             if (arg_temp(op->args[1])->state == TS_DEAD) {
3813                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3814                     /* Both parts of the operation are dead.  */
3815                     goto do_remove;
3816                 }
3817                 /* The high part of the operation is dead; generate the low. */
3818                 op->opc = opc = opc_new;
3819                 op->args[1] = op->args[2];
3820                 op->args[2] = op->args[3];
3821             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3822                 /* The low part of the operation is dead; generate the high. */
3823                 op->opc = opc = opc_new2;
3824                 op->args[0] = op->args[1];
3825                 op->args[1] = op->args[2];
3826                 op->args[2] = op->args[3];
3827             } else {
3828                 goto do_not_remove;
3829             }
3830             /* Mark the single-word operation live.  */
3831             nb_oargs = 1;
3832             goto do_not_remove;
3833 
3834         default:
3835             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3836             nb_iargs = def->nb_iargs;
3837             nb_oargs = def->nb_oargs;
3838 
3839             /* Test if the operation can be removed because all
3840                its outputs are dead. We assume that nb_oargs == 0
3841                implies side effects */
3842             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3843                 for (i = 0; i < nb_oargs; i++) {
3844                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3845                         goto do_not_remove;
3846                     }
3847                 }
3848                 goto do_remove;
3849             }
3850             goto do_not_remove;
3851 
3852         do_remove:
3853             tcg_op_remove(s, op);
3854             break;
3855 
3856         do_not_remove:
3857             for (i = 0; i < nb_oargs; i++) {
3858                 ts = arg_temp(op->args[i]);
3859 
3860                 /* Remember the preference of the uses that followed.  */
3861                 if (i < ARRAY_SIZE(op->output_pref)) {
3862                     op->output_pref[i] = *la_temp_pref(ts);
3863                 }
3864 
3865                 /* Output args are dead.  */
3866                 if (ts->state & TS_DEAD) {
3867                     arg_life |= DEAD_ARG << i;
3868                 }
3869                 if (ts->state & TS_MEM) {
3870                     arg_life |= SYNC_ARG << i;
3871                 }
3872                 ts->state = TS_DEAD;
3873                 la_reset_pref(ts);
3874             }
3875 
3876             /* If end of basic block, update.  */
3877             if (def->flags & TCG_OPF_BB_EXIT) {
3878                 la_func_end(s, nb_globals, nb_temps);
3879             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3880                 la_bb_sync(s, nb_globals, nb_temps);
3881             } else if (def->flags & TCG_OPF_BB_END) {
3882                 la_bb_end(s, nb_globals, nb_temps);
3883             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3884                 la_global_sync(s, nb_globals);
3885                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3886                     la_cross_call(s, nb_temps);
3887                 }
3888             }
3889 
3890             /* Record arguments that die in this opcode.  */
3891             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3892                 ts = arg_temp(op->args[i]);
3893                 if (ts->state & TS_DEAD) {
3894                     arg_life |= DEAD_ARG << i;
3895                 }
3896             }
3897 
3898             /* Input arguments are live for preceding opcodes.  */
3899             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3900                 ts = arg_temp(op->args[i]);
3901                 if (ts->state & TS_DEAD) {
3902                     /* For operands that were dead, initially allow
3903                        all regs for the type.  */
3904                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3905                     ts->state &= ~TS_DEAD;
3906                 }
3907             }
3908 
3909             /* Incorporate constraints for this operand.  */
3910             switch (opc) {
3911             case INDEX_op_mov_i32:
3912             case INDEX_op_mov_i64:
3913                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3914                    have proper constraints.  That said, special case
3915                    moves to propagate preferences backward.  */
3916                 if (IS_DEAD_ARG(1)) {
3917                     *la_temp_pref(arg_temp(op->args[0]))
3918                         = *la_temp_pref(arg_temp(op->args[1]));
3919                 }
3920                 break;
3921 
3922             default:
3923                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3924                     const TCGArgConstraint *ct = &def->args_ct[i];
3925                     TCGRegSet set, *pset;
3926 
3927                     ts = arg_temp(op->args[i]);
3928                     pset = la_temp_pref(ts);
3929                     set = *pset;
3930 
3931                     set &= ct->regs;
3932                     if (ct->ialias) {
3933                         set &= output_pref(op, ct->alias_index);
3934                     }
3935                     /* If the combination is not possible, restart.  */
3936                     if (set == 0) {
3937                         set = ct->regs;
3938                     }
3939                     *pset = set;
3940                 }
3941                 break;
3942             }
3943             break;
3944         }
3945         op->life = arg_life;
3946     }
3947 }
3948 
3949 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3950 static bool __attribute__((noinline))
3951 liveness_pass_2(TCGContext *s)
3952 {
3953     int nb_globals = s->nb_globals;
3954     int nb_temps, i;
3955     bool changes = false;
3956     TCGOp *op, *op_next;
3957 
3958     /* Create a temporary for each indirect global.  */
3959     for (i = 0; i < nb_globals; ++i) {
3960         TCGTemp *its = &s->temps[i];
3961         if (its->indirect_reg) {
3962             TCGTemp *dts = tcg_temp_alloc(s);
3963             dts->type = its->type;
3964             dts->base_type = its->base_type;
3965             dts->temp_subindex = its->temp_subindex;
3966             dts->kind = TEMP_EBB;
3967             its->state_ptr = dts;
3968         } else {
3969             its->state_ptr = NULL;
3970         }
3971         /* All globals begin dead.  */
3972         its->state = TS_DEAD;
3973     }
3974     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3975         TCGTemp *its = &s->temps[i];
3976         its->state_ptr = NULL;
3977         its->state = TS_DEAD;
3978     }
3979 
3980     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3981         TCGOpcode opc = op->opc;
3982         const TCGOpDef *def = &tcg_op_defs[opc];
3983         TCGLifeData arg_life = op->life;
3984         int nb_iargs, nb_oargs, call_flags;
3985         TCGTemp *arg_ts, *dir_ts;
3986 
3987         if (opc == INDEX_op_call) {
3988             nb_oargs = TCGOP_CALLO(op);
3989             nb_iargs = TCGOP_CALLI(op);
3990             call_flags = tcg_call_flags(op);
3991         } else {
3992             nb_iargs = def->nb_iargs;
3993             nb_oargs = def->nb_oargs;
3994 
3995             /* Set flags similar to how calls require.  */
3996             if (def->flags & TCG_OPF_COND_BRANCH) {
3997                 /* Like reading globals: sync_globals */
3998                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3999             } else if (def->flags & TCG_OPF_BB_END) {
4000                 /* Like writing globals: save_globals */
4001                 call_flags = 0;
4002             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4003                 /* Like reading globals: sync_globals */
4004                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4005             } else {
4006                 /* No effect on globals.  */
4007                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4008                               TCG_CALL_NO_WRITE_GLOBALS);
4009             }
4010         }
4011 
4012         /* Make sure that input arguments are available.  */
4013         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4014             arg_ts = arg_temp(op->args[i]);
4015             dir_ts = arg_ts->state_ptr;
4016             if (dir_ts && arg_ts->state == TS_DEAD) {
4017                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4018                                   ? INDEX_op_ld_i32
4019                                   : INDEX_op_ld_i64);
4020                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
4021 
4022                 lop->args[0] = temp_arg(dir_ts);
4023                 lop->args[1] = temp_arg(arg_ts->mem_base);
4024                 lop->args[2] = arg_ts->mem_offset;
4025 
4026                 /* Loaded, but synced with memory.  */
4027                 arg_ts->state = TS_MEM;
4028             }
4029         }
4030 
4031         /* Perform input replacement, and mark inputs that became dead.
4032            No action is required except keeping temp_state up to date
4033            so that we reload when needed.  */
4034         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4035             arg_ts = arg_temp(op->args[i]);
4036             dir_ts = arg_ts->state_ptr;
4037             if (dir_ts) {
4038                 op->args[i] = temp_arg(dir_ts);
4039                 changes = true;
4040                 if (IS_DEAD_ARG(i)) {
4041                     arg_ts->state = TS_DEAD;
4042                 }
4043             }
4044         }
4045 
4046         /* Liveness analysis should ensure that the following are
4047            all correct, for call sites and basic block end points.  */
4048         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4049             /* Nothing to do */
4050         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4051             for (i = 0; i < nb_globals; ++i) {
4052                 /* Liveness should see that globals are synced back,
4053                    that is, either TS_DEAD or TS_MEM.  */
4054                 arg_ts = &s->temps[i];
4055                 tcg_debug_assert(arg_ts->state_ptr == 0
4056                                  || arg_ts->state != 0);
4057             }
4058         } else {
4059             for (i = 0; i < nb_globals; ++i) {
4060                 /* Liveness should see that globals are saved back,
4061                    that is, TS_DEAD, waiting to be reloaded.  */
4062                 arg_ts = &s->temps[i];
4063                 tcg_debug_assert(arg_ts->state_ptr == 0
4064                                  || arg_ts->state == TS_DEAD);
4065             }
4066         }
4067 
4068         /* Outputs become available.  */
4069         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4070             arg_ts = arg_temp(op->args[0]);
4071             dir_ts = arg_ts->state_ptr;
4072             if (dir_ts) {
4073                 op->args[0] = temp_arg(dir_ts);
4074                 changes = true;
4075 
4076                 /* The output is now live and modified.  */
4077                 arg_ts->state = 0;
4078 
4079                 if (NEED_SYNC_ARG(0)) {
4080                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4081                                       ? INDEX_op_st_i32
4082                                       : INDEX_op_st_i64);
4083                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4084                     TCGTemp *out_ts = dir_ts;
4085 
4086                     if (IS_DEAD_ARG(0)) {
4087                         out_ts = arg_temp(op->args[1]);
4088                         arg_ts->state = TS_DEAD;
4089                         tcg_op_remove(s, op);
4090                     } else {
4091                         arg_ts->state = TS_MEM;
4092                     }
4093 
4094                     sop->args[0] = temp_arg(out_ts);
4095                     sop->args[1] = temp_arg(arg_ts->mem_base);
4096                     sop->args[2] = arg_ts->mem_offset;
4097                 } else {
4098                     tcg_debug_assert(!IS_DEAD_ARG(0));
4099                 }
4100             }
4101         } else {
4102             for (i = 0; i < nb_oargs; i++) {
4103                 arg_ts = arg_temp(op->args[i]);
4104                 dir_ts = arg_ts->state_ptr;
4105                 if (!dir_ts) {
4106                     continue;
4107                 }
4108                 op->args[i] = temp_arg(dir_ts);
4109                 changes = true;
4110 
4111                 /* The output is now live and modified.  */
4112                 arg_ts->state = 0;
4113 
4114                 /* Sync outputs upon their last write.  */
4115                 if (NEED_SYNC_ARG(i)) {
4116                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4117                                       ? INDEX_op_st_i32
4118                                       : INDEX_op_st_i64);
4119                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4120 
4121                     sop->args[0] = temp_arg(dir_ts);
4122                     sop->args[1] = temp_arg(arg_ts->mem_base);
4123                     sop->args[2] = arg_ts->mem_offset;
4124 
4125                     arg_ts->state = TS_MEM;
4126                 }
4127                 /* Drop outputs that are dead.  */
4128                 if (IS_DEAD_ARG(i)) {
4129                     arg_ts->state = TS_DEAD;
4130                 }
4131             }
4132         }
4133     }
4134 
4135     return changes;
4136 }
4137 
4138 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4139 {
4140     intptr_t off;
4141     int size, align;
4142 
4143     /* When allocating an object, look at the full type. */
4144     size = tcg_type_size(ts->base_type);
4145     switch (ts->base_type) {
4146     case TCG_TYPE_I32:
4147         align = 4;
4148         break;
4149     case TCG_TYPE_I64:
4150     case TCG_TYPE_V64:
4151         align = 8;
4152         break;
4153     case TCG_TYPE_I128:
4154     case TCG_TYPE_V128:
4155     case TCG_TYPE_V256:
4156         /*
4157          * Note that we do not require aligned storage for V256,
4158          * and that we provide alignment for I128 to match V128,
4159          * even if that's above what the host ABI requires.
4160          */
4161         align = 16;
4162         break;
4163     default:
4164         g_assert_not_reached();
4165     }
4166 
4167     /*
4168      * Assume the stack is sufficiently aligned.
4169      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4170      * and do not require 16 byte vector alignment.  This seems slightly
4171      * easier than fully parameterizing the above switch statement.
4172      */
4173     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4174     off = ROUND_UP(s->current_frame_offset, align);
4175 
4176     /* If we've exhausted the stack frame, restart with a smaller TB. */
4177     if (off + size > s->frame_end) {
4178         tcg_raise_tb_overflow(s);
4179     }
4180     s->current_frame_offset = off + size;
4181 #if defined(__sparc__)
4182     off += TCG_TARGET_STACK_BIAS;
4183 #endif
4184 
4185     /* If the object was subdivided, assign memory to all the parts. */
4186     if (ts->base_type != ts->type) {
4187         int part_size = tcg_type_size(ts->type);
4188         int part_count = size / part_size;
4189 
4190         /*
4191          * Each part is allocated sequentially in tcg_temp_new_internal.
4192          * Jump back to the first part by subtracting the current index.
4193          */
4194         ts -= ts->temp_subindex;
4195         for (int i = 0; i < part_count; ++i) {
4196             ts[i].mem_offset = off + i * part_size;
4197             ts[i].mem_base = s->frame_temp;
4198             ts[i].mem_allocated = 1;
4199         }
4200     } else {
4201         ts->mem_offset = off;
4202         ts->mem_base = s->frame_temp;
4203         ts->mem_allocated = 1;
4204     }
4205 }
4206 
4207 /* Assign @reg to @ts, and update reg_to_temp[]. */
4208 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4209 {
4210     if (ts->val_type == TEMP_VAL_REG) {
4211         TCGReg old = ts->reg;
4212         tcg_debug_assert(s->reg_to_temp[old] == ts);
4213         if (old == reg) {
4214             return;
4215         }
4216         s->reg_to_temp[old] = NULL;
4217     }
4218     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4219     s->reg_to_temp[reg] = ts;
4220     ts->val_type = TEMP_VAL_REG;
4221     ts->reg = reg;
4222 }
4223 
4224 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4225 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4226 {
4227     tcg_debug_assert(type != TEMP_VAL_REG);
4228     if (ts->val_type == TEMP_VAL_REG) {
4229         TCGReg reg = ts->reg;
4230         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4231         s->reg_to_temp[reg] = NULL;
4232     }
4233     ts->val_type = type;
4234 }
4235 
4236 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4237 
4238 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4239    mark it free; otherwise mark it dead.  */
4240 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4241 {
4242     TCGTempVal new_type;
4243 
4244     switch (ts->kind) {
4245     case TEMP_FIXED:
4246         return;
4247     case TEMP_GLOBAL:
4248     case TEMP_TB:
4249         new_type = TEMP_VAL_MEM;
4250         break;
4251     case TEMP_EBB:
4252         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4253         break;
4254     case TEMP_CONST:
4255         new_type = TEMP_VAL_CONST;
4256         break;
4257     default:
4258         g_assert_not_reached();
4259     }
4260     set_temp_val_nonreg(s, ts, new_type);
4261 }
4262 
4263 /* Mark a temporary as dead.  */
4264 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4265 {
4266     temp_free_or_dead(s, ts, 1);
4267 }
4268 
4269 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4270    registers needs to be allocated to store a constant.  If 'free_or_dead'
4271    is non-zero, subsequently release the temporary; if it is positive, the
4272    temp is dead; if it is negative, the temp is free.  */
4273 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4274                       TCGRegSet preferred_regs, int free_or_dead)
4275 {
4276     if (!temp_readonly(ts) && !ts->mem_coherent) {
4277         if (!ts->mem_allocated) {
4278             temp_allocate_frame(s, ts);
4279         }
4280         switch (ts->val_type) {
4281         case TEMP_VAL_CONST:
4282             /* If we're going to free the temp immediately, then we won't
4283                require it later in a register, so attempt to store the
4284                constant to memory directly.  */
4285             if (free_or_dead
4286                 && tcg_out_sti(s, ts->type, ts->val,
4287                                ts->mem_base->reg, ts->mem_offset)) {
4288                 break;
4289             }
4290             temp_load(s, ts, tcg_target_available_regs[ts->type],
4291                       allocated_regs, preferred_regs);
4292             /* fallthrough */
4293 
4294         case TEMP_VAL_REG:
4295             tcg_out_st(s, ts->type, ts->reg,
4296                        ts->mem_base->reg, ts->mem_offset);
4297             break;
4298 
4299         case TEMP_VAL_MEM:
4300             break;
4301 
4302         case TEMP_VAL_DEAD:
4303         default:
4304             g_assert_not_reached();
4305         }
4306         ts->mem_coherent = 1;
4307     }
4308     if (free_or_dead) {
4309         temp_free_or_dead(s, ts, free_or_dead);
4310     }
4311 }
4312 
4313 /* free register 'reg' by spilling the corresponding temporary if necessary */
4314 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4315 {
4316     TCGTemp *ts = s->reg_to_temp[reg];
4317     if (ts != NULL) {
4318         temp_sync(s, ts, allocated_regs, 0, -1);
4319     }
4320 }
4321 
4322 /**
4323  * tcg_reg_alloc:
4324  * @required_regs: Set of registers in which we must allocate.
4325  * @allocated_regs: Set of registers which must be avoided.
4326  * @preferred_regs: Set of registers we should prefer.
4327  * @rev: True if we search the registers in "indirect" order.
4328  *
4329  * The allocated register must be in @required_regs & ~@allocated_regs,
4330  * but if we can put it in @preferred_regs we may save a move later.
4331  */
4332 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4333                             TCGRegSet allocated_regs,
4334                             TCGRegSet preferred_regs, bool rev)
4335 {
4336     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4337     TCGRegSet reg_ct[2];
4338     const int *order;
4339 
4340     reg_ct[1] = required_regs & ~allocated_regs;
4341     tcg_debug_assert(reg_ct[1] != 0);
4342     reg_ct[0] = reg_ct[1] & preferred_regs;
4343 
4344     /* Skip the preferred_regs option if it cannot be satisfied,
4345        or if the preference made no difference.  */
4346     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4347 
4348     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4349 
4350     /* Try free registers, preferences first.  */
4351     for (j = f; j < 2; j++) {
4352         TCGRegSet set = reg_ct[j];
4353 
4354         if (tcg_regset_single(set)) {
4355             /* One register in the set.  */
4356             TCGReg reg = tcg_regset_first(set);
4357             if (s->reg_to_temp[reg] == NULL) {
4358                 return reg;
4359             }
4360         } else {
4361             for (i = 0; i < n; i++) {
4362                 TCGReg reg = order[i];
4363                 if (s->reg_to_temp[reg] == NULL &&
4364                     tcg_regset_test_reg(set, reg)) {
4365                     return reg;
4366                 }
4367             }
4368         }
4369     }
4370 
4371     /* We must spill something.  */
4372     for (j = f; j < 2; j++) {
4373         TCGRegSet set = reg_ct[j];
4374 
4375         if (tcg_regset_single(set)) {
4376             /* One register in the set.  */
4377             TCGReg reg = tcg_regset_first(set);
4378             tcg_reg_free(s, reg, allocated_regs);
4379             return reg;
4380         } else {
4381             for (i = 0; i < n; i++) {
4382                 TCGReg reg = order[i];
4383                 if (tcg_regset_test_reg(set, reg)) {
4384                     tcg_reg_free(s, reg, allocated_regs);
4385                     return reg;
4386                 }
4387             }
4388         }
4389     }
4390 
4391     g_assert_not_reached();
4392 }
4393 
4394 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4395                                  TCGRegSet allocated_regs,
4396                                  TCGRegSet preferred_regs, bool rev)
4397 {
4398     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4399     TCGRegSet reg_ct[2];
4400     const int *order;
4401 
4402     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4403     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4404     tcg_debug_assert(reg_ct[1] != 0);
4405     reg_ct[0] = reg_ct[1] & preferred_regs;
4406 
4407     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4408 
4409     /*
4410      * Skip the preferred_regs option if it cannot be satisfied,
4411      * or if the preference made no difference.
4412      */
4413     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4414 
4415     /*
4416      * Minimize the number of flushes by looking for 2 free registers first,
4417      * then a single flush, then two flushes.
4418      */
4419     for (fmin = 2; fmin >= 0; fmin--) {
4420         for (j = k; j < 2; j++) {
4421             TCGRegSet set = reg_ct[j];
4422 
4423             for (i = 0; i < n; i++) {
4424                 TCGReg reg = order[i];
4425 
4426                 if (tcg_regset_test_reg(set, reg)) {
4427                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4428                     if (f >= fmin) {
4429                         tcg_reg_free(s, reg, allocated_regs);
4430                         tcg_reg_free(s, reg + 1, allocated_regs);
4431                         return reg;
4432                     }
4433                 }
4434             }
4435         }
4436     }
4437     g_assert_not_reached();
4438 }
4439 
4440 /* Make sure the temporary is in a register.  If needed, allocate the register
4441    from DESIRED while avoiding ALLOCATED.  */
4442 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4443                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4444 {
4445     TCGReg reg;
4446 
4447     switch (ts->val_type) {
4448     case TEMP_VAL_REG:
4449         return;
4450     case TEMP_VAL_CONST:
4451         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4452                             preferred_regs, ts->indirect_base);
4453         if (ts->type <= TCG_TYPE_I64) {
4454             tcg_out_movi(s, ts->type, reg, ts->val);
4455         } else {
4456             uint64_t val = ts->val;
4457             MemOp vece = MO_64;
4458 
4459             /*
4460              * Find the minimal vector element that matches the constant.
4461              * The targets will, in general, have to do this search anyway,
4462              * do this generically.
4463              */
4464             if (val == dup_const(MO_8, val)) {
4465                 vece = MO_8;
4466             } else if (val == dup_const(MO_16, val)) {
4467                 vece = MO_16;
4468             } else if (val == dup_const(MO_32, val)) {
4469                 vece = MO_32;
4470             }
4471 
4472             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4473         }
4474         ts->mem_coherent = 0;
4475         break;
4476     case TEMP_VAL_MEM:
4477         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4478                             preferred_regs, ts->indirect_base);
4479         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4480         ts->mem_coherent = 1;
4481         break;
4482     case TEMP_VAL_DEAD:
4483     default:
4484         g_assert_not_reached();
4485     }
4486     set_temp_val_reg(s, ts, reg);
4487 }
4488 
4489 /* Save a temporary to memory. 'allocated_regs' is used in case a
4490    temporary registers needs to be allocated to store a constant.  */
4491 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4492 {
4493     /* The liveness analysis already ensures that globals are back
4494        in memory. Keep an tcg_debug_assert for safety. */
4495     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4496 }
4497 
4498 /* save globals to their canonical location and assume they can be
4499    modified be the following code. 'allocated_regs' is used in case a
4500    temporary registers needs to be allocated to store a constant. */
4501 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4502 {
4503     int i, n;
4504 
4505     for (i = 0, n = s->nb_globals; i < n; i++) {
4506         temp_save(s, &s->temps[i], allocated_regs);
4507     }
4508 }
4509 
4510 /* sync globals to their canonical location and assume they can be
4511    read by the following code. 'allocated_regs' is used in case a
4512    temporary registers needs to be allocated to store a constant. */
4513 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4514 {
4515     int i, n;
4516 
4517     for (i = 0, n = s->nb_globals; i < n; i++) {
4518         TCGTemp *ts = &s->temps[i];
4519         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4520                          || ts->kind == TEMP_FIXED
4521                          || ts->mem_coherent);
4522     }
4523 }
4524 
4525 /* at the end of a basic block, we assume all temporaries are dead and
4526    all globals are stored at their canonical location. */
4527 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4528 {
4529     int i;
4530 
4531     for (i = s->nb_globals; i < s->nb_temps; i++) {
4532         TCGTemp *ts = &s->temps[i];
4533 
4534         switch (ts->kind) {
4535         case TEMP_TB:
4536             temp_save(s, ts, allocated_regs);
4537             break;
4538         case TEMP_EBB:
4539             /* The liveness analysis already ensures that temps are dead.
4540                Keep an tcg_debug_assert for safety. */
4541             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4542             break;
4543         case TEMP_CONST:
4544             /* Similarly, we should have freed any allocated register. */
4545             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4546             break;
4547         default:
4548             g_assert_not_reached();
4549         }
4550     }
4551 
4552     save_globals(s, allocated_regs);
4553 }
4554 
4555 /*
4556  * At a conditional branch, we assume all temporaries are dead unless
4557  * explicitly live-across-conditional-branch; all globals and local
4558  * temps are synced to their location.
4559  */
4560 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4561 {
4562     sync_globals(s, allocated_regs);
4563 
4564     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4565         TCGTemp *ts = &s->temps[i];
4566         /*
4567          * The liveness analysis already ensures that temps are dead.
4568          * Keep tcg_debug_asserts for safety.
4569          */
4570         switch (ts->kind) {
4571         case TEMP_TB:
4572             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4573             break;
4574         case TEMP_EBB:
4575         case TEMP_CONST:
4576             break;
4577         default:
4578             g_assert_not_reached();
4579         }
4580     }
4581 }
4582 
4583 /*
4584  * Specialized code generation for INDEX_op_mov_* with a constant.
4585  */
4586 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4587                                   tcg_target_ulong val, TCGLifeData arg_life,
4588                                   TCGRegSet preferred_regs)
4589 {
4590     /* ENV should not be modified.  */
4591     tcg_debug_assert(!temp_readonly(ots));
4592 
4593     /* The movi is not explicitly generated here.  */
4594     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4595     ots->val = val;
4596     ots->mem_coherent = 0;
4597     if (NEED_SYNC_ARG(0)) {
4598         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4599     } else if (IS_DEAD_ARG(0)) {
4600         temp_dead(s, ots);
4601     }
4602 }
4603 
4604 /*
4605  * Specialized code generation for INDEX_op_mov_*.
4606  */
4607 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4608 {
4609     const TCGLifeData arg_life = op->life;
4610     TCGRegSet allocated_regs, preferred_regs;
4611     TCGTemp *ts, *ots;
4612     TCGType otype, itype;
4613     TCGReg oreg, ireg;
4614 
4615     allocated_regs = s->reserved_regs;
4616     preferred_regs = output_pref(op, 0);
4617     ots = arg_temp(op->args[0]);
4618     ts = arg_temp(op->args[1]);
4619 
4620     /* ENV should not be modified.  */
4621     tcg_debug_assert(!temp_readonly(ots));
4622 
4623     /* Note that otype != itype for no-op truncation.  */
4624     otype = ots->type;
4625     itype = ts->type;
4626 
4627     if (ts->val_type == TEMP_VAL_CONST) {
4628         /* propagate constant or generate sti */
4629         tcg_target_ulong val = ts->val;
4630         if (IS_DEAD_ARG(1)) {
4631             temp_dead(s, ts);
4632         }
4633         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4634         return;
4635     }
4636 
4637     /* If the source value is in memory we're going to be forced
4638        to have it in a register in order to perform the copy.  Copy
4639        the SOURCE value into its own register first, that way we
4640        don't have to reload SOURCE the next time it is used. */
4641     if (ts->val_type == TEMP_VAL_MEM) {
4642         temp_load(s, ts, tcg_target_available_regs[itype],
4643                   allocated_regs, preferred_regs);
4644     }
4645     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4646     ireg = ts->reg;
4647 
4648     if (IS_DEAD_ARG(0)) {
4649         /* mov to a non-saved dead register makes no sense (even with
4650            liveness analysis disabled). */
4651         tcg_debug_assert(NEED_SYNC_ARG(0));
4652         if (!ots->mem_allocated) {
4653             temp_allocate_frame(s, ots);
4654         }
4655         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4656         if (IS_DEAD_ARG(1)) {
4657             temp_dead(s, ts);
4658         }
4659         temp_dead(s, ots);
4660         return;
4661     }
4662 
4663     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4664         /*
4665          * The mov can be suppressed.  Kill input first, so that it
4666          * is unlinked from reg_to_temp, then set the output to the
4667          * reg that we saved from the input.
4668          */
4669         temp_dead(s, ts);
4670         oreg = ireg;
4671     } else {
4672         if (ots->val_type == TEMP_VAL_REG) {
4673             oreg = ots->reg;
4674         } else {
4675             /* Make sure to not spill the input register during allocation. */
4676             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4677                                  allocated_regs | ((TCGRegSet)1 << ireg),
4678                                  preferred_regs, ots->indirect_base);
4679         }
4680         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4681             /*
4682              * Cross register class move not supported.
4683              * Store the source register into the destination slot
4684              * and leave the destination temp as TEMP_VAL_MEM.
4685              */
4686             assert(!temp_readonly(ots));
4687             if (!ts->mem_allocated) {
4688                 temp_allocate_frame(s, ots);
4689             }
4690             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4691             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4692             ots->mem_coherent = 1;
4693             return;
4694         }
4695     }
4696     set_temp_val_reg(s, ots, oreg);
4697     ots->mem_coherent = 0;
4698 
4699     if (NEED_SYNC_ARG(0)) {
4700         temp_sync(s, ots, allocated_regs, 0, 0);
4701     }
4702 }
4703 
4704 /*
4705  * Specialized code generation for INDEX_op_dup_vec.
4706  */
4707 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4708 {
4709     const TCGLifeData arg_life = op->life;
4710     TCGRegSet dup_out_regs, dup_in_regs;
4711     TCGTemp *its, *ots;
4712     TCGType itype, vtype;
4713     unsigned vece;
4714     int lowpart_ofs;
4715     bool ok;
4716 
4717     ots = arg_temp(op->args[0]);
4718     its = arg_temp(op->args[1]);
4719 
4720     /* ENV should not be modified.  */
4721     tcg_debug_assert(!temp_readonly(ots));
4722 
4723     itype = its->type;
4724     vece = TCGOP_VECE(op);
4725     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4726 
4727     if (its->val_type == TEMP_VAL_CONST) {
4728         /* Propagate constant via movi -> dupi.  */
4729         tcg_target_ulong val = its->val;
4730         if (IS_DEAD_ARG(1)) {
4731             temp_dead(s, its);
4732         }
4733         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4734         return;
4735     }
4736 
4737     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4738     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4739 
4740     /* Allocate the output register now.  */
4741     if (ots->val_type != TEMP_VAL_REG) {
4742         TCGRegSet allocated_regs = s->reserved_regs;
4743         TCGReg oreg;
4744 
4745         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4746             /* Make sure to not spill the input register. */
4747             tcg_regset_set_reg(allocated_regs, its->reg);
4748         }
4749         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4750                              output_pref(op, 0), ots->indirect_base);
4751         set_temp_val_reg(s, ots, oreg);
4752     }
4753 
4754     switch (its->val_type) {
4755     case TEMP_VAL_REG:
4756         /*
4757          * The dup constriaints must be broad, covering all possible VECE.
4758          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4759          * to fail, indicating that extra moves are required for that case.
4760          */
4761         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4762             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4763                 goto done;
4764             }
4765             /* Try again from memory or a vector input register.  */
4766         }
4767         if (!its->mem_coherent) {
4768             /*
4769              * The input register is not synced, and so an extra store
4770              * would be required to use memory.  Attempt an integer-vector
4771              * register move first.  We do not have a TCGRegSet for this.
4772              */
4773             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4774                 break;
4775             }
4776             /* Sync the temp back to its slot and load from there.  */
4777             temp_sync(s, its, s->reserved_regs, 0, 0);
4778         }
4779         /* fall through */
4780 
4781     case TEMP_VAL_MEM:
4782         lowpart_ofs = 0;
4783         if (HOST_BIG_ENDIAN) {
4784             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4785         }
4786         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4787                              its->mem_offset + lowpart_ofs)) {
4788             goto done;
4789         }
4790         /* Load the input into the destination vector register. */
4791         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4792         break;
4793 
4794     default:
4795         g_assert_not_reached();
4796     }
4797 
4798     /* We now have a vector input register, so dup must succeed. */
4799     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4800     tcg_debug_assert(ok);
4801 
4802  done:
4803     ots->mem_coherent = 0;
4804     if (IS_DEAD_ARG(1)) {
4805         temp_dead(s, its);
4806     }
4807     if (NEED_SYNC_ARG(0)) {
4808         temp_sync(s, ots, s->reserved_regs, 0, 0);
4809     }
4810     if (IS_DEAD_ARG(0)) {
4811         temp_dead(s, ots);
4812     }
4813 }
4814 
4815 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4816 {
4817     const TCGLifeData arg_life = op->life;
4818     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4819     TCGRegSet i_allocated_regs;
4820     TCGRegSet o_allocated_regs;
4821     int i, k, nb_iargs, nb_oargs;
4822     TCGReg reg;
4823     TCGArg arg;
4824     const TCGArgConstraint *arg_ct;
4825     TCGTemp *ts;
4826     TCGArg new_args[TCG_MAX_OP_ARGS];
4827     int const_args[TCG_MAX_OP_ARGS];
4828     TCGCond op_cond;
4829 
4830     nb_oargs = def->nb_oargs;
4831     nb_iargs = def->nb_iargs;
4832 
4833     /* copy constants */
4834     memcpy(new_args + nb_oargs + nb_iargs,
4835            op->args + nb_oargs + nb_iargs,
4836            sizeof(TCGArg) * def->nb_cargs);
4837 
4838     i_allocated_regs = s->reserved_regs;
4839     o_allocated_regs = s->reserved_regs;
4840 
4841     switch (op->opc) {
4842     case INDEX_op_brcond_i32:
4843     case INDEX_op_brcond_i64:
4844         op_cond = op->args[2];
4845         break;
4846     case INDEX_op_setcond_i32:
4847     case INDEX_op_setcond_i64:
4848     case INDEX_op_negsetcond_i32:
4849     case INDEX_op_negsetcond_i64:
4850     case INDEX_op_cmp_vec:
4851         op_cond = op->args[3];
4852         break;
4853     case INDEX_op_brcond2_i32:
4854         op_cond = op->args[4];
4855         break;
4856     case INDEX_op_movcond_i32:
4857     case INDEX_op_movcond_i64:
4858     case INDEX_op_setcond2_i32:
4859     case INDEX_op_cmpsel_vec:
4860         op_cond = op->args[5];
4861         break;
4862     default:
4863         /* No condition within opcode. */
4864         op_cond = TCG_COND_ALWAYS;
4865         break;
4866     }
4867 
4868     /* satisfy input constraints */
4869     for (k = 0; k < nb_iargs; k++) {
4870         TCGRegSet i_preferred_regs, i_required_regs;
4871         bool allocate_new_reg, copyto_new_reg;
4872         TCGTemp *ts2;
4873         int i1, i2;
4874 
4875         i = def->args_ct[nb_oargs + k].sort_index;
4876         arg = op->args[i];
4877         arg_ct = &def->args_ct[i];
4878         ts = arg_temp(arg);
4879 
4880         if (ts->val_type == TEMP_VAL_CONST
4881             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
4882                                       op_cond, TCGOP_VECE(op))) {
4883             /* constant is OK for instruction */
4884             const_args[i] = 1;
4885             new_args[i] = ts->val;
4886             continue;
4887         }
4888 
4889         reg = ts->reg;
4890         i_preferred_regs = 0;
4891         i_required_regs = arg_ct->regs;
4892         allocate_new_reg = false;
4893         copyto_new_reg = false;
4894 
4895         switch (arg_ct->pair) {
4896         case 0: /* not paired */
4897             if (arg_ct->ialias) {
4898                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4899 
4900                 /*
4901                  * If the input is readonly, then it cannot also be an
4902                  * output and aliased to itself.  If the input is not
4903                  * dead after the instruction, we must allocate a new
4904                  * register and move it.
4905                  */
4906                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4907                     || def->args_ct[arg_ct->alias_index].newreg) {
4908                     allocate_new_reg = true;
4909                 } else if (ts->val_type == TEMP_VAL_REG) {
4910                     /*
4911                      * Check if the current register has already been
4912                      * allocated for another input.
4913                      */
4914                     allocate_new_reg =
4915                         tcg_regset_test_reg(i_allocated_regs, reg);
4916                 }
4917             }
4918             if (!allocate_new_reg) {
4919                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4920                           i_preferred_regs);
4921                 reg = ts->reg;
4922                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4923             }
4924             if (allocate_new_reg) {
4925                 /*
4926                  * Allocate a new register matching the constraint
4927                  * and move the temporary register into it.
4928                  */
4929                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4930                           i_allocated_regs, 0);
4931                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4932                                     i_preferred_regs, ts->indirect_base);
4933                 copyto_new_reg = true;
4934             }
4935             break;
4936 
4937         case 1:
4938             /* First of an input pair; if i1 == i2, the second is an output. */
4939             i1 = i;
4940             i2 = arg_ct->pair_index;
4941             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4942 
4943             /*
4944              * It is easier to default to allocating a new pair
4945              * and to identify a few cases where it's not required.
4946              */
4947             if (arg_ct->ialias) {
4948                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4949                 if (IS_DEAD_ARG(i1) &&
4950                     IS_DEAD_ARG(i2) &&
4951                     !temp_readonly(ts) &&
4952                     ts->val_type == TEMP_VAL_REG &&
4953                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4954                     tcg_regset_test_reg(i_required_regs, reg) &&
4955                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4956                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4957                     (ts2
4958                      ? ts2->val_type == TEMP_VAL_REG &&
4959                        ts2->reg == reg + 1 &&
4960                        !temp_readonly(ts2)
4961                      : s->reg_to_temp[reg + 1] == NULL)) {
4962                     break;
4963                 }
4964             } else {
4965                 /* Without aliasing, the pair must also be an input. */
4966                 tcg_debug_assert(ts2);
4967                 if (ts->val_type == TEMP_VAL_REG &&
4968                     ts2->val_type == TEMP_VAL_REG &&
4969                     ts2->reg == reg + 1 &&
4970                     tcg_regset_test_reg(i_required_regs, reg)) {
4971                     break;
4972                 }
4973             }
4974             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4975                                      0, ts->indirect_base);
4976             goto do_pair;
4977 
4978         case 2: /* pair second */
4979             reg = new_args[arg_ct->pair_index] + 1;
4980             goto do_pair;
4981 
4982         case 3: /* ialias with second output, no first input */
4983             tcg_debug_assert(arg_ct->ialias);
4984             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4985 
4986             if (IS_DEAD_ARG(i) &&
4987                 !temp_readonly(ts) &&
4988                 ts->val_type == TEMP_VAL_REG &&
4989                 reg > 0 &&
4990                 s->reg_to_temp[reg - 1] == NULL &&
4991                 tcg_regset_test_reg(i_required_regs, reg) &&
4992                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4993                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4994                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4995                 break;
4996             }
4997             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4998                                      i_allocated_regs, 0,
4999                                      ts->indirect_base);
5000             tcg_regset_set_reg(i_allocated_regs, reg);
5001             reg += 1;
5002             goto do_pair;
5003 
5004         do_pair:
5005             /*
5006              * If an aliased input is not dead after the instruction,
5007              * we must allocate a new register and move it.
5008              */
5009             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5010                 TCGRegSet t_allocated_regs = i_allocated_regs;
5011 
5012                 /*
5013                  * Because of the alias, and the continued life, make sure
5014                  * that the temp is somewhere *other* than the reg pair,
5015                  * and we get a copy in reg.
5016                  */
5017                 tcg_regset_set_reg(t_allocated_regs, reg);
5018                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5019                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5020                     /* If ts was already in reg, copy it somewhere else. */
5021                     TCGReg nr;
5022                     bool ok;
5023 
5024                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5025                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5026                                        t_allocated_regs, 0, ts->indirect_base);
5027                     ok = tcg_out_mov(s, ts->type, nr, reg);
5028                     tcg_debug_assert(ok);
5029 
5030                     set_temp_val_reg(s, ts, nr);
5031                 } else {
5032                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5033                               t_allocated_regs, 0);
5034                     copyto_new_reg = true;
5035                 }
5036             } else {
5037                 /* Preferably allocate to reg, otherwise copy. */
5038                 i_required_regs = (TCGRegSet)1 << reg;
5039                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5040                           i_preferred_regs);
5041                 copyto_new_reg = ts->reg != reg;
5042             }
5043             break;
5044 
5045         default:
5046             g_assert_not_reached();
5047         }
5048 
5049         if (copyto_new_reg) {
5050             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5051                 /*
5052                  * Cross register class move not supported.  Sync the
5053                  * temp back to its slot and load from there.
5054                  */
5055                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5056                 tcg_out_ld(s, ts->type, reg,
5057                            ts->mem_base->reg, ts->mem_offset);
5058             }
5059         }
5060         new_args[i] = reg;
5061         const_args[i] = 0;
5062         tcg_regset_set_reg(i_allocated_regs, reg);
5063     }
5064 
5065     /* mark dead temporaries and free the associated registers */
5066     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5067         if (IS_DEAD_ARG(i)) {
5068             temp_dead(s, arg_temp(op->args[i]));
5069         }
5070     }
5071 
5072     if (def->flags & TCG_OPF_COND_BRANCH) {
5073         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5074     } else if (def->flags & TCG_OPF_BB_END) {
5075         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5076     } else {
5077         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5078             /* XXX: permit generic clobber register list ? */
5079             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5080                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5081                     tcg_reg_free(s, i, i_allocated_regs);
5082                 }
5083             }
5084         }
5085         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5086             /* sync globals if the op has side effects and might trigger
5087                an exception. */
5088             sync_globals(s, i_allocated_regs);
5089         }
5090 
5091         /* satisfy the output constraints */
5092         for(k = 0; k < nb_oargs; k++) {
5093             i = def->args_ct[k].sort_index;
5094             arg = op->args[i];
5095             arg_ct = &def->args_ct[i];
5096             ts = arg_temp(arg);
5097 
5098             /* ENV should not be modified.  */
5099             tcg_debug_assert(!temp_readonly(ts));
5100 
5101             switch (arg_ct->pair) {
5102             case 0: /* not paired */
5103                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5104                     reg = new_args[arg_ct->alias_index];
5105                 } else if (arg_ct->newreg) {
5106                     reg = tcg_reg_alloc(s, arg_ct->regs,
5107                                         i_allocated_regs | o_allocated_regs,
5108                                         output_pref(op, k), ts->indirect_base);
5109                 } else {
5110                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5111                                         output_pref(op, k), ts->indirect_base);
5112                 }
5113                 break;
5114 
5115             case 1: /* first of pair */
5116                 if (arg_ct->oalias) {
5117                     reg = new_args[arg_ct->alias_index];
5118                 } else if (arg_ct->newreg) {
5119                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5120                                              i_allocated_regs | o_allocated_regs,
5121                                              output_pref(op, k),
5122                                              ts->indirect_base);
5123                 } else {
5124                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5125                                              output_pref(op, k),
5126                                              ts->indirect_base);
5127                 }
5128                 break;
5129 
5130             case 2: /* second of pair */
5131                 if (arg_ct->oalias) {
5132                     reg = new_args[arg_ct->alias_index];
5133                 } else {
5134                     reg = new_args[arg_ct->pair_index] + 1;
5135                 }
5136                 break;
5137 
5138             case 3: /* first of pair, aliasing with a second input */
5139                 tcg_debug_assert(!arg_ct->newreg);
5140                 reg = new_args[arg_ct->pair_index] - 1;
5141                 break;
5142 
5143             default:
5144                 g_assert_not_reached();
5145             }
5146             tcg_regset_set_reg(o_allocated_regs, reg);
5147             set_temp_val_reg(s, ts, reg);
5148             ts->mem_coherent = 0;
5149             new_args[i] = reg;
5150         }
5151     }
5152 
5153     /* emit instruction */
5154     switch (op->opc) {
5155     case INDEX_op_ext8s_i32:
5156         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5157         break;
5158     case INDEX_op_ext8s_i64:
5159         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5160         break;
5161     case INDEX_op_ext8u_i32:
5162     case INDEX_op_ext8u_i64:
5163         tcg_out_ext8u(s, new_args[0], new_args[1]);
5164         break;
5165     case INDEX_op_ext16s_i32:
5166         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5167         break;
5168     case INDEX_op_ext16s_i64:
5169         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5170         break;
5171     case INDEX_op_ext16u_i32:
5172     case INDEX_op_ext16u_i64:
5173         tcg_out_ext16u(s, new_args[0], new_args[1]);
5174         break;
5175     case INDEX_op_ext32s_i64:
5176         tcg_out_ext32s(s, new_args[0], new_args[1]);
5177         break;
5178     case INDEX_op_ext32u_i64:
5179         tcg_out_ext32u(s, new_args[0], new_args[1]);
5180         break;
5181     case INDEX_op_ext_i32_i64:
5182         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5183         break;
5184     case INDEX_op_extu_i32_i64:
5185         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5186         break;
5187     case INDEX_op_extrl_i64_i32:
5188         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5189         break;
5190     default:
5191         if (def->flags & TCG_OPF_VECTOR) {
5192             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5193                            new_args, const_args);
5194         } else {
5195             tcg_out_op(s, op->opc, new_args, const_args);
5196         }
5197         break;
5198     }
5199 
5200     /* move the outputs in the correct register if needed */
5201     for(i = 0; i < nb_oargs; i++) {
5202         ts = arg_temp(op->args[i]);
5203 
5204         /* ENV should not be modified.  */
5205         tcg_debug_assert(!temp_readonly(ts));
5206 
5207         if (NEED_SYNC_ARG(i)) {
5208             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5209         } else if (IS_DEAD_ARG(i)) {
5210             temp_dead(s, ts);
5211         }
5212     }
5213 }
5214 
5215 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5216 {
5217     const TCGLifeData arg_life = op->life;
5218     TCGTemp *ots, *itsl, *itsh;
5219     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5220 
5221     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5222     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5223     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5224 
5225     ots = arg_temp(op->args[0]);
5226     itsl = arg_temp(op->args[1]);
5227     itsh = arg_temp(op->args[2]);
5228 
5229     /* ENV should not be modified.  */
5230     tcg_debug_assert(!temp_readonly(ots));
5231 
5232     /* Allocate the output register now.  */
5233     if (ots->val_type != TEMP_VAL_REG) {
5234         TCGRegSet allocated_regs = s->reserved_regs;
5235         TCGRegSet dup_out_regs =
5236             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5237         TCGReg oreg;
5238 
5239         /* Make sure to not spill the input registers. */
5240         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5241             tcg_regset_set_reg(allocated_regs, itsl->reg);
5242         }
5243         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5244             tcg_regset_set_reg(allocated_regs, itsh->reg);
5245         }
5246 
5247         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5248                              output_pref(op, 0), ots->indirect_base);
5249         set_temp_val_reg(s, ots, oreg);
5250     }
5251 
5252     /* Promote dup2 of immediates to dupi_vec. */
5253     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5254         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5255         MemOp vece = MO_64;
5256 
5257         if (val == dup_const(MO_8, val)) {
5258             vece = MO_8;
5259         } else if (val == dup_const(MO_16, val)) {
5260             vece = MO_16;
5261         } else if (val == dup_const(MO_32, val)) {
5262             vece = MO_32;
5263         }
5264 
5265         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5266         goto done;
5267     }
5268 
5269     /* If the two inputs form one 64-bit value, try dupm_vec. */
5270     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5271         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5272         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5273         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5274 
5275         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5276         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5277 
5278         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5279                              its->mem_base->reg, its->mem_offset)) {
5280             goto done;
5281         }
5282     }
5283 
5284     /* Fall back to generic expansion. */
5285     return false;
5286 
5287  done:
5288     ots->mem_coherent = 0;
5289     if (IS_DEAD_ARG(1)) {
5290         temp_dead(s, itsl);
5291     }
5292     if (IS_DEAD_ARG(2)) {
5293         temp_dead(s, itsh);
5294     }
5295     if (NEED_SYNC_ARG(0)) {
5296         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5297     } else if (IS_DEAD_ARG(0)) {
5298         temp_dead(s, ots);
5299     }
5300     return true;
5301 }
5302 
5303 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5304                          TCGRegSet allocated_regs)
5305 {
5306     if (ts->val_type == TEMP_VAL_REG) {
5307         if (ts->reg != reg) {
5308             tcg_reg_free(s, reg, allocated_regs);
5309             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5310                 /*
5311                  * Cross register class move not supported.  Sync the
5312                  * temp back to its slot and load from there.
5313                  */
5314                 temp_sync(s, ts, allocated_regs, 0, 0);
5315                 tcg_out_ld(s, ts->type, reg,
5316                            ts->mem_base->reg, ts->mem_offset);
5317             }
5318         }
5319     } else {
5320         TCGRegSet arg_set = 0;
5321 
5322         tcg_reg_free(s, reg, allocated_regs);
5323         tcg_regset_set_reg(arg_set, reg);
5324         temp_load(s, ts, arg_set, allocated_regs, 0);
5325     }
5326 }
5327 
5328 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5329                          TCGRegSet allocated_regs)
5330 {
5331     /*
5332      * When the destination is on the stack, load up the temp and store.
5333      * If there are many call-saved registers, the temp might live to
5334      * see another use; otherwise it'll be discarded.
5335      */
5336     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5337     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5338                arg_slot_stk_ofs(arg_slot));
5339 }
5340 
5341 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5342                             TCGTemp *ts, TCGRegSet *allocated_regs)
5343 {
5344     if (arg_slot_reg_p(l->arg_slot)) {
5345         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5346         load_arg_reg(s, reg, ts, *allocated_regs);
5347         tcg_regset_set_reg(*allocated_regs, reg);
5348     } else {
5349         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5350     }
5351 }
5352 
5353 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5354                          intptr_t ref_off, TCGRegSet *allocated_regs)
5355 {
5356     TCGReg reg;
5357 
5358     if (arg_slot_reg_p(arg_slot)) {
5359         reg = tcg_target_call_iarg_regs[arg_slot];
5360         tcg_reg_free(s, reg, *allocated_regs);
5361         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5362         tcg_regset_set_reg(*allocated_regs, reg);
5363     } else {
5364         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5365                             *allocated_regs, 0, false);
5366         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5367         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5368                    arg_slot_stk_ofs(arg_slot));
5369     }
5370 }
5371 
5372 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5373 {
5374     const int nb_oargs = TCGOP_CALLO(op);
5375     const int nb_iargs = TCGOP_CALLI(op);
5376     const TCGLifeData arg_life = op->life;
5377     const TCGHelperInfo *info = tcg_call_info(op);
5378     TCGRegSet allocated_regs = s->reserved_regs;
5379     int i;
5380 
5381     /*
5382      * Move inputs into place in reverse order,
5383      * so that we place stacked arguments first.
5384      */
5385     for (i = nb_iargs - 1; i >= 0; --i) {
5386         const TCGCallArgumentLoc *loc = &info->in[i];
5387         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5388 
5389         switch (loc->kind) {
5390         case TCG_CALL_ARG_NORMAL:
5391         case TCG_CALL_ARG_EXTEND_U:
5392         case TCG_CALL_ARG_EXTEND_S:
5393             load_arg_normal(s, loc, ts, &allocated_regs);
5394             break;
5395         case TCG_CALL_ARG_BY_REF:
5396             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5397             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5398                          arg_slot_stk_ofs(loc->ref_slot),
5399                          &allocated_regs);
5400             break;
5401         case TCG_CALL_ARG_BY_REF_N:
5402             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5403             break;
5404         default:
5405             g_assert_not_reached();
5406         }
5407     }
5408 
5409     /* Mark dead temporaries and free the associated registers.  */
5410     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5411         if (IS_DEAD_ARG(i)) {
5412             temp_dead(s, arg_temp(op->args[i]));
5413         }
5414     }
5415 
5416     /* Clobber call registers.  */
5417     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5418         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5419             tcg_reg_free(s, i, allocated_regs);
5420         }
5421     }
5422 
5423     /*
5424      * Save globals if they might be written by the helper,
5425      * sync them if they might be read.
5426      */
5427     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5428         /* Nothing to do */
5429     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5430         sync_globals(s, allocated_regs);
5431     } else {
5432         save_globals(s, allocated_regs);
5433     }
5434 
5435     /*
5436      * If the ABI passes a pointer to the returned struct as the first
5437      * argument, load that now.  Pass a pointer to the output home slot.
5438      */
5439     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5440         TCGTemp *ts = arg_temp(op->args[0]);
5441 
5442         if (!ts->mem_allocated) {
5443             temp_allocate_frame(s, ts);
5444         }
5445         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5446     }
5447 
5448     tcg_out_call(s, tcg_call_func(op), info);
5449 
5450     /* Assign output registers and emit moves if needed.  */
5451     switch (info->out_kind) {
5452     case TCG_CALL_RET_NORMAL:
5453         for (i = 0; i < nb_oargs; i++) {
5454             TCGTemp *ts = arg_temp(op->args[i]);
5455             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5456 
5457             /* ENV should not be modified.  */
5458             tcg_debug_assert(!temp_readonly(ts));
5459 
5460             set_temp_val_reg(s, ts, reg);
5461             ts->mem_coherent = 0;
5462         }
5463         break;
5464 
5465     case TCG_CALL_RET_BY_VEC:
5466         {
5467             TCGTemp *ts = arg_temp(op->args[0]);
5468 
5469             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5470             tcg_debug_assert(ts->temp_subindex == 0);
5471             if (!ts->mem_allocated) {
5472                 temp_allocate_frame(s, ts);
5473             }
5474             tcg_out_st(s, TCG_TYPE_V128,
5475                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5476                        ts->mem_base->reg, ts->mem_offset);
5477         }
5478         /* fall through to mark all parts in memory */
5479 
5480     case TCG_CALL_RET_BY_REF:
5481         /* The callee has performed a write through the reference. */
5482         for (i = 0; i < nb_oargs; i++) {
5483             TCGTemp *ts = arg_temp(op->args[i]);
5484             ts->val_type = TEMP_VAL_MEM;
5485         }
5486         break;
5487 
5488     default:
5489         g_assert_not_reached();
5490     }
5491 
5492     /* Flush or discard output registers as needed. */
5493     for (i = 0; i < nb_oargs; i++) {
5494         TCGTemp *ts = arg_temp(op->args[i]);
5495         if (NEED_SYNC_ARG(i)) {
5496             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5497         } else if (IS_DEAD_ARG(i)) {
5498             temp_dead(s, ts);
5499         }
5500     }
5501 }
5502 
5503 /**
5504  * atom_and_align_for_opc:
5505  * @s: tcg context
5506  * @opc: memory operation code
5507  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5508  * @allow_two_ops: true if we are prepared to issue two operations
5509  *
5510  * Return the alignment and atomicity to use for the inline fast path
5511  * for the given memory operation.  The alignment may be larger than
5512  * that specified in @opc, and the correct alignment will be diagnosed
5513  * by the slow path helper.
5514  *
5515  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5516  * and issue two loads or stores for subalignment.
5517  */
5518 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5519                                            MemOp host_atom, bool allow_two_ops)
5520 {
5521     MemOp align = get_alignment_bits(opc);
5522     MemOp size = opc & MO_SIZE;
5523     MemOp half = size ? size - 1 : 0;
5524     MemOp atom = opc & MO_ATOM_MASK;
5525     MemOp atmax;
5526 
5527     switch (atom) {
5528     case MO_ATOM_NONE:
5529         /* The operation requires no specific atomicity. */
5530         atmax = MO_8;
5531         break;
5532 
5533     case MO_ATOM_IFALIGN:
5534         atmax = size;
5535         break;
5536 
5537     case MO_ATOM_IFALIGN_PAIR:
5538         atmax = half;
5539         break;
5540 
5541     case MO_ATOM_WITHIN16:
5542         atmax = size;
5543         if (size == MO_128) {
5544             /* Misalignment implies !within16, and therefore no atomicity. */
5545         } else if (host_atom != MO_ATOM_WITHIN16) {
5546             /* The host does not implement within16, so require alignment. */
5547             align = MAX(align, size);
5548         }
5549         break;
5550 
5551     case MO_ATOM_WITHIN16_PAIR:
5552         atmax = size;
5553         /*
5554          * Misalignment implies !within16, and therefore half atomicity.
5555          * Any host prepared for two operations can implement this with
5556          * half alignment.
5557          */
5558         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5559             align = MAX(align, half);
5560         }
5561         break;
5562 
5563     case MO_ATOM_SUBALIGN:
5564         atmax = size;
5565         if (host_atom != MO_ATOM_SUBALIGN) {
5566             /* If unaligned but not odd, there are subobjects up to half. */
5567             if (allow_two_ops) {
5568                 align = MAX(align, half);
5569             } else {
5570                 align = MAX(align, size);
5571             }
5572         }
5573         break;
5574 
5575     default:
5576         g_assert_not_reached();
5577     }
5578 
5579     return (TCGAtomAlign){ .atom = atmax, .align = align };
5580 }
5581 
5582 /*
5583  * Similarly for qemu_ld/st slow path helpers.
5584  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5585  * using only the provided backend tcg_out_* functions.
5586  */
5587 
5588 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5589 {
5590     int ofs = arg_slot_stk_ofs(slot);
5591 
5592     /*
5593      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5594      * require extension to uint64_t, adjust the address for uint32_t.
5595      */
5596     if (HOST_BIG_ENDIAN &&
5597         TCG_TARGET_REG_BITS == 64 &&
5598         type == TCG_TYPE_I32) {
5599         ofs += 4;
5600     }
5601     return ofs;
5602 }
5603 
5604 static void tcg_out_helper_load_slots(TCGContext *s,
5605                                       unsigned nmov, TCGMovExtend *mov,
5606                                       const TCGLdstHelperParam *parm)
5607 {
5608     unsigned i;
5609     TCGReg dst3;
5610 
5611     /*
5612      * Start from the end, storing to the stack first.
5613      * This frees those registers, so we need not consider overlap.
5614      */
5615     for (i = nmov; i-- > 0; ) {
5616         unsigned slot = mov[i].dst;
5617 
5618         if (arg_slot_reg_p(slot)) {
5619             goto found_reg;
5620         }
5621 
5622         TCGReg src = mov[i].src;
5623         TCGType dst_type = mov[i].dst_type;
5624         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5625 
5626         /* The argument is going onto the stack; extend into scratch. */
5627         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5628             tcg_debug_assert(parm->ntmp != 0);
5629             mov[i].dst = src = parm->tmp[0];
5630             tcg_out_movext1(s, &mov[i]);
5631         }
5632 
5633         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5634                    tcg_out_helper_stk_ofs(dst_type, slot));
5635     }
5636     return;
5637 
5638  found_reg:
5639     /*
5640      * The remaining arguments are in registers.
5641      * Convert slot numbers to argument registers.
5642      */
5643     nmov = i + 1;
5644     for (i = 0; i < nmov; ++i) {
5645         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5646     }
5647 
5648     switch (nmov) {
5649     case 4:
5650         /* The backend must have provided enough temps for the worst case. */
5651         tcg_debug_assert(parm->ntmp >= 2);
5652 
5653         dst3 = mov[3].dst;
5654         for (unsigned j = 0; j < 3; ++j) {
5655             if (dst3 == mov[j].src) {
5656                 /*
5657                  * Conflict. Copy the source to a temporary, perform the
5658                  * remaining moves, then the extension from our scratch
5659                  * on the way out.
5660                  */
5661                 TCGReg scratch = parm->tmp[1];
5662 
5663                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5664                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5665                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5666                 break;
5667             }
5668         }
5669 
5670         /* No conflicts: perform this move and continue. */
5671         tcg_out_movext1(s, &mov[3]);
5672         /* fall through */
5673 
5674     case 3:
5675         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5676                         parm->ntmp ? parm->tmp[0] : -1);
5677         break;
5678     case 2:
5679         tcg_out_movext2(s, mov, mov + 1,
5680                         parm->ntmp ? parm->tmp[0] : -1);
5681         break;
5682     case 1:
5683         tcg_out_movext1(s, mov);
5684         break;
5685     default:
5686         g_assert_not_reached();
5687     }
5688 }
5689 
5690 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5691                                     TCGType type, tcg_target_long imm,
5692                                     const TCGLdstHelperParam *parm)
5693 {
5694     if (arg_slot_reg_p(slot)) {
5695         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5696     } else {
5697         int ofs = tcg_out_helper_stk_ofs(type, slot);
5698         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5699             tcg_debug_assert(parm->ntmp != 0);
5700             tcg_out_movi(s, type, parm->tmp[0], imm);
5701             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5702         }
5703     }
5704 }
5705 
5706 static void tcg_out_helper_load_common_args(TCGContext *s,
5707                                             const TCGLabelQemuLdst *ldst,
5708                                             const TCGLdstHelperParam *parm,
5709                                             const TCGHelperInfo *info,
5710                                             unsigned next_arg)
5711 {
5712     TCGMovExtend ptr_mov = {
5713         .dst_type = TCG_TYPE_PTR,
5714         .src_type = TCG_TYPE_PTR,
5715         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5716     };
5717     const TCGCallArgumentLoc *loc = &info->in[0];
5718     TCGType type;
5719     unsigned slot;
5720     tcg_target_ulong imm;
5721 
5722     /*
5723      * Handle env, which is always first.
5724      */
5725     ptr_mov.dst = loc->arg_slot;
5726     ptr_mov.src = TCG_AREG0;
5727     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5728 
5729     /*
5730      * Handle oi.
5731      */
5732     imm = ldst->oi;
5733     loc = &info->in[next_arg];
5734     type = TCG_TYPE_I32;
5735     switch (loc->kind) {
5736     case TCG_CALL_ARG_NORMAL:
5737         break;
5738     case TCG_CALL_ARG_EXTEND_U:
5739     case TCG_CALL_ARG_EXTEND_S:
5740         /* No extension required for MemOpIdx. */
5741         tcg_debug_assert(imm <= INT32_MAX);
5742         type = TCG_TYPE_REG;
5743         break;
5744     default:
5745         g_assert_not_reached();
5746     }
5747     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5748     next_arg++;
5749 
5750     /*
5751      * Handle ra.
5752      */
5753     loc = &info->in[next_arg];
5754     slot = loc->arg_slot;
5755     if (parm->ra_gen) {
5756         int arg_reg = -1;
5757         TCGReg ra_reg;
5758 
5759         if (arg_slot_reg_p(slot)) {
5760             arg_reg = tcg_target_call_iarg_regs[slot];
5761         }
5762         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5763 
5764         ptr_mov.dst = slot;
5765         ptr_mov.src = ra_reg;
5766         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5767     } else {
5768         imm = (uintptr_t)ldst->raddr;
5769         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5770     }
5771 }
5772 
5773 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5774                                        const TCGCallArgumentLoc *loc,
5775                                        TCGType dst_type, TCGType src_type,
5776                                        TCGReg lo, TCGReg hi)
5777 {
5778     MemOp reg_mo;
5779 
5780     if (dst_type <= TCG_TYPE_REG) {
5781         MemOp src_ext;
5782 
5783         switch (loc->kind) {
5784         case TCG_CALL_ARG_NORMAL:
5785             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5786             break;
5787         case TCG_CALL_ARG_EXTEND_U:
5788             dst_type = TCG_TYPE_REG;
5789             src_ext = MO_UL;
5790             break;
5791         case TCG_CALL_ARG_EXTEND_S:
5792             dst_type = TCG_TYPE_REG;
5793             src_ext = MO_SL;
5794             break;
5795         default:
5796             g_assert_not_reached();
5797         }
5798 
5799         mov[0].dst = loc->arg_slot;
5800         mov[0].dst_type = dst_type;
5801         mov[0].src = lo;
5802         mov[0].src_type = src_type;
5803         mov[0].src_ext = src_ext;
5804         return 1;
5805     }
5806 
5807     if (TCG_TARGET_REG_BITS == 32) {
5808         assert(dst_type == TCG_TYPE_I64);
5809         reg_mo = MO_32;
5810     } else {
5811         assert(dst_type == TCG_TYPE_I128);
5812         reg_mo = MO_64;
5813     }
5814 
5815     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5816     mov[0].src = lo;
5817     mov[0].dst_type = TCG_TYPE_REG;
5818     mov[0].src_type = TCG_TYPE_REG;
5819     mov[0].src_ext = reg_mo;
5820 
5821     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5822     mov[1].src = hi;
5823     mov[1].dst_type = TCG_TYPE_REG;
5824     mov[1].src_type = TCG_TYPE_REG;
5825     mov[1].src_ext = reg_mo;
5826 
5827     return 2;
5828 }
5829 
5830 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5831                                    const TCGLdstHelperParam *parm)
5832 {
5833     const TCGHelperInfo *info;
5834     const TCGCallArgumentLoc *loc;
5835     TCGMovExtend mov[2];
5836     unsigned next_arg, nmov;
5837     MemOp mop = get_memop(ldst->oi);
5838 
5839     switch (mop & MO_SIZE) {
5840     case MO_8:
5841     case MO_16:
5842     case MO_32:
5843         info = &info_helper_ld32_mmu;
5844         break;
5845     case MO_64:
5846         info = &info_helper_ld64_mmu;
5847         break;
5848     case MO_128:
5849         info = &info_helper_ld128_mmu;
5850         break;
5851     default:
5852         g_assert_not_reached();
5853     }
5854 
5855     /* Defer env argument. */
5856     next_arg = 1;
5857 
5858     loc = &info->in[next_arg];
5859     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5860         /*
5861          * 32-bit host with 32-bit guest: zero-extend the guest address
5862          * to 64-bits for the helper by storing the low part, then
5863          * load a zero for the high part.
5864          */
5865         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5866                                TCG_TYPE_I32, TCG_TYPE_I32,
5867                                ldst->addrlo_reg, -1);
5868         tcg_out_helper_load_slots(s, 1, mov, parm);
5869 
5870         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5871                                 TCG_TYPE_I32, 0, parm);
5872         next_arg += 2;
5873     } else {
5874         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5875                                       ldst->addrlo_reg, ldst->addrhi_reg);
5876         tcg_out_helper_load_slots(s, nmov, mov, parm);
5877         next_arg += nmov;
5878     }
5879 
5880     switch (info->out_kind) {
5881     case TCG_CALL_RET_NORMAL:
5882     case TCG_CALL_RET_BY_VEC:
5883         break;
5884     case TCG_CALL_RET_BY_REF:
5885         /*
5886          * The return reference is in the first argument slot.
5887          * We need memory in which to return: re-use the top of stack.
5888          */
5889         {
5890             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5891 
5892             if (arg_slot_reg_p(0)) {
5893                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5894                                  TCG_REG_CALL_STACK, ofs_slot0);
5895             } else {
5896                 tcg_debug_assert(parm->ntmp != 0);
5897                 tcg_out_addi_ptr(s, parm->tmp[0],
5898                                  TCG_REG_CALL_STACK, ofs_slot0);
5899                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5900                            TCG_REG_CALL_STACK, ofs_slot0);
5901             }
5902         }
5903         break;
5904     default:
5905         g_assert_not_reached();
5906     }
5907 
5908     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5909 }
5910 
5911 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5912                                   bool load_sign,
5913                                   const TCGLdstHelperParam *parm)
5914 {
5915     MemOp mop = get_memop(ldst->oi);
5916     TCGMovExtend mov[2];
5917     int ofs_slot0;
5918 
5919     switch (ldst->type) {
5920     case TCG_TYPE_I64:
5921         if (TCG_TARGET_REG_BITS == 32) {
5922             break;
5923         }
5924         /* fall through */
5925 
5926     case TCG_TYPE_I32:
5927         mov[0].dst = ldst->datalo_reg;
5928         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5929         mov[0].dst_type = ldst->type;
5930         mov[0].src_type = TCG_TYPE_REG;
5931 
5932         /*
5933          * If load_sign, then we allowed the helper to perform the
5934          * appropriate sign extension to tcg_target_ulong, and all
5935          * we need now is a plain move.
5936          *
5937          * If they do not, then we expect the relevant extension
5938          * instruction to be no more expensive than a move, and
5939          * we thus save the icache etc by only using one of two
5940          * helper functions.
5941          */
5942         if (load_sign || !(mop & MO_SIGN)) {
5943             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5944                 mov[0].src_ext = MO_32;
5945             } else {
5946                 mov[0].src_ext = MO_64;
5947             }
5948         } else {
5949             mov[0].src_ext = mop & MO_SSIZE;
5950         }
5951         tcg_out_movext1(s, mov);
5952         return;
5953 
5954     case TCG_TYPE_I128:
5955         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5956         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5957         switch (TCG_TARGET_CALL_RET_I128) {
5958         case TCG_CALL_RET_NORMAL:
5959             break;
5960         case TCG_CALL_RET_BY_VEC:
5961             tcg_out_st(s, TCG_TYPE_V128,
5962                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5963                        TCG_REG_CALL_STACK, ofs_slot0);
5964             /* fall through */
5965         case TCG_CALL_RET_BY_REF:
5966             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5967                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5968             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5969                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5970             return;
5971         default:
5972             g_assert_not_reached();
5973         }
5974         break;
5975 
5976     default:
5977         g_assert_not_reached();
5978     }
5979 
5980     mov[0].dst = ldst->datalo_reg;
5981     mov[0].src =
5982         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5983     mov[0].dst_type = TCG_TYPE_REG;
5984     mov[0].src_type = TCG_TYPE_REG;
5985     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5986 
5987     mov[1].dst = ldst->datahi_reg;
5988     mov[1].src =
5989         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5990     mov[1].dst_type = TCG_TYPE_REG;
5991     mov[1].src_type = TCG_TYPE_REG;
5992     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5993 
5994     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5995 }
5996 
/*
 * Emit the host code that places the arguments for a store slow-path
 * helper call.
 *
 * The helper (info_helper_st32_mmu, info_helper_st64_mmu or
 * info_helper_st128_mmu) is selected from the access size encoded in
 * ldst->oi.  Argument 0 (env) is skipped here; the address is argument 1
 * and the data value follows it.  Everything from next_arg onwards is
 * handed to tcg_out_helper_load_common_args() at the end.
 *
 * @s:    code generation context; s->addr_type gives the guest address type
 * @ldst: slow-path label describing the address/data registers and memop
 * @parm: backend supplied parameters, including optional scratch registers
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Pick the helper signature and data type from the access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        /* The 64-bit address occupies two argument entries. */
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data is passed by value: queue its moves after the address moves. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /*
         * Data is passed by reference (128-bit value on a 64-bit host):
         * store both halves to the reference slots on the stack, in host
         * memory order, before the queued register moves are emitted.
         */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        /* Only the address moves are pending at this point. */
        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Pass the address of the stack copy as the argument itself. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* Argument lives on the stack: form the pointer in a scratch. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    /* Remaining arguments, starting at next_arg, are common to all helpers. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6102 
/*
 * Generate host code for the ops currently held in @s, writing it to the
 * buffer at tb->tc.ptr.
 *
 * @s:        code generation context holding the op list
 * @tb:       translation block being generated; its goto_tb jump offsets
 *            are reset to TB_JMP_OFFSET_INVALID before code is emitted
 * @pc_start: guest pc, used here only to filter the optional op logging
 *
 * Returns the size of the generated code (tcg_current_code_size) on
 * success.  Returns -1 when the code pointer has passed
 * s->code_gen_highwater, -2 when the code size exceeds UINT16_MAX or
 * relocations cannot be resolved, or the negative value returned by
 * tcg_out_ldst_finalize() / tcg_out_pool_finalize().
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Optionally log the ops as received, before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Optimization and analysis passes over the op list. */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* One row of start_words values is recorded per guest instruction. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Index of the current guest instruction; -1 until the first insn_start. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous instruction by recording its end offset. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Every guest instruction of the TB must have produced an insn_start. */
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6298 
6299 #ifdef ELF_HOST_MACHINE
6300 /* In order to use this feature, the backend needs to do three things:
6301 
6302    (1) Define ELF_HOST_MACHINE to indicate both what value to
6303        put into the ELF image and to indicate support for the feature.
6304 
6305    (2) Define tcg_register_jit.  This should create a buffer containing
6306        the contents of a .debug_frame section that describes the post-
6307        prologue unwind info for the tcg machine.
6308 
6309    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6310 */
6311 
6312 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered symbol file; entries are linked in both directions. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory symbol file */
    uint64_t symfile_size;      /* size of that symbol file in bytes */
};

/* Descriptor through which registered entries are published. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};

/*
 * Called after __jit_debug_descriptor has been updated.  The body is
 * intentionally empty apart from an empty asm statement, and the function
 * is marked noinline.
 * NOTE(review): per the GDB JIT interface the debugger is expected to
 * intercept calls to this symbol -- confirm against the GDB manual.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6342 
6343 /* End GDB interface.  */
6344 
/*
 * Return the byte offset of the string @str within the ELF-style string
 * table @strtab.
 *
 * The table is a sequence of NUL-terminated strings whose first byte
 * (offset 0) is the empty string; the search therefore starts at offset 1.
 * The table must be followed by at least one extra NUL byte (an empty
 * string), which marks its end.
 *
 * Returns the offset of the first match.  If the end of the table is
 * reached without a match, returns 0, i.e. the offset of the empty name,
 * instead of scanning past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Terminating empty string: @str is not in the table. */
            return 0;
        }
        /* Skip this entry and its terminating NUL. */
        p += strlen(p) + 1;
    }
}
6356 
6357 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6358                                  const void *debug_frame,
6359                                  size_t debug_frame_size)
6360 {
6361     struct __attribute__((packed)) DebugInfo {
6362         uint32_t  len;
6363         uint16_t  version;
6364         uint32_t  abbrev;
6365         uint8_t   ptr_size;
6366         uint8_t   cu_die;
6367         uint16_t  cu_lang;
6368         uintptr_t cu_low_pc;
6369         uintptr_t cu_high_pc;
6370         uint8_t   fn_die;
6371         char      fn_name[16];
6372         uintptr_t fn_low_pc;
6373         uintptr_t fn_high_pc;
6374         uint8_t   cu_eoc;
6375     };
6376 
6377     struct ElfImage {
6378         ElfW(Ehdr) ehdr;
6379         ElfW(Phdr) phdr;
6380         ElfW(Shdr) shdr[7];
6381         ElfW(Sym)  sym[2];
6382         struct DebugInfo di;
6383         uint8_t    da[24];
6384         char       str[80];
6385     };
6386 
6387     struct ElfImage *img;
6388 
6389     static const struct ElfImage img_template = {
6390         .ehdr = {
6391             .e_ident[EI_MAG0] = ELFMAG0,
6392             .e_ident[EI_MAG1] = ELFMAG1,
6393             .e_ident[EI_MAG2] = ELFMAG2,
6394             .e_ident[EI_MAG3] = ELFMAG3,
6395             .e_ident[EI_CLASS] = ELF_CLASS,
6396             .e_ident[EI_DATA] = ELF_DATA,
6397             .e_ident[EI_VERSION] = EV_CURRENT,
6398             .e_type = ET_EXEC,
6399             .e_machine = ELF_HOST_MACHINE,
6400             .e_version = EV_CURRENT,
6401             .e_phoff = offsetof(struct ElfImage, phdr),
6402             .e_shoff = offsetof(struct ElfImage, shdr),
6403             .e_ehsize = sizeof(ElfW(Shdr)),
6404             .e_phentsize = sizeof(ElfW(Phdr)),
6405             .e_phnum = 1,
6406             .e_shentsize = sizeof(ElfW(Shdr)),
6407             .e_shnum = ARRAY_SIZE(img->shdr),
6408             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6409 #ifdef ELF_HOST_FLAGS
6410             .e_flags = ELF_HOST_FLAGS,
6411 #endif
6412 #ifdef ELF_OSABI
6413             .e_ident[EI_OSABI] = ELF_OSABI,
6414 #endif
6415         },
6416         .phdr = {
6417             .p_type = PT_LOAD,
6418             .p_flags = PF_X,
6419         },
6420         .shdr = {
6421             [0] = { .sh_type = SHT_NULL },
6422             /* Trick: The contents of code_gen_buffer are not present in
6423                this fake ELF file; that got allocated elsewhere.  Therefore
6424                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6425                will not look for contents.  We can record any address.  */
6426             [1] = { /* .text */
6427                 .sh_type = SHT_NOBITS,
6428                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6429             },
6430             [2] = { /* .debug_info */
6431                 .sh_type = SHT_PROGBITS,
6432                 .sh_offset = offsetof(struct ElfImage, di),
6433                 .sh_size = sizeof(struct DebugInfo),
6434             },
6435             [3] = { /* .debug_abbrev */
6436                 .sh_type = SHT_PROGBITS,
6437                 .sh_offset = offsetof(struct ElfImage, da),
6438                 .sh_size = sizeof(img->da),
6439             },
6440             [4] = { /* .debug_frame */
6441                 .sh_type = SHT_PROGBITS,
6442                 .sh_offset = sizeof(struct ElfImage),
6443             },
6444             [5] = { /* .symtab */
6445                 .sh_type = SHT_SYMTAB,
6446                 .sh_offset = offsetof(struct ElfImage, sym),
6447                 .sh_size = sizeof(img->sym),
6448                 .sh_info = 1,
6449                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6450                 .sh_entsize = sizeof(ElfW(Sym)),
6451             },
6452             [6] = { /* .strtab */
6453                 .sh_type = SHT_STRTAB,
6454                 .sh_offset = offsetof(struct ElfImage, str),
6455                 .sh_size = sizeof(img->str),
6456             }
6457         },
6458         .sym = {
6459             [1] = { /* code_gen_buffer */
6460                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6461                 .st_shndx = 1,
6462             }
6463         },
6464         .di = {
6465             .len = sizeof(struct DebugInfo) - 4,
6466             .version = 2,
6467             .ptr_size = sizeof(void *),
6468             .cu_die = 1,
6469             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6470             .fn_die = 2,
6471             .fn_name = "code_gen_buffer"
6472         },
6473         .da = {
6474             1,          /* abbrev number (the cu) */
6475             0x11, 1,    /* DW_TAG_compile_unit, has children */
6476             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6477             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6478             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6479             0, 0,       /* end of abbrev */
6480             2,          /* abbrev number (the fn) */
6481             0x2e, 0,    /* DW_TAG_subprogram, no children */
6482             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6483             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6484             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6485             0, 0,       /* end of abbrev */
6486             0           /* no more abbrev */
6487         },
6488         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6489                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6490     };
6491 
6492     /* We only need a single jit entry; statically allocate it.  */
6493     static struct jit_code_entry one_entry;
6494 
6495     uintptr_t buf = (uintptr_t)buf_ptr;
6496     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6497     DebugFrameHeader *dfh;
6498 
6499     img = g_malloc(img_size);
6500     *img = img_template;
6501 
6502     img->phdr.p_vaddr = buf;
6503     img->phdr.p_paddr = buf;
6504     img->phdr.p_memsz = buf_size;
6505 
6506     img->shdr[1].sh_name = find_string(img->str, ".text");
6507     img->shdr[1].sh_addr = buf;
6508     img->shdr[1].sh_size = buf_size;
6509 
6510     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6511     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6512 
6513     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6514     img->shdr[4].sh_size = debug_frame_size;
6515 
6516     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6517     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6518 
6519     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6520     img->sym[1].st_value = buf;
6521     img->sym[1].st_size = buf_size;
6522 
6523     img->di.cu_low_pc = buf;
6524     img->di.cu_high_pc = buf + buf_size;
6525     img->di.fn_low_pc = buf;
6526     img->di.fn_high_pc = buf + buf_size;
6527 
6528     dfh = (DebugFrameHeader *)(img + 1);
6529     memcpy(dfh, debug_frame, debug_frame_size);
6530     dfh->fde.func_start = buf;
6531     dfh->fde.func_len = buf_size;
6532 
6533 #ifdef DEBUG_JIT
6534     /* Enable this block to be able to debug the ELF image file creation.
6535        One can use readelf, objdump, or other inspection utilities.  */
6536     {
6537         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6538         FILE *f = fopen(jit, "w+b");
6539         if (f) {
6540             if (fwrite(img, img_size, 1, f) != img_size) {
6541                 /* Avoid stupid unused return value warning for fwrite.  */
6542             }
6543             fclose(f);
6544         }
6545     }
6546 #endif
6547 
6548     one_entry.symfile_addr = img;
6549     one_entry.symfile_size = img_size;
6550 
6551     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6552     __jit_debug_descriptor.relevant_entry = &one_entry;
6553     __jit_debug_descriptor.first_entry = &one_entry;
6554     __jit_debug_register_code();
6555 }
6556 #else
6557 /* No support for the feature.  Provide the entry point expected by exec.c,
6558    and implement the internal function we declared earlier.  */
6559 
/*
 * Stub used when ELF_HOST_MACHINE is not defined: nothing is built or
 * registered and all arguments are ignored.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

/*
 * Stub entry point used when ELF_HOST_MACHINE is not defined: does nothing.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6569 #endif /* ELF_HOST_MACHINE */
6570 
6571 #if !TCG_TARGET_MAYBE_vec
/*
 * Definition compiled only when TCG_TARGET_MAYBE_vec is false.  It must
 * never be called: reaching it trips g_assert_not_reached().
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6576 #endif
6577