xref: /openbmc/qemu/tcg/tcg.c (revision 83a0ad26737b9bca3b09fc8d27163ef6a0f28bd9)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
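
/*
 * Illustration (host-dependent values, assumed for the example): on an
 * x86_64 host these resolve to ELFCLASS64/ELFDATA2LSB, while a 32-bit
 * big-endian host would get ELFCLASS32/ELFDATA2MSB.  They describe the
 * in-memory ELF image handed to GDB by the JIT interface below.
 */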

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;  /* host code addr just past the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
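
/*
 * Illustration (an assumption about common backends, not built here):
 * with TCG_TARGET_INSN_UNIT_SIZE == 4, as on AArch64, tcg_out32() is a
 * single unit store while tcg_out64() takes the memcpy path and advances
 * code_ptr by two units; with unit size 1, as on x86, only tcg_out8()
 * stores directly and the wider emitters all use memcpy.
 */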

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
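
/*
 * For illustration (a sketch of the flow above, not additional code):
 * a forward branch calls gen_new_label(), the backend emits the branch
 * and records its location with tcg_out_reloc(); when the label site is
 * reached, tcg_out_label() fixes its value, and tcg_resolve_relocs()
 * finally runs patch_reloc() over every recorded use.  A false return
 * signals a displacement overflow, and the TB is retranslated smaller.
 */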

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
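
/*
 * Worked example (illustrative only): tcg_out_movext(s, TCG_TYPE_I64, d,
 * TCG_TYPE_I32, MO_SW, r) emits a 16-bit sign-extension of r into the
 * 64-bit view of d, while src_ext == MO_UQ with 64-bit types on both
 * sides degenerates to a plain register move.
 */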

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
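
/*
 * Illustration (hypothetical registers): with i1 = {dst=R1, src=R0} and
 * i2 = {dst=R0, src=R1} the destinations overlap both sources, so the
 * code above either swaps R0/R1 with tcg_out_xchg() and then extends in
 * place, or parks R0 in @scratch when the backend has no xchg.
 */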

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch register or two xchg operations.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
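
/*
 * For illustration (an assumed constraint line, not necessarily present
 * in every backend): a backend entry C_O1_I2(r, r, ri) expands here via
 * C_PFX3 to the enumerator c_o1_i2_r_r_ri.  The same header is included
 * again below with the macros redefined, so the identical line then
 * produces the matching { .args_ct_str = { "r", "r", "ri" } } entry in
 * constraint_sets[], and after the final redefinition the bare
 * enumerator for tcg_target_op_def() to return.
 */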

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
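
/*
 * Usage note (a sketch, not new behavior): callers normally go through
 * the inline tcg_malloc() fast path, which bump-allocates from
 * pool_cur/pool_end and only drops into tcg_malloc_internal() when the
 * current chunk is exhausted.  Everything is released wholesale by
 * tcg_pool_reset() at the start of each translation, so individual
 * allocations are never freed.
 */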

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of the
 * helpers; the end result is that they are easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);
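    /*
     * Worked example (illustrative): for info_helper_ld32_mmu above, the
     * last non-zero field after shifting out the return type is the ptr
     * argument in 3-bit fields 9..11, so the bit length divides (rounding
     * up) into 4 three-bit fields and nargs == 4.
     */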

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
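
/*
 * Illustration (host-dependent values, assumed for the example): on a
 * 64-bit host with 6 integer argument registers, arg_slot 0..5 map to
 * those registers (arg_slot_reg_p() is true), while arg_slot 7 maps to
 * stack offset TCG_TARGET_CALL_STACK_OFFSET + 1 * sizeof(tcg_target_long).
 */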

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference, so we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
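
/*
 * For illustration (assuming a 64-bit host): an I128 argument passed
 * TCG_CALL_ARG_BY_REF uses n == 2.  One regular arg_slot carries the
 * pointer, while ref_slot and ref_slot + 1 reserve the two stack words
 * that receive the callee-clobberable copy of the value.
 */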

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
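
/*
 * Example of the layout produced above (illustrative numbers): with a
 * 64-byte icache line, the TB header lands on a fresh line and its
 * translated code starts on the next line boundary, so writes to TB
 * metadata do not thrash the lines holding executable code.
 */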
1417 
1418 void tcg_prologue_init(void)
1419 {
1420     TCGContext *s = tcg_ctx;
1421     size_t prologue_size;
1422 
1423     s->code_ptr = s->code_gen_ptr;
1424     s->code_buf = s->code_gen_ptr;
1425     s->data_gen_ptr = NULL;
1426 
1427 #ifndef CONFIG_TCG_INTERPRETER
1428     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1429 #endif
1430 
1431 #ifdef TCG_TARGET_NEED_POOL_LABELS
1432     s->pool_labels = NULL;
1433 #endif
1434 
1435     qemu_thread_jit_write();
1436     /* Generate the prologue.  */
1437     tcg_target_qemu_prologue(s);
1438 
1439 #ifdef TCG_TARGET_NEED_POOL_LABELS
1440     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1441     {
1442         int result = tcg_out_pool_finalize(s);
1443         tcg_debug_assert(result == 0);
1444     }
1445 #endif
1446 
1447     prologue_size = tcg_current_code_size(s);
1448     perf_report_prologue(s->code_gen_ptr, prologue_size);
1449 
1450 #ifndef CONFIG_TCG_INTERPRETER
1451     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1452                         (uintptr_t)s->code_buf, prologue_size);
1453 #endif
1454 
1455     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1456         FILE *logfile = qemu_log_trylock();
1457         if (logfile) {
1458             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1459             if (s->data_gen_ptr) {
1460                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1461                 size_t data_size = prologue_size - code_size;
1462                 size_t i;
1463 
1464                 disas(logfile, s->code_gen_ptr, code_size);
1465 
1466                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1467                     if (sizeof(tcg_target_ulong) == 8) {
1468                         fprintf(logfile,
1469                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1470                                 (uintptr_t)s->data_gen_ptr + i,
1471                                 *(uint64_t *)(s->data_gen_ptr + i));
1472                     } else {
1473                         fprintf(logfile,
1474                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1475                                 (uintptr_t)s->data_gen_ptr + i,
1476                                 *(uint32_t *)(s->data_gen_ptr + i));
1477                     }
1478                 }
1479             } else {
1480                 disas(logfile, s->code_gen_ptr, prologue_size);
1481             }
1482             fprintf(logfile, "\n");
1483             qemu_log_unlock(logfile);
1484         }
1485     }
1486 
1487 #ifndef CONFIG_TCG_INTERPRETER
1488     /*
1489      * Assert that goto_ptr is implemented completely, setting an epilogue.
1490      * For tci, we use NULL as the signal to return from the interpreter,
1491      * so skip this check.
1492      */
1493     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1494 #endif
1495 
1496     tcg_region_prologue_set(s);
1497 }
1498 
1499 void tcg_func_start(TCGContext *s)
1500 {
1501     tcg_pool_reset(s);
1502     s->nb_temps = s->nb_globals;
1503 
1504     /* No temps have been previously allocated for size or locality.  */
1505     memset(s->free_temps, 0, sizeof(s->free_temps));
1506 
1507     /* No constant temps have been previously allocated. */
1508     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1509         if (s->const_table[i]) {
1510             g_hash_table_remove_all(s->const_table[i]);
1511         }
1512     }
1513 
1514     s->nb_ops = 0;
1515     s->nb_labels = 0;
1516     s->current_frame_offset = s->frame_start;
1517 
1518 #ifdef CONFIG_DEBUG_TCG
1519     s->goto_tb_issue_mask = 0;
1520 #endif
1521 
1522     QTAILQ_INIT(&s->ops);
1523     QTAILQ_INIT(&s->free_ops);
1524     s->emit_before_op = NULL;
1525     QSIMPLEQ_INIT(&s->labels);
1526 
1527     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1528                      s->addr_type == TCG_TYPE_I64);
1529 
1530     tcg_debug_assert(s->insn_start_words > 0);
1531 }
1532 
1533 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1534 {
1535     int n = s->nb_temps++;
1536 
1537     if (n >= TCG_MAX_TEMPS) {
1538         tcg_raise_tb_overflow(s);
1539     }
1540     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1541 }
1542 
1543 static TCGTemp *tcg_global_alloc(TCGContext *s)
1544 {
1545     TCGTemp *ts;
1546 
1547     tcg_debug_assert(s->nb_globals == s->nb_temps);
1548     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1549     s->nb_globals++;
1550     ts = tcg_temp_alloc(s);
1551     ts->kind = TEMP_GLOBAL;
1552 
1553     return ts;
1554 }
1555 
1556 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1557                                             TCGReg reg, const char *name)
1558 {
1559     TCGTemp *ts;
1560 
1561     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1562 
1563     ts = tcg_global_alloc(s);
1564     ts->base_type = type;
1565     ts->type = type;
1566     ts->kind = TEMP_FIXED;
1567     ts->reg = reg;
1568     ts->name = name;
1569     tcg_regset_set_reg(s->reserved_regs, reg);
1570 
1571     return ts;
1572 }
1573 
1574 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1575 {
1576     s->frame_start = start;
1577     s->frame_end = start + size;
1578     s->frame_temp
1579         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1580 }
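
/*
 * Editor's illustration: tcg_set_frame() is called once from a
 * backend's tcg_target_qemu_prologue() to reserve the spill/temp area.
 * A hedged sketch in the shape several backends use (the constants are
 * placeholders here, not taken from a specific target):
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 */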
1581 
1582 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1583                                             const char *name, TCGType type)
1584 {
1585     TCGContext *s = tcg_ctx;
1586     TCGTemp *base_ts = tcgv_ptr_temp(base);
1587     TCGTemp *ts = tcg_global_alloc(s);
1588     int indirect_reg = 0;
1589 
1590     switch (base_ts->kind) {
1591     case TEMP_FIXED:
1592         break;
1593     case TEMP_GLOBAL:
1594         /* We do not support double-indirect registers.  */
1595         tcg_debug_assert(!base_ts->indirect_reg);
1596         base_ts->indirect_base = 1;
1597         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1598                             ? 2 : 1);
1599         indirect_reg = 1;
1600         break;
1601     default:
1602         g_assert_not_reached();
1603     }
1604 
1605     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1606         TCGTemp *ts2 = tcg_global_alloc(s);
1607         char buf[64];
1608 
1609         ts->base_type = TCG_TYPE_I64;
1610         ts->type = TCG_TYPE_I32;
1611         ts->indirect_reg = indirect_reg;
1612         ts->mem_allocated = 1;
1613         ts->mem_base = base_ts;
1614         ts->mem_offset = offset;
1615         pstrcpy(buf, sizeof(buf), name);
1616         pstrcat(buf, sizeof(buf), "_0");
1617         ts->name = strdup(buf);
1618 
1619         tcg_debug_assert(ts2 == ts + 1);
1620         ts2->base_type = TCG_TYPE_I64;
1621         ts2->type = TCG_TYPE_I32;
1622         ts2->indirect_reg = indirect_reg;
1623         ts2->mem_allocated = 1;
1624         ts2->mem_base = base_ts;
1625         ts2->mem_offset = offset + 4;
1626         ts2->temp_subindex = 1;
1627         pstrcpy(buf, sizeof(buf), name);
1628         pstrcat(buf, sizeof(buf), "_1");
1629         ts2->name = strdup(buf);
1630     } else {
1631         ts->base_type = type;
1632         ts->type = type;
1633         ts->indirect_reg = indirect_reg;
1634         ts->mem_allocated = 1;
1635         ts->mem_base = base_ts;
1636         ts->mem_offset = offset;
1637         ts->name = name;
1638     }
1639     return ts;
1640 }
1641 
1642 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1643 {
1644     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1645     return temp_tcgv_i32(ts);
1646 }
1647 
1648 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1649 {
1650     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1651     return temp_tcgv_i64(ts);
1652 }
1653 
1654 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1655 {
1656     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1657     return temp_tcgv_ptr(ts);
1658 }
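
/*
 * Editor's illustration: front ends use the wrappers above to expose
 * CPU state fields as named globals.  A hedged sketch, with
 * "CPUArchState" and its "pc" field standing in for a real target:
 *
 *     TCGv_i64 cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                           offsetof(CPUArchState, pc), "pc");
 */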
1659 
1660 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1661 {
1662     TCGContext *s = tcg_ctx;
1663     TCGTemp *ts;
1664     int n;
1665 
1666     if (kind == TEMP_EBB) {
1667         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1668 
1669         if (idx < TCG_MAX_TEMPS) {
1670             /* There is already an available temp with the right type.  */
1671             clear_bit(idx, s->free_temps[type].l);
1672 
1673             ts = &s->temps[idx];
1674             ts->temp_allocated = 1;
1675             tcg_debug_assert(ts->base_type == type);
1676             tcg_debug_assert(ts->kind == kind);
1677             return ts;
1678         }
1679     } else {
1680         tcg_debug_assert(kind == TEMP_TB);
1681     }
1682 
1683     switch (type) {
1684     case TCG_TYPE_I32:
1685     case TCG_TYPE_V64:
1686     case TCG_TYPE_V128:
1687     case TCG_TYPE_V256:
1688         n = 1;
1689         break;
1690     case TCG_TYPE_I64:
1691         n = 64 / TCG_TARGET_REG_BITS;
1692         break;
1693     case TCG_TYPE_I128:
1694         n = 128 / TCG_TARGET_REG_BITS;
1695         break;
1696     default:
1697         g_assert_not_reached();
1698     }
1699 
1700     ts = tcg_temp_alloc(s);
1701     ts->base_type = type;
1702     ts->temp_allocated = 1;
1703     ts->kind = kind;
1704 
1705     if (n == 1) {
1706         ts->type = type;
1707     } else {
1708         ts->type = TCG_TYPE_REG;
1709 
1710         for (int i = 1; i < n; ++i) {
1711             TCGTemp *ts2 = tcg_temp_alloc(s);
1712 
1713             tcg_debug_assert(ts2 == ts + i);
1714             ts2->base_type = type;
1715             ts2->type = TCG_TYPE_REG;
1716             ts2->temp_allocated = 1;
1717             ts2->temp_subindex = i;
1718             ts2->kind = kind;
1719         }
1720     }
1721     return ts;
1722 }
1723 
1724 TCGv_i32 tcg_temp_new_i32(void)
1725 {
1726     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1727 }
1728 
1729 TCGv_i32 tcg_temp_ebb_new_i32(void)
1730 {
1731     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1732 }
1733 
1734 TCGv_i64 tcg_temp_new_i64(void)
1735 {
1736     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1737 }
1738 
1739 TCGv_i64 tcg_temp_ebb_new_i64(void)
1740 {
1741     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1742 }
1743 
1744 TCGv_ptr tcg_temp_new_ptr(void)
1745 {
1746     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1747 }
1748 
1749 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1750 {
1751     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1752 }
1753 
1754 TCGv_i128 tcg_temp_new_i128(void)
1755 {
1756     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1757 }
1758 
1759 TCGv_i128 tcg_temp_ebb_new_i128(void)
1760 {
1761     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1762 }
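
/*
 * Editor's note on the two kinds: TEMP_TB temps stay valid across
 * branches within the translation block, while TEMP_EBB temps die at
 * the end of an extended basic block and are recycled through
 * free_temps.  A hedged sketch ("off" is a placeholder offset):
 *
 *     TCGv_i64 t = tcg_temp_ebb_new_i64();   // scratch for one EBB
 *     tcg_gen_addi_i64(t, src, 1);
 *     tcg_gen_st_i64(t, tcg_env, off);
 *     tcg_temp_free_i64(t);                  // back onto free_temps
 */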
1763 
1764 TCGv_vec tcg_temp_new_vec(TCGType type)
1765 {
1766     TCGTemp *t;
1767 
1768 #ifdef CONFIG_DEBUG_TCG
1769     switch (type) {
1770     case TCG_TYPE_V64:
1771         assert(TCG_TARGET_HAS_v64);
1772         break;
1773     case TCG_TYPE_V128:
1774         assert(TCG_TARGET_HAS_v128);
1775         break;
1776     case TCG_TYPE_V256:
1777         assert(TCG_TARGET_HAS_v256);
1778         break;
1779     default:
1780         g_assert_not_reached();
1781     }
1782 #endif
1783 
1784     t = tcg_temp_new_internal(type, TEMP_EBB);
1785     return temp_tcgv_vec(t);
1786 }
1787 
1788 /* Create a new temp of the same type as an existing temp.  */
1789 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1790 {
1791     TCGTemp *t = tcgv_vec_temp(match);
1792 
1793     tcg_debug_assert(t->temp_allocated != 0);
1794 
1795     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1796     return temp_tcgv_vec(t);
1797 }
1798 
1799 void tcg_temp_free_internal(TCGTemp *ts)
1800 {
1801     TCGContext *s = tcg_ctx;
1802 
1803     switch (ts->kind) {
1804     case TEMP_CONST:
1805     case TEMP_TB:
1806         /* Silently ignore free. */
1807         break;
1808     case TEMP_EBB:
1809         tcg_debug_assert(ts->temp_allocated != 0);
1810         ts->temp_allocated = 0;
1811         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1812         break;
1813     default:
1814         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1815         g_assert_not_reached();
1816     }
1817 }
1818 
1819 void tcg_temp_free_i32(TCGv_i32 arg)
1820 {
1821     tcg_temp_free_internal(tcgv_i32_temp(arg));
1822 }
1823 
1824 void tcg_temp_free_i64(TCGv_i64 arg)
1825 {
1826     tcg_temp_free_internal(tcgv_i64_temp(arg));
1827 }
1828 
1829 void tcg_temp_free_i128(TCGv_i128 arg)
1830 {
1831     tcg_temp_free_internal(tcgv_i128_temp(arg));
1832 }
1833 
1834 void tcg_temp_free_ptr(TCGv_ptr arg)
1835 {
1836     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1837 }
1838 
1839 void tcg_temp_free_vec(TCGv_vec arg)
1840 {
1841     tcg_temp_free_internal(tcgv_vec_temp(arg));
1842 }
1843 
1844 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1845 {
1846     TCGContext *s = tcg_ctx;
1847     GHashTable *h = s->const_table[type];
1848     TCGTemp *ts;
1849 
1850     if (h == NULL) {
1851         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1852         s->const_table[type] = h;
1853     }
1854 
1855     ts = g_hash_table_lookup(h, &val);
1856     if (ts == NULL) {
1857         int64_t *val_ptr;
1858 
1859         ts = tcg_temp_alloc(s);
1860 
1861         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1862             TCGTemp *ts2 = tcg_temp_alloc(s);
1863 
1864             tcg_debug_assert(ts2 == ts + 1);
1865 
1866             ts->base_type = TCG_TYPE_I64;
1867             ts->type = TCG_TYPE_I32;
1868             ts->kind = TEMP_CONST;
1869             ts->temp_allocated = 1;
1870 
1871             ts2->base_type = TCG_TYPE_I64;
1872             ts2->type = TCG_TYPE_I32;
1873             ts2->kind = TEMP_CONST;
1874             ts2->temp_allocated = 1;
1875             ts2->temp_subindex = 1;
1876 
1877             /*
1878              * Retain the full value of the 64-bit constant in the low
1879              * part, so that the hash table works.  Actual uses will
1880              * truncate the value to the low part.
1881              */
1882             ts[HOST_BIG_ENDIAN].val = val;
1883             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1884             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1885         } else {
1886             ts->base_type = type;
1887             ts->type = type;
1888             ts->kind = TEMP_CONST;
1889             ts->temp_allocated = 1;
1890             ts->val = val;
1891             val_ptr = &ts->val;
1892         }
1893         g_hash_table_insert(h, val_ptr, ts);
1894     }
1895 
1896     return ts;
1897 }
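
/*
 * Editor's illustration: constants are interned per (type, value), so
 * repeated requests share one TEMP_CONST and the hash table above is
 * inserted into only once per value:
 *
 *     TCGv_i32 a = tcg_constant_i32(42);
 *     TCGv_i32 b = tcg_constant_i32(42);
 *     // a and b name the same TCGTemp; freeing either is silently
 *     // ignored (see tcg_temp_free_internal above).
 */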
1898 
1899 TCGv_i32 tcg_constant_i32(int32_t val)
1900 {
1901     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1902 }
1903 
1904 TCGv_i64 tcg_constant_i64(int64_t val)
1905 {
1906     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1907 }
1908 
1909 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1910 {
1911     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1912 }
1913 
1914 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1915 {
1916     val = dup_const(vece, val);
1917     return temp_tcgv_vec(tcg_constant_internal(type, val));
1918 }
1919 
1920 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1921 {
1922     TCGTemp *t = tcgv_vec_temp(match);
1923 
1924     tcg_debug_assert(t->temp_allocated != 0);
1925     return tcg_constant_vec(t->base_type, vece, val);
1926 }
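
/*
 * Worked example: dup_const() replicates the low 1 << vece bytes
 * across 64 bits before interning, so
 *
 *     tcg_constant_vec(TCG_TYPE_V128, MO_8, 0x01)
 *
 * interns the value 0x0101010101010101, and equal (type, vece, val)
 * requests share a single TEMP_CONST.
 */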
1927 
1928 #ifdef CONFIG_DEBUG_TCG
1929 size_t temp_idx(TCGTemp *ts)
1930 {
1931     ptrdiff_t n = ts - tcg_ctx->temps;
1932     assert(n >= 0 && n < tcg_ctx->nb_temps);
1933     return n;
1934 }
1935 
1936 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1937 {
1938     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1939 
1940     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1941     assert(o % sizeof(TCGTemp) == 0);
1942 
1943     return (void *)tcg_ctx + (uintptr_t)v;
1944 }
1945 #endif /* CONFIG_DEBUG_TCG */
1946 
1947 /* Return true if OP may appear in the opcode stream.
1948    Test the runtime variable that controls each opcode.  */
1949 bool tcg_op_supported(TCGOpcode op)
1950 {
1951     const bool have_vec
1952         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1953 
1954     switch (op) {
1955     case INDEX_op_discard:
1956     case INDEX_op_set_label:
1957     case INDEX_op_call:
1958     case INDEX_op_br:
1959     case INDEX_op_mb:
1960     case INDEX_op_insn_start:
1961     case INDEX_op_exit_tb:
1962     case INDEX_op_goto_tb:
1963     case INDEX_op_goto_ptr:
1964     case INDEX_op_qemu_ld_a32_i32:
1965     case INDEX_op_qemu_ld_a64_i32:
1966     case INDEX_op_qemu_st_a32_i32:
1967     case INDEX_op_qemu_st_a64_i32:
1968     case INDEX_op_qemu_ld_a32_i64:
1969     case INDEX_op_qemu_ld_a64_i64:
1970     case INDEX_op_qemu_st_a32_i64:
1971     case INDEX_op_qemu_st_a64_i64:
1972         return true;
1973 
1974     case INDEX_op_qemu_st8_a32_i32:
1975     case INDEX_op_qemu_st8_a64_i32:
1976         return TCG_TARGET_HAS_qemu_st8_i32;
1977 
1978     case INDEX_op_qemu_ld_a32_i128:
1979     case INDEX_op_qemu_ld_a64_i128:
1980     case INDEX_op_qemu_st_a32_i128:
1981     case INDEX_op_qemu_st_a64_i128:
1982         return TCG_TARGET_HAS_qemu_ldst_i128;
1983 
1984     case INDEX_op_mov_i32:
1985     case INDEX_op_setcond_i32:
1986     case INDEX_op_brcond_i32:
1987     case INDEX_op_movcond_i32:
1988     case INDEX_op_ld8u_i32:
1989     case INDEX_op_ld8s_i32:
1990     case INDEX_op_ld16u_i32:
1991     case INDEX_op_ld16s_i32:
1992     case INDEX_op_ld_i32:
1993     case INDEX_op_st8_i32:
1994     case INDEX_op_st16_i32:
1995     case INDEX_op_st_i32:
1996     case INDEX_op_add_i32:
1997     case INDEX_op_sub_i32:
1998     case INDEX_op_neg_i32:
1999     case INDEX_op_mul_i32:
2000     case INDEX_op_and_i32:
2001     case INDEX_op_or_i32:
2002     case INDEX_op_xor_i32:
2003     case INDEX_op_shl_i32:
2004     case INDEX_op_shr_i32:
2005     case INDEX_op_sar_i32:
2006         return true;
2007 
2008     case INDEX_op_negsetcond_i32:
2009         return TCG_TARGET_HAS_negsetcond_i32;
2010     case INDEX_op_div_i32:
2011     case INDEX_op_divu_i32:
2012         return TCG_TARGET_HAS_div_i32;
2013     case INDEX_op_rem_i32:
2014     case INDEX_op_remu_i32:
2015         return TCG_TARGET_HAS_rem_i32;
2016     case INDEX_op_div2_i32:
2017     case INDEX_op_divu2_i32:
2018         return TCG_TARGET_HAS_div2_i32;
2019     case INDEX_op_rotl_i32:
2020     case INDEX_op_rotr_i32:
2021         return TCG_TARGET_HAS_rot_i32;
2022     case INDEX_op_deposit_i32:
2023         return TCG_TARGET_HAS_deposit_i32;
2024     case INDEX_op_extract_i32:
2025         return TCG_TARGET_HAS_extract_i32;
2026     case INDEX_op_sextract_i32:
2027         return TCG_TARGET_HAS_sextract_i32;
2028     case INDEX_op_extract2_i32:
2029         return TCG_TARGET_HAS_extract2_i32;
2030     case INDEX_op_add2_i32:
2031         return TCG_TARGET_HAS_add2_i32;
2032     case INDEX_op_sub2_i32:
2033         return TCG_TARGET_HAS_sub2_i32;
2034     case INDEX_op_mulu2_i32:
2035         return TCG_TARGET_HAS_mulu2_i32;
2036     case INDEX_op_muls2_i32:
2037         return TCG_TARGET_HAS_muls2_i32;
2038     case INDEX_op_muluh_i32:
2039         return TCG_TARGET_HAS_muluh_i32;
2040     case INDEX_op_mulsh_i32:
2041         return TCG_TARGET_HAS_mulsh_i32;
2042     case INDEX_op_ext8s_i32:
2043         return TCG_TARGET_HAS_ext8s_i32;
2044     case INDEX_op_ext16s_i32:
2045         return TCG_TARGET_HAS_ext16s_i32;
2046     case INDEX_op_ext8u_i32:
2047         return TCG_TARGET_HAS_ext8u_i32;
2048     case INDEX_op_ext16u_i32:
2049         return TCG_TARGET_HAS_ext16u_i32;
2050     case INDEX_op_bswap16_i32:
2051         return TCG_TARGET_HAS_bswap16_i32;
2052     case INDEX_op_bswap32_i32:
2053         return TCG_TARGET_HAS_bswap32_i32;
2054     case INDEX_op_not_i32:
2055         return TCG_TARGET_HAS_not_i32;
2056     case INDEX_op_andc_i32:
2057         return TCG_TARGET_HAS_andc_i32;
2058     case INDEX_op_orc_i32:
2059         return TCG_TARGET_HAS_orc_i32;
2060     case INDEX_op_eqv_i32:
2061         return TCG_TARGET_HAS_eqv_i32;
2062     case INDEX_op_nand_i32:
2063         return TCG_TARGET_HAS_nand_i32;
2064     case INDEX_op_nor_i32:
2065         return TCG_TARGET_HAS_nor_i32;
2066     case INDEX_op_clz_i32:
2067         return TCG_TARGET_HAS_clz_i32;
2068     case INDEX_op_ctz_i32:
2069         return TCG_TARGET_HAS_ctz_i32;
2070     case INDEX_op_ctpop_i32:
2071         return TCG_TARGET_HAS_ctpop_i32;
2072 
2073     case INDEX_op_brcond2_i32:
2074     case INDEX_op_setcond2_i32:
2075         return TCG_TARGET_REG_BITS == 32;
2076 
2077     case INDEX_op_mov_i64:
2078     case INDEX_op_setcond_i64:
2079     case INDEX_op_brcond_i64:
2080     case INDEX_op_movcond_i64:
2081     case INDEX_op_ld8u_i64:
2082     case INDEX_op_ld8s_i64:
2083     case INDEX_op_ld16u_i64:
2084     case INDEX_op_ld16s_i64:
2085     case INDEX_op_ld32u_i64:
2086     case INDEX_op_ld32s_i64:
2087     case INDEX_op_ld_i64:
2088     case INDEX_op_st8_i64:
2089     case INDEX_op_st16_i64:
2090     case INDEX_op_st32_i64:
2091     case INDEX_op_st_i64:
2092     case INDEX_op_add_i64:
2093     case INDEX_op_sub_i64:
2094     case INDEX_op_neg_i64:
2095     case INDEX_op_mul_i64:
2096     case INDEX_op_and_i64:
2097     case INDEX_op_or_i64:
2098     case INDEX_op_xor_i64:
2099     case INDEX_op_shl_i64:
2100     case INDEX_op_shr_i64:
2101     case INDEX_op_sar_i64:
2102     case INDEX_op_ext_i32_i64:
2103     case INDEX_op_extu_i32_i64:
2104         return TCG_TARGET_REG_BITS == 64;
2105 
2106     case INDEX_op_negsetcond_i64:
2107         return TCG_TARGET_HAS_negsetcond_i64;
2108     case INDEX_op_div_i64:
2109     case INDEX_op_divu_i64:
2110         return TCG_TARGET_HAS_div_i64;
2111     case INDEX_op_rem_i64:
2112     case INDEX_op_remu_i64:
2113         return TCG_TARGET_HAS_rem_i64;
2114     case INDEX_op_div2_i64:
2115     case INDEX_op_divu2_i64:
2116         return TCG_TARGET_HAS_div2_i64;
2117     case INDEX_op_rotl_i64:
2118     case INDEX_op_rotr_i64:
2119         return TCG_TARGET_HAS_rot_i64;
2120     case INDEX_op_deposit_i64:
2121         return TCG_TARGET_HAS_deposit_i64;
2122     case INDEX_op_extract_i64:
2123         return TCG_TARGET_HAS_extract_i64;
2124     case INDEX_op_sextract_i64:
2125         return TCG_TARGET_HAS_sextract_i64;
2126     case INDEX_op_extract2_i64:
2127         return TCG_TARGET_HAS_extract2_i64;
2128     case INDEX_op_extrl_i64_i32:
2129     case INDEX_op_extrh_i64_i32:
2130         return TCG_TARGET_HAS_extr_i64_i32;
2131     case INDEX_op_ext8s_i64:
2132         return TCG_TARGET_HAS_ext8s_i64;
2133     case INDEX_op_ext16s_i64:
2134         return TCG_TARGET_HAS_ext16s_i64;
2135     case INDEX_op_ext32s_i64:
2136         return TCG_TARGET_HAS_ext32s_i64;
2137     case INDEX_op_ext8u_i64:
2138         return TCG_TARGET_HAS_ext8u_i64;
2139     case INDEX_op_ext16u_i64:
2140         return TCG_TARGET_HAS_ext16u_i64;
2141     case INDEX_op_ext32u_i64:
2142         return TCG_TARGET_HAS_ext32u_i64;
2143     case INDEX_op_bswap16_i64:
2144         return TCG_TARGET_HAS_bswap16_i64;
2145     case INDEX_op_bswap32_i64:
2146         return TCG_TARGET_HAS_bswap32_i64;
2147     case INDEX_op_bswap64_i64:
2148         return TCG_TARGET_HAS_bswap64_i64;
2149     case INDEX_op_not_i64:
2150         return TCG_TARGET_HAS_not_i64;
2151     case INDEX_op_andc_i64:
2152         return TCG_TARGET_HAS_andc_i64;
2153     case INDEX_op_orc_i64:
2154         return TCG_TARGET_HAS_orc_i64;
2155     case INDEX_op_eqv_i64:
2156         return TCG_TARGET_HAS_eqv_i64;
2157     case INDEX_op_nand_i64:
2158         return TCG_TARGET_HAS_nand_i64;
2159     case INDEX_op_nor_i64:
2160         return TCG_TARGET_HAS_nor_i64;
2161     case INDEX_op_clz_i64:
2162         return TCG_TARGET_HAS_clz_i64;
2163     case INDEX_op_ctz_i64:
2164         return TCG_TARGET_HAS_ctz_i64;
2165     case INDEX_op_ctpop_i64:
2166         return TCG_TARGET_HAS_ctpop_i64;
2167     case INDEX_op_add2_i64:
2168         return TCG_TARGET_HAS_add2_i64;
2169     case INDEX_op_sub2_i64:
2170         return TCG_TARGET_HAS_sub2_i64;
2171     case INDEX_op_mulu2_i64:
2172         return TCG_TARGET_HAS_mulu2_i64;
2173     case INDEX_op_muls2_i64:
2174         return TCG_TARGET_HAS_muls2_i64;
2175     case INDEX_op_muluh_i64:
2176         return TCG_TARGET_HAS_muluh_i64;
2177     case INDEX_op_mulsh_i64:
2178         return TCG_TARGET_HAS_mulsh_i64;
2179 
2180     case INDEX_op_mov_vec:
2181     case INDEX_op_dup_vec:
2182     case INDEX_op_dupm_vec:
2183     case INDEX_op_ld_vec:
2184     case INDEX_op_st_vec:
2185     case INDEX_op_add_vec:
2186     case INDEX_op_sub_vec:
2187     case INDEX_op_and_vec:
2188     case INDEX_op_or_vec:
2189     case INDEX_op_xor_vec:
2190     case INDEX_op_cmp_vec:
2191         return have_vec;
2192     case INDEX_op_dup2_vec:
2193         return have_vec && TCG_TARGET_REG_BITS == 32;
2194     case INDEX_op_not_vec:
2195         return have_vec && TCG_TARGET_HAS_not_vec;
2196     case INDEX_op_neg_vec:
2197         return have_vec && TCG_TARGET_HAS_neg_vec;
2198     case INDEX_op_abs_vec:
2199         return have_vec && TCG_TARGET_HAS_abs_vec;
2200     case INDEX_op_andc_vec:
2201         return have_vec && TCG_TARGET_HAS_andc_vec;
2202     case INDEX_op_orc_vec:
2203         return have_vec && TCG_TARGET_HAS_orc_vec;
2204     case INDEX_op_nand_vec:
2205         return have_vec && TCG_TARGET_HAS_nand_vec;
2206     case INDEX_op_nor_vec:
2207         return have_vec && TCG_TARGET_HAS_nor_vec;
2208     case INDEX_op_eqv_vec:
2209         return have_vec && TCG_TARGET_HAS_eqv_vec;
2210     case INDEX_op_mul_vec:
2211         return have_vec && TCG_TARGET_HAS_mul_vec;
2212     case INDEX_op_shli_vec:
2213     case INDEX_op_shri_vec:
2214     case INDEX_op_sari_vec:
2215         return have_vec && TCG_TARGET_HAS_shi_vec;
2216     case INDEX_op_shls_vec:
2217     case INDEX_op_shrs_vec:
2218     case INDEX_op_sars_vec:
2219         return have_vec && TCG_TARGET_HAS_shs_vec;
2220     case INDEX_op_shlv_vec:
2221     case INDEX_op_shrv_vec:
2222     case INDEX_op_sarv_vec:
2223         return have_vec && TCG_TARGET_HAS_shv_vec;
2224     case INDEX_op_rotli_vec:
2225         return have_vec && TCG_TARGET_HAS_roti_vec;
2226     case INDEX_op_rotls_vec:
2227         return have_vec && TCG_TARGET_HAS_rots_vec;
2228     case INDEX_op_rotlv_vec:
2229     case INDEX_op_rotrv_vec:
2230         return have_vec && TCG_TARGET_HAS_rotv_vec;
2231     case INDEX_op_ssadd_vec:
2232     case INDEX_op_usadd_vec:
2233     case INDEX_op_sssub_vec:
2234     case INDEX_op_ussub_vec:
2235         return have_vec && TCG_TARGET_HAS_sat_vec;
2236     case INDEX_op_smin_vec:
2237     case INDEX_op_umin_vec:
2238     case INDEX_op_smax_vec:
2239     case INDEX_op_umax_vec:
2240         return have_vec && TCG_TARGET_HAS_minmax_vec;
2241     case INDEX_op_bitsel_vec:
2242         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2243     case INDEX_op_cmpsel_vec:
2244         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2245 
2246     default:
2247         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2248         return true;
2249     }
2250 }
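
/*
 * Editor's illustration of the usual guard pattern around optional
 * opcodes (a hedged sketch, not a specific expander):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop_i32 directly
 *     } else {
 *         // fall back to a shift/mask expansion or a helper call
 *     }
 */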
2251 
2252 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2253 
2254 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2255                           TCGTemp *ret, TCGTemp **args)
2256 {
2257     TCGv_i64 extend_free[MAX_CALL_IARGS];
2258     int n_extend = 0;
2259     TCGOp *op;
2260     int i, n, pi = 0, total_args;
2261 
2262     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2263         init_call_layout(info);
2264         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2265     }
2266 
2267     total_args = info->nr_out + info->nr_in + 2;
2268     op = tcg_op_alloc(INDEX_op_call, total_args);
2269 
2270 #ifdef CONFIG_PLUGIN
2271     /* Flag helpers that may affect guest state */
2272     if (tcg_ctx->plugin_insn &&
2273         !(info->flags & TCG_CALL_PLUGIN) &&
2274         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2275         tcg_ctx->plugin_insn->calls_helpers = true;
2276     }
2277 #endif
2278 
2279     TCGOP_CALLO(op) = n = info->nr_out;
2280     switch (n) {
2281     case 0:
2282         tcg_debug_assert(ret == NULL);
2283         break;
2284     case 1:
2285         tcg_debug_assert(ret != NULL);
2286         op->args[pi++] = temp_arg(ret);
2287         break;
2288     case 2:
2289     case 4:
2290         tcg_debug_assert(ret != NULL);
2291         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2292         tcg_debug_assert(ret->temp_subindex == 0);
2293         for (i = 0; i < n; ++i) {
2294             op->args[pi++] = temp_arg(ret + i);
2295         }
2296         break;
2297     default:
2298         g_assert_not_reached();
2299     }
2300 
2301     TCGOP_CALLI(op) = n = info->nr_in;
2302     for (i = 0; i < n; i++) {
2303         const TCGCallArgumentLoc *loc = &info->in[i];
2304         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2305 
2306         switch (loc->kind) {
2307         case TCG_CALL_ARG_NORMAL:
2308         case TCG_CALL_ARG_BY_REF:
2309         case TCG_CALL_ARG_BY_REF_N:
2310             op->args[pi++] = temp_arg(ts);
2311             break;
2312 
2313         case TCG_CALL_ARG_EXTEND_U:
2314         case TCG_CALL_ARG_EXTEND_S:
2315             {
2316                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2317                 TCGv_i32 orig = temp_tcgv_i32(ts);
2318 
2319                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2320                     tcg_gen_ext_i32_i64(temp, orig);
2321                 } else {
2322                     tcg_gen_extu_i32_i64(temp, orig);
2323                 }
2324                 op->args[pi++] = tcgv_i64_arg(temp);
2325                 extend_free[n_extend++] = temp;
2326             }
2327             break;
2328 
2329         default:
2330             g_assert_not_reached();
2331         }
2332     }
2333     op->args[pi++] = (uintptr_t)func;
2334     op->args[pi++] = (uintptr_t)info;
2335     tcg_debug_assert(pi == total_args);
2336 
2337     if (tcg_ctx->emit_before_op) {
2338         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2339     } else {
2340         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2341     }
2342 
2343     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2344     for (i = 0; i < n_extend; ++i) {
2345         tcg_temp_free_i64(extend_free[i]);
2346     }
2347 }
2348 
2349 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2350 {
2351     tcg_gen_callN(func, info, ret, NULL);
2352 }
2353 
2354 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2355 {
2356     tcg_gen_callN(func, info, ret, &t1);
2357 }
2358 
2359 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2360                    TCGTemp *t1, TCGTemp *t2)
2361 {
2362     TCGTemp *args[2] = { t1, t2 };
2363     tcg_gen_callN(func, info, ret, args);
2364 }
2365 
2366 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2367                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2368 {
2369     TCGTemp *args[3] = { t1, t2, t3 };
2370     tcg_gen_callN(func, info, ret, args);
2371 }
2372 
2373 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2374                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2375 {
2376     TCGTemp *args[4] = { t1, t2, t3, t4 };
2377     tcg_gen_callN(func, info, ret, args);
2378 }
2379 
2380 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2381                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2382 {
2383     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2384     tcg_gen_callN(func, info, ret, args);
2385 }
2386 
2387 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2388                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2389                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2390 {
2391     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2392     tcg_gen_callN(func, info, ret, args);
2393 }
2394 
2395 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2396                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2397                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2398 {
2399     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2400     tcg_gen_callN(func, info, ret, args);
2401 }
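
/*
 * Editor's illustration: these fixed-arity entry points are reached
 * via the generated helper glue.  A helper declared (hypothetically)
 * as
 *
 *     DEF_HELPER_FLAGS_2(foo, TCG_CALL_NO_RWG, i32, env, i32)
 *
 * yields a gen_helper_foo() that packages its operands as TCGTemp
 * pointers and calls tcg_gen_call2() with that helper's TCGHelperInfo.
 */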
2402 
2403 static void tcg_reg_alloc_start(TCGContext *s)
2404 {
2405     int i, n;
2406 
2407     for (i = 0, n = s->nb_temps; i < n; i++) {
2408         TCGTemp *ts = &s->temps[i];
2409         TCGTempVal val = TEMP_VAL_MEM;
2410 
2411         switch (ts->kind) {
2412         case TEMP_CONST:
2413             val = TEMP_VAL_CONST;
2414             break;
2415         case TEMP_FIXED:
2416             val = TEMP_VAL_REG;
2417             break;
2418         case TEMP_GLOBAL:
2419             break;
2420         case TEMP_EBB:
2421             val = TEMP_VAL_DEAD;
2422             /* fall through */
2423         case TEMP_TB:
2424             ts->mem_allocated = 0;
2425             break;
2426         default:
2427             g_assert_not_reached();
2428         }
2429         ts->val_type = val;
2430     }
2431 
2432     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2433 }
2434 
2435 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2436                                  TCGTemp *ts)
2437 {
2438     int idx = temp_idx(ts);
2439 
2440     switch (ts->kind) {
2441     case TEMP_FIXED:
2442     case TEMP_GLOBAL:
2443         pstrcpy(buf, buf_size, ts->name);
2444         break;
2445     case TEMP_TB:
2446         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2447         break;
2448     case TEMP_EBB:
2449         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2450         break;
2451     case TEMP_CONST:
2452         switch (ts->type) {
2453         case TCG_TYPE_I32:
2454             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2455             break;
2456 #if TCG_TARGET_REG_BITS > 32
2457         case TCG_TYPE_I64:
2458             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2459             break;
2460 #endif
2461         case TCG_TYPE_V64:
2462         case TCG_TYPE_V128:
2463         case TCG_TYPE_V256:
2464             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2465                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2466             break;
2467         default:
2468             g_assert_not_reached();
2469         }
2470         break;
2471     }
2472     return buf;
2473 }
2474 
2475 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2476                              int buf_size, TCGArg arg)
2477 {
2478     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2479 }
2480 
2481 static const char * const cond_name[] =
2482 {
2483     [TCG_COND_NEVER] = "never",
2484     [TCG_COND_ALWAYS] = "always",
2485     [TCG_COND_EQ] = "eq",
2486     [TCG_COND_NE] = "ne",
2487     [TCG_COND_LT] = "lt",
2488     [TCG_COND_GE] = "ge",
2489     [TCG_COND_LE] = "le",
2490     [TCG_COND_GT] = "gt",
2491     [TCG_COND_LTU] = "ltu",
2492     [TCG_COND_GEU] = "geu",
2493     [TCG_COND_LEU] = "leu",
2494     [TCG_COND_GTU] = "gtu",
2495     [TCG_COND_TSTEQ] = "tsteq",
2496     [TCG_COND_TSTNE] = "tstne",
2497 };
2498 
2499 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2500 {
2501     [MO_UB]   = "ub",
2502     [MO_SB]   = "sb",
2503     [MO_LEUW] = "leuw",
2504     [MO_LESW] = "lesw",
2505     [MO_LEUL] = "leul",
2506     [MO_LESL] = "lesl",
2507     [MO_LEUQ] = "leq",
2508     [MO_BEUW] = "beuw",
2509     [MO_BESW] = "besw",
2510     [MO_BEUL] = "beul",
2511     [MO_BESL] = "besl",
2512     [MO_BEUQ] = "beq",
2513     [MO_128 + MO_BE] = "beo",
2514     [MO_128 + MO_LE] = "leo",
2515 };
2516 
2517 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2518     [MO_UNALN >> MO_ASHIFT]    = "un+",
2519     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2520     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2521     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2522     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2523     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2524     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2525     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2526 };
2527 
2528 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2529     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2530     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2531     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2532     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2533     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2534     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2535 };
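
/*
 * Worked example: tcg_dump_ops() below combines these tables to print
 * a MemOpIdx symbolically, so make_memop_idx(MO_LEUL | MO_ALIGN, 1)
 * -- an aligned little-endian 32-bit access with mmu index 1 --
 * appears as ",al+leul,1" in the op dump.
 */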
2536 
2537 static const char bswap_flag_name[][6] = {
2538     [TCG_BSWAP_IZ] = "iz",
2539     [TCG_BSWAP_OZ] = "oz",
2540     [TCG_BSWAP_OS] = "os",
2541     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2542     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2543 };
2544 
2545 static inline bool tcg_regset_single(TCGRegSet d)
2546 {
2547     return (d & (d - 1)) == 0;
2548 }
2549 
2550 static inline TCGReg tcg_regset_first(TCGRegSet d)
2551 {
2552     if (TCG_TARGET_NB_REGS <= 32) {
2553         return ctz32(d);
2554     } else {
2555         return ctz64(d);
2556     }
2557 }
2558 
2559 /* Return only the number of characters output -- no error return. */
2560 #define ne_fprintf(...) \
2561     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2562 
2563 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2564 {
2565     char buf[128];
2566     TCGOp *op;
2567 
2568     QTAILQ_FOREACH(op, &s->ops, link) {
2569         int i, k, nb_oargs, nb_iargs, nb_cargs;
2570         const TCGOpDef *def;
2571         TCGOpcode c;
2572         int col = 0;
2573 
2574         c = op->opc;
2575         def = &tcg_op_defs[c];
2576 
2577         if (c == INDEX_op_insn_start) {
2578             nb_oargs = 0;
2579             col += ne_fprintf(f, "\n ----");
2580 
2581             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2582                 col += ne_fprintf(f, " %016" PRIx64,
2583                                   tcg_get_insn_start_param(op, i));
2584             }
2585         } else if (c == INDEX_op_call) {
2586             const TCGHelperInfo *info = tcg_call_info(op);
2587             void *func = tcg_call_func(op);
2588 
2589             /* variable number of arguments */
2590             nb_oargs = TCGOP_CALLO(op);
2591             nb_iargs = TCGOP_CALLI(op);
2592             nb_cargs = def->nb_cargs;
2593 
2594             col += ne_fprintf(f, " %s ", def->name);
2595 
2596             /*
2597              * Print the function name from TCGHelperInfo, if available.
2598              * Note that plugins have a template function for the info,
2599              * but the actual function pointer comes from the plugin.
2600              */
2601             if (func == info->func) {
2602                 col += ne_fprintf(f, "%s", info->name);
2603             } else {
2604                 col += ne_fprintf(f, "plugin(%p)", func);
2605             }
2606 
2607             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2608             for (i = 0; i < nb_oargs; i++) {
2609                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2610                                                             op->args[i]));
2611             }
2612             for (i = 0; i < nb_iargs; i++) {
2613                 TCGArg arg = op->args[nb_oargs + i];
2614                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2615                 col += ne_fprintf(f, ",%s", t);
2616             }
2617         } else {
2618             col += ne_fprintf(f, " %s ", def->name);
2619 
2620             nb_oargs = def->nb_oargs;
2621             nb_iargs = def->nb_iargs;
2622             nb_cargs = def->nb_cargs;
2623 
2624             if (def->flags & TCG_OPF_VECTOR) {
2625                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2626                                   8 << TCGOP_VECE(op));
2627             }
2628 
2629             k = 0;
2630             for (i = 0; i < nb_oargs; i++) {
2631                 const char *sep = k ? "," : "";
2632                 col += ne_fprintf(f, "%s%s", sep,
2633                                   tcg_get_arg_str(s, buf, sizeof(buf),
2634                                                   op->args[k++]));
2635             }
2636             for (i = 0; i < nb_iargs; i++) {
2637                 const char *sep = k ? "," : "";
2638                 col += ne_fprintf(f, "%s%s", sep,
2639                                   tcg_get_arg_str(s, buf, sizeof(buf),
2640                                                   op->args[k++]));
2641             }
2642             switch (c) {
2643             case INDEX_op_brcond_i32:
2644             case INDEX_op_setcond_i32:
2645             case INDEX_op_negsetcond_i32:
2646             case INDEX_op_movcond_i32:
2647             case INDEX_op_brcond2_i32:
2648             case INDEX_op_setcond2_i32:
2649             case INDEX_op_brcond_i64:
2650             case INDEX_op_setcond_i64:
2651             case INDEX_op_negsetcond_i64:
2652             case INDEX_op_movcond_i64:
2653             case INDEX_op_cmp_vec:
2654             case INDEX_op_cmpsel_vec:
2655                 if (op->args[k] < ARRAY_SIZE(cond_name)
2656                     && cond_name[op->args[k]]) {
2657                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2658                 } else {
2659                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2660                 }
2661                 i = 1;
2662                 break;
2663             case INDEX_op_qemu_ld_a32_i32:
2664             case INDEX_op_qemu_ld_a64_i32:
2665             case INDEX_op_qemu_st_a32_i32:
2666             case INDEX_op_qemu_st_a64_i32:
2667             case INDEX_op_qemu_st8_a32_i32:
2668             case INDEX_op_qemu_st8_a64_i32:
2669             case INDEX_op_qemu_ld_a32_i64:
2670             case INDEX_op_qemu_ld_a64_i64:
2671             case INDEX_op_qemu_st_a32_i64:
2672             case INDEX_op_qemu_st_a64_i64:
2673             case INDEX_op_qemu_ld_a32_i128:
2674             case INDEX_op_qemu_ld_a64_i128:
2675             case INDEX_op_qemu_st_a32_i128:
2676             case INDEX_op_qemu_st_a64_i128:
2677                 {
2678                     const char *s_al, *s_op, *s_at;
2679                     MemOpIdx oi = op->args[k++];
2680                     MemOp mop = get_memop(oi);
2681                     unsigned ix = get_mmuidx(oi);
2682 
2683                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2684                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2685                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2686                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2687 
2688                     /* If all fields are accounted for, print symbolically. */
2689                     if (!mop && s_al && s_op && s_at) {
2690                         col += ne_fprintf(f, ",%s%s%s,%u",
2691                                           s_at, s_al, s_op, ix);
2692                     } else {
2693                         mop = get_memop(oi);
2694                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2695                     }
2696                     i = 1;
2697                 }
2698                 break;
2699             case INDEX_op_bswap16_i32:
2700             case INDEX_op_bswap16_i64:
2701             case INDEX_op_bswap32_i32:
2702             case INDEX_op_bswap32_i64:
2703             case INDEX_op_bswap64_i64:
2704                 {
2705                     TCGArg flags = op->args[k];
2706                     const char *name = NULL;
2707 
2708                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2709                         name = bswap_flag_name[flags];
2710                     }
2711                     if (name) {
2712                         col += ne_fprintf(f, ",%s", name);
2713                     } else {
2714                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2715                     }
2716                     i = k = 1;
2717                 }
2718                 break;
2719             default:
2720                 i = 0;
2721                 break;
2722             }
2723             switch (c) {
2724             case INDEX_op_set_label:
2725             case INDEX_op_br:
2726             case INDEX_op_brcond_i32:
2727             case INDEX_op_brcond_i64:
2728             case INDEX_op_brcond2_i32:
2729                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2730                                   arg_label(op->args[k])->id);
2731                 i++, k++;
2732                 break;
2733             case INDEX_op_mb:
2734                 {
2735                     TCGBar membar = op->args[k];
2736                     const char *b_op, *m_op;
2737 
2738                     switch (membar & TCG_BAR_SC) {
2739                     case 0:
2740                         b_op = "none";
2741                         break;
2742                     case TCG_BAR_LDAQ:
2743                         b_op = "acq";
2744                         break;
2745                     case TCG_BAR_STRL:
2746                         b_op = "rel";
2747                         break;
2748                     case TCG_BAR_SC:
2749                         b_op = "seq";
2750                         break;
2751                     default:
2752                         g_assert_not_reached();
2753                     }
2754 
2755                     switch (membar & TCG_MO_ALL) {
2756                     case 0:
2757                         m_op = "none";
2758                         break;
2759                     case TCG_MO_LD_LD:
2760                         m_op = "rr";
2761                         break;
2762                     case TCG_MO_LD_ST:
2763                         m_op = "rw";
2764                         break;
2765                     case TCG_MO_ST_LD:
2766                         m_op = "wr";
2767                         break;
2768                     case TCG_MO_ST_ST:
2769                         m_op = "ww";
2770                         break;
2771                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2772                         m_op = "rr+rw";
2773                         break;
2774                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2775                         m_op = "rr+wr";
2776                         break;
2777                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2778                         m_op = "rr+ww";
2779                         break;
2780                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2781                         m_op = "rw+wr";
2782                         break;
2783                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2784                         m_op = "rw+ww";
2785                         break;
2786                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2787                         m_op = "wr+ww";
2788                         break;
2789                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2790                         m_op = "rr+rw+wr";
2791                         break;
2792                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2793                         m_op = "rr+rw+ww";
2794                         break;
2795                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2796                         m_op = "rr+wr+ww";
2797                         break;
2798                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2799                         m_op = "rw+wr+ww";
2800                         break;
2801                     case TCG_MO_ALL:
2802                         m_op = "all";
2803                         break;
2804                     default:
2805                         g_assert_not_reached();
2806                     }
2807 
2808                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2809                     i++, k++;
2810                 }
2811                 break;
2812             default:
2813                 break;
2814             }
2815             for (; i < nb_cargs; i++, k++) {
2816                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2817                                   op->args[k]);
2818             }
2819         }
2820 
2821         if (have_prefs || op->life) {
2822             for (; col < 40; ++col) {
2823                 putc(' ', f);
2824             }
2825         }
2826 
2827         if (op->life) {
2828             unsigned life = op->life;
2829 
2830             if (life & (SYNC_ARG * 3)) {
2831                 ne_fprintf(f, "  sync:");
2832                 for (i = 0; i < 2; ++i) {
2833                     if (life & (SYNC_ARG << i)) {
2834                         ne_fprintf(f, " %d", i);
2835                     }
2836                 }
2837             }
2838             life /= DEAD_ARG;
2839             if (life) {
2840                 ne_fprintf(f, "  dead:");
2841                 for (i = 0; life; ++i, life >>= 1) {
2842                     if (life & 1) {
2843                         ne_fprintf(f, " %d", i);
2844                     }
2845                 }
2846             }
2847         }
2848 
2849         if (have_prefs) {
2850             for (i = 0; i < nb_oargs; ++i) {
2851                 TCGRegSet set = output_pref(op, i);
2852 
2853                 if (i == 0) {
2854                     ne_fprintf(f, "  pref=");
2855                 } else {
2856                     ne_fprintf(f, ",");
2857                 }
2858                 if (set == 0) {
2859                     ne_fprintf(f, "none");
2860                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2861                     ne_fprintf(f, "all");
2862 #ifdef CONFIG_DEBUG_TCG
2863                 } else if (tcg_regset_single(set)) {
2864                     TCGReg reg = tcg_regset_first(set);
2865                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2866 #endif
2867                 } else if (TCG_TARGET_NB_REGS <= 32) {
2868                     ne_fprintf(f, "0x%x", (uint32_t)set);
2869                 } else {
2870                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2871                 }
2872             }
2873         }
2874 
2875         putc('\n', f);
2876     }
2877 }
2878 
2879 /* We give higher priority to constraints with fewer registers. */
2880 static int get_constraint_priority(const TCGOpDef *def, int k)
2881 {
2882     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2883     int n = ctpop64(arg_ct->regs);
2884 
2885     /*
2886      * Sort constraints of a single register first, which includes output
2887      * aliases (which must exactly match the input already allocated).
2888      */
2889     if (n == 1 || arg_ct->oalias) {
2890         return INT_MAX;
2891     }
2892 
2893     /*
2894      * Sort register pairs next, first then second immediately after.
2895      * Arbitrarily sort multiple pairs by the index of the first reg;
2896      * there shouldn't be many pairs.
2897      */
2898     switch (arg_ct->pair) {
2899     case 1:
2900     case 3:
2901         return (k + 1) * 2;
2902     case 2:
2903         return (arg_ct->pair_index + 1) * 2 - 1;
2904     }
2905 
2906     /* Finally, sort by decreasing register count. */
2907     assert(n > 1);
2908     return -n;
2909 }
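
/*
 * Worked example of the resulting keys: an output alias or a
 * single-register class sorts first (INT_MAX); the halves of a
 * register pair come next with small positive keys; and a plain class
 * such as "r" with, say, 16 allocatable registers sorts last at -16.
 */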
2910 
2911 /* sort from highest priority to lowest */
2912 static void sort_constraints(TCGOpDef *def, int start, int n)
2913 {
2914     int i, j;
2915     TCGArgConstraint *a = def->args_ct;
2916 
2917     for (i = 0; i < n; i++) {
2918         a[start + i].sort_index = start + i;
2919     }
2920     if (n <= 1) {
2921         return;
2922     }
2923     for (i = 0; i < n - 1; i++) {
2924         for (j = i + 1; j < n; j++) {
2925             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2926             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2927             if (p1 < p2) {
2928                 int tmp = a[start + i].sort_index;
2929                 a[start + i].sort_index = a[start + j].sort_index;
2930                 a[start + j].sort_index = tmp;
2931             }
2932         }
2933     }
2934 }
2935 
2936 static void process_op_defs(TCGContext *s)
2937 {
2938     TCGOpcode op;
2939 
2940     for (op = 0; op < NB_OPS; op++) {
2941         TCGOpDef *def = &tcg_op_defs[op];
2942         const TCGTargetOpDef *tdefs;
2943         bool saw_alias_pair = false;
2944         int i, o, i2, o2, nb_args;
2945 
2946         if (def->flags & TCG_OPF_NOT_PRESENT) {
2947             continue;
2948         }
2949 
2950         nb_args = def->nb_iargs + def->nb_oargs;
2951         if (nb_args == 0) {
2952             continue;
2953         }
2954 
2955         /*
2956          * Macro magic should make it impossible, but double-check that
2957          * the array index is in range.  Since the signedness of an enum
2958          * is implementation-defined, force the result to unsigned.
2959          */
2960         unsigned con_set = tcg_target_op_def(op);
2961         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2962         tdefs = &constraint_sets[con_set];
2963 
2964         for (i = 0; i < nb_args; i++) {
2965             const char *ct_str = tdefs->args_ct_str[i];
2966             bool input_p = i >= def->nb_oargs;
2967 
2968             /* Incomplete TCGTargetOpDef entry. */
2969             tcg_debug_assert(ct_str != NULL);
2970 
2971             switch (*ct_str) {
2972             case '0' ... '9':
2973                 o = *ct_str - '0';
2974                 tcg_debug_assert(input_p);
2975                 tcg_debug_assert(o < def->nb_oargs);
2976                 tcg_debug_assert(def->args_ct[o].regs != 0);
2977                 tcg_debug_assert(!def->args_ct[o].oalias);
2978                 def->args_ct[i] = def->args_ct[o];
2979                 /* The output sets oalias.  */
2980                 def->args_ct[o].oalias = 1;
2981                 def->args_ct[o].alias_index = i;
2982                 /* The input sets ialias. */
2983                 def->args_ct[i].ialias = 1;
2984                 def->args_ct[i].alias_index = o;
2985                 if (def->args_ct[i].pair) {
2986                     saw_alias_pair = true;
2987                 }
2988                 tcg_debug_assert(ct_str[1] == '\0');
2989                 continue;
2990 
2991             case '&':
2992                 tcg_debug_assert(!input_p);
2993                 def->args_ct[i].newreg = true;
2994                 ct_str++;
2995                 break;
2996 
2997             case 'p': /* plus */
2998                 /* Allocate to the register after the previous. */
2999                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3000                 o = i - 1;
3001                 tcg_debug_assert(!def->args_ct[o].pair);
3002                 tcg_debug_assert(!def->args_ct[o].ct);
3003                 def->args_ct[i] = (TCGArgConstraint){
3004                     .pair = 2,
3005                     .pair_index = o,
3006                     .regs = def->args_ct[o].regs << 1,
3007                     .newreg = def->args_ct[o].newreg,
3008                 };
3009                 def->args_ct[o].pair = 1;
3010                 def->args_ct[o].pair_index = i;
3011                 tcg_debug_assert(ct_str[1] == '\0');
3012                 continue;
3013 
3014             case 'm': /* minus */
3015                 /* Allocate to the register before the previous. */
3016                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3017                 o = i - 1;
3018                 tcg_debug_assert(!def->args_ct[o].pair);
3019                 tcg_debug_assert(!def->args_ct[o].ct);
3020                 def->args_ct[i] = (TCGArgConstraint){
3021                     .pair = 1,
3022                     .pair_index = o,
3023                     .regs = def->args_ct[o].regs >> 1,
3024                     .newreg = def->args_ct[o].newreg,
3025                 };
3026                 def->args_ct[o].pair = 2;
3027                 def->args_ct[o].pair_index = i;
3028                 tcg_debug_assert(ct_str[1] == '\0');
3029                 continue;
3030             }
3031 
3032             do {
3033                 switch (*ct_str) {
3034                 case 'i':
3035                     def->args_ct[i].ct |= TCG_CT_CONST;
3036                     break;
3037 
3038                 /* Include all of the target-specific constraints. */
3039 
3040 #undef CONST
3041 #define CONST(CASE, MASK) \
3042     case CASE: def->args_ct[i].ct |= MASK; break;
3043 #define REGS(CASE, MASK) \
3044     case CASE: def->args_ct[i].regs |= MASK; break;
3045 
3046 #include "tcg-target-con-str.h"
3047 
3048 #undef REGS
3049 #undef CONST
3050                 default:
3051                 case '0' ... '9':
3052                 case '&':
3053                 case 'p':
3054                 case 'm':
3055                     /* Typo in TCGTargetOpDef constraint. */
3056                     g_assert_not_reached();
3057                 }
3058             } while (*++ct_str != '\0');
3059         }
3060 
3061         /* TCGTargetOpDef entry with too much information? */
3062         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3063 
3064         /*
3065          * Fix up output pairs that are aliased with inputs.
3066          * When we created the alias, we copied pair from the output.
3067          * There are three cases:
3068          *    (1a) Pairs of inputs alias pairs of outputs.
3069          *    (1b) One input aliases the first of a pair of outputs.
3070          *    (2)  One input aliases the second of a pair of outputs.
3071          *
3072          * Case 1a is handled by making sure that the pair_index'es are
3073          * properly updated so that they appear the same as a pair of inputs.
3074          *
3075          * Case 1b is handled by setting the pair_index of the input to
3076          * itself, simply so it doesn't point to an unrelated argument.
3077          * Since we don't encounter the "second" during the input allocation
3078          * phase, nothing happens with the second half of the input pair.
3079          *
3080          * Case 2 is handled by setting the second input to pair=3, the
3081          * first output to pair=3, and the pair_index'es to match.
3082          */
3083         if (saw_alias_pair) {
3084             for (i = def->nb_oargs; i < nb_args; i++) {
3085                 /*
3086                  * Since [0-9pm] must be alone in the constraint string,
3087                  * the only way they can both be set is if the pair comes
3088                  * from the output alias.
3089                  */
3090                 if (!def->args_ct[i].ialias) {
3091                     continue;
3092                 }
3093                 switch (def->args_ct[i].pair) {
3094                 case 0:
3095                     break;
3096                 case 1:
3097                     o = def->args_ct[i].alias_index;
3098                     o2 = def->args_ct[o].pair_index;
3099                     tcg_debug_assert(def->args_ct[o].pair == 1);
3100                     tcg_debug_assert(def->args_ct[o2].pair == 2);
3101                     if (def->args_ct[o2].oalias) {
3102                         /* Case 1a */
3103                         i2 = def->args_ct[o2].alias_index;
3104                         tcg_debug_assert(def->args_ct[i2].pair == 2);
3105                         def->args_ct[i2].pair_index = i;
3106                         def->args_ct[i].pair_index = i2;
3107                     } else {
3108                         /* Case 1b */
3109                         def->args_ct[i].pair_index = i;
3110                     }
3111                     break;
3112                 case 2:
3113                     o = def->args_ct[i].alias_index;
3114                     o2 = def->args_ct[o].pair_index;
3115                     tcg_debug_assert(def->args_ct[o].pair == 2);
3116                     tcg_debug_assert(def->args_ct[o2].pair == 1);
3117                     if (def->args_ct[o2].oalias) {
3118                         /* Case 1a */
3119                         i2 = def->args_ct[o2].alias_index;
3120                         tcg_debug_assert(def->args_ct[i2].pair == 1);
3121                         def->args_ct[i2].pair_index = i;
3122                         def->args_ct[i].pair_index = i2;
3123                     } else {
3124                         /* Case 2 */
3125                         def->args_ct[i].pair = 3;
3126                         def->args_ct[o2].pair = 3;
3127                         def->args_ct[i].pair_index = o2;
3128                         def->args_ct[o2].pair_index = i;
3129                     }
3130                     break;
3131                 default:
3132                     g_assert_not_reached();
3133                 }
3134             }
3135         }
3136 
3137         /* sort the constraints (XXX: this is just a heuristic) */
3138         sort_constraints(def, 0, def->nb_oargs);
3139         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3140     }
3141 }
3142 
3143 static void remove_label_use(TCGOp *op, int idx)
3144 {
3145     TCGLabel *label = arg_label(op->args[idx]);
3146     TCGLabelUse *use;
3147 
3148     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3149         if (use->op == op) {
3150             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3151             return;
3152         }
3153     }
3154     g_assert_not_reached();
3155 }
3156 
3157 void tcg_op_remove(TCGContext *s, TCGOp *op)
3158 {
3159     switch (op->opc) {
3160     case INDEX_op_br:
3161         remove_label_use(op, 0);
3162         break;
3163     case INDEX_op_brcond_i32:
3164     case INDEX_op_brcond_i64:
3165         remove_label_use(op, 3);
3166         break;
3167     case INDEX_op_brcond2_i32:
3168         remove_label_use(op, 5);
3169         break;
3170     default:
3171         break;
3172     }
3173 
3174     QTAILQ_REMOVE(&s->ops, op, link);
3175     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3176     s->nb_ops--;
3177 }
3178 
3179 void tcg_remove_ops_after(TCGOp *op)
3180 {
3181     TCGContext *s = tcg_ctx;
3182 
3183     while (true) {
3184         TCGOp *last = tcg_last_op();
3185         if (last == op) {
3186             return;
3187         }
3188         tcg_op_remove(s, last);
3189     }
3190 }
3191 
3192 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3193 {
3194     TCGContext *s = tcg_ctx;
3195     TCGOp *op = NULL;
3196 
3197     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3198         QTAILQ_FOREACH(op, &s->free_ops, link) {
3199             if (nargs <= op->nargs) {
3200                 QTAILQ_REMOVE(&s->free_ops, op, link);
3201                 nargs = op->nargs;
3202                 goto found;
3203             }
3204         }
3205     }
3206 
3207     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3208     nargs = MAX(4, nargs);
3209     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3210 
3211  found:
3212     memset(op, 0, offsetof(TCGOp, link));
3213     op->opc = opc;
3214     op->nargs = nargs;
3215 
3216     /* Check for bitfield overflow. */
3217     tcg_debug_assert(op->nargs == nargs);
3218 
3219     s->nb_ops++;
3220     return op;
3221 }
3222 
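/*
 * Note (informal): the free list above is searched first-fit by argument
 * capacity, and a recycled op keeps its larger op->nargs, so any extra
 * argument slots stay available for later reuse.
 */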
3223 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3224 {
3225     TCGOp *op = tcg_op_alloc(opc, nargs);
3226 
3227     if (tcg_ctx->emit_before_op) {
3228         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3229     } else {
3230         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3231     }
3232     return op;
3233 }
3234 
3235 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3236                             TCGOpcode opc, unsigned nargs)
3237 {
3238     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3239     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3240     return new_op;
3241 }
3242 
3243 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3244                            TCGOpcode opc, unsigned nargs)
3245 {
3246     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3247     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3248     return new_op;
3249 }
3250 
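/*
 * Usage sketch (with hypothetical temps "dst" and "src"): a pass that
 * wants to materialize a copy ahead of an existing op could do
 *
 *     TCGOp *mov = tcg_op_insert_before(s, op, INDEX_op_mov_i32, 2);
 *     mov->args[0] = temp_arg(dst);
 *     mov->args[1] = temp_arg(src);
 *
 * liveness_pass_2 below uses the same pattern to insert loads and
 * stores around ops that reference indirect globals.
 */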
3251 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3252 {
3253     TCGLabelUse *u;
3254 
3255     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3256         TCGOp *op = u->op;
3257         switch (op->opc) {
3258         case INDEX_op_br:
3259             op->args[0] = label_arg(to);
3260             break;
3261         case INDEX_op_brcond_i32:
3262         case INDEX_op_brcond_i64:
3263             op->args[3] = label_arg(to);
3264             break;
3265         case INDEX_op_brcond2_i32:
3266             op->args[5] = label_arg(to);
3267             break;
3268         default:
3269             g_assert_not_reached();
3270         }
3271     }
3272 
3273     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3274 }
3275 
3276 /* Reachability analysis: remove unreachable code.  */
3277 static void __attribute__((noinline))
3278 reachable_code_pass(TCGContext *s)
3279 {
3280     TCGOp *op, *op_next, *op_prev;
3281     bool dead = false;
3282 
3283     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3284         bool remove = dead;
3285         TCGLabel *label;
3286 
3287         switch (op->opc) {
3288         case INDEX_op_set_label:
3289             label = arg_label(op->args[0]);
3290 
3291             /*
3292              * Note that the first op in the TB is always a load,
3293              * so there is always something before a label.
3294              */
3295             op_prev = QTAILQ_PREV(op, link);
3296 
3297             /*
3298              * If we find two sequential labels, move all branches to
3299              * reference the second label and remove the first label.
3300              * Do this before branch to next optimization, so that the
3301              * middle label is out of the way.
3302              */
3303             if (op_prev->opc == INDEX_op_set_label) {
3304                 move_label_uses(label, arg_label(op_prev->args[0]));
3305                 tcg_op_remove(s, op_prev);
3306                 op_prev = QTAILQ_PREV(op, link);
3307             }
3308 
3309             /*
3310              * Optimization can fold conditional branches to unconditional.
3311              * If we find a label which is preceded by an unconditional
3312              * branch to next, remove the branch.  We couldn't do this when
3313              * processing the branch because any dead code between the branch
3314              * and label had not yet been removed.
3315              */
3316             if (op_prev->opc == INDEX_op_br &&
3317                 label == arg_label(op_prev->args[0])) {
3318                 tcg_op_remove(s, op_prev);
3319                 /* Fall through means insns become live again.  */
3320                 dead = false;
3321             }
3322 
3323             if (QSIMPLEQ_EMPTY(&label->branches)) {
3324                 /*
3325                  * While there is an occasional backward branch, virtually
3326                  * all branches generated by the translators are forward.
3327                  * Which means that by the time we see a label, we will
3328                  * generally have already removed every reference it will
3329                  * ever have, and there is little to be gained by iterating.
3330                  */
3331                 remove = true;
3332             } else {
3333                 /* Once we see a label, insns become live again.  */
3334                 dead = false;
3335                 remove = false;
3336             }
3337             break;
3338 
3339         case INDEX_op_br:
3340         case INDEX_op_exit_tb:
3341         case INDEX_op_goto_ptr:
3342             /* Unconditional branches; everything following is dead.  */
3343             dead = true;
3344             break;
3345 
3346         case INDEX_op_call:
3347             /* Notice noreturn helper calls, such as those raising exceptions.  */
3348             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3349                 dead = true;
3350             }
3351             break;
3352 
3353         case INDEX_op_insn_start:
3354             /* Never remove -- we need to keep these for unwind.  */
3355             remove = false;
3356             break;
3357 
3358         default:
3359             break;
3360         }
3361 
3362         if (remove) {
3363             tcg_op_remove(s, op);
3364         }
3365     }
3366 }
3367 
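/*
 * Example for reachable_code_pass (sketch): if optimization folds a
 * brcond into "br $L", any ops between that br and "set_label $L" are
 * dropped as dead code; once the label is reached, the now branch-to-next
 * is deleted as well, and the label itself is removed if no other branch
 * references it.
 */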
3368 #define TS_DEAD  1
3369 #define TS_MEM   2
3370 
3371 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3372 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3373 
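/*
 * Temp state during the backward walk of liveness_pass_1 (informal):
 * 0 means the value is still consumed later, TS_DEAD means it is not
 * used again, and TS_MEM means an up-to-date copy must exist in memory.
 * An output that sees TS_DEAD or TS_MEM gets the corresponding
 * DEAD_ARG / SYNC_ARG bit recorded in op->life for the allocator.
 */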
3374 /* For liveness_pass_1, the register preferences for a given temp.  */
3375 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3376 {
3377     return ts->state_ptr;
3378 }
3379 
3380 /* For liveness_pass_1, reset the preferences for a given temp to the
3381  * maximal regset for its type.
3382  */
3383 static inline void la_reset_pref(TCGTemp *ts)
3384 {
3385     *la_temp_pref(ts)
3386         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3387 }
3388 
3389 /* liveness analysis: end of function: all temps are dead, and globals
3390    should be in memory. */
3391 static void la_func_end(TCGContext *s, int ng, int nt)
3392 {
3393     int i;
3394 
3395     for (i = 0; i < ng; ++i) {
3396         s->temps[i].state = TS_DEAD | TS_MEM;
3397         la_reset_pref(&s->temps[i]);
3398     }
3399     for (i = ng; i < nt; ++i) {
3400         s->temps[i].state = TS_DEAD;
3401         la_reset_pref(&s->temps[i]);
3402     }
3403 }
3404 
3405 /* liveness analysis: end of basic block: all temps are dead, globals
3406    and local temps should be in memory. */
3407 static void la_bb_end(TCGContext *s, int ng, int nt)
3408 {
3409     int i;
3410 
3411     for (i = 0; i < nt; ++i) {
3412         TCGTemp *ts = &s->temps[i];
3413         int state;
3414 
3415         switch (ts->kind) {
3416         case TEMP_FIXED:
3417         case TEMP_GLOBAL:
3418         case TEMP_TB:
3419             state = TS_DEAD | TS_MEM;
3420             break;
3421         case TEMP_EBB:
3422         case TEMP_CONST:
3423             state = TS_DEAD;
3424             break;
3425         default:
3426             g_assert_not_reached();
3427         }
3428         ts->state = state;
3429         la_reset_pref(ts);
3430     }
3431 }
3432 
3433 /* liveness analysis: sync globals back to memory.  */
3434 static void la_global_sync(TCGContext *s, int ng)
3435 {
3436     int i;
3437 
3438     for (i = 0; i < ng; ++i) {
3439         int state = s->temps[i].state;
3440         s->temps[i].state = state | TS_MEM;
3441         if (state == TS_DEAD) {
3442             /* If the global was previously dead, reset prefs.  */
3443             la_reset_pref(&s->temps[i]);
3444         }
3445     }
3446 }
3447 
3448 /*
3449  * liveness analysis: conditional branch: all temps are dead unless
3450  * explicitly live-across-conditional-branch, globals and local temps
3451  * should be synced.
3452  */
3453 static void la_bb_sync(TCGContext *s, int ng, int nt)
3454 {
3455     la_global_sync(s, ng);
3456 
3457     for (int i = ng; i < nt; ++i) {
3458         TCGTemp *ts = &s->temps[i];
3459         int state;
3460 
3461         switch (ts->kind) {
3462         case TEMP_TB:
3463             state = ts->state;
3464             ts->state = state | TS_MEM;
3465             if (state != TS_DEAD) {
3466                 continue;
3467             }
3468             break;
3469         case TEMP_EBB:
3470         case TEMP_CONST:
3471             continue;
3472         default:
3473             g_assert_not_reached();
3474         }
3475         la_reset_pref(&s->temps[i]);
3476     }
3477 }
3478 
3479 /* liveness analysis: sync globals back to memory and kill.  */
3480 static void la_global_kill(TCGContext *s, int ng)
3481 {
3482     int i;
3483 
3484     for (i = 0; i < ng; i++) {
3485         s->temps[i].state = TS_DEAD | TS_MEM;
3486         la_reset_pref(&s->temps[i]);
3487     }
3488 }
3489 
3490 /* liveness analysis: note live globals crossing calls.  */
3491 static void la_cross_call(TCGContext *s, int nt)
3492 {
3493     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3494     int i;
3495 
3496     for (i = 0; i < nt; i++) {
3497         TCGTemp *ts = &s->temps[i];
3498         if (!(ts->state & TS_DEAD)) {
3499             TCGRegSet *pset = la_temp_pref(ts);
3500             TCGRegSet set = *pset;
3501 
3502             set &= mask;
3503             /* If the combination is not possible, restart.  */
3504             if (set == 0) {
3505                 set = tcg_target_available_regs[ts->type] & mask;
3506             }
3507             *pset = set;
3508         }
3509     }
3510 }
3511 
3512 /*
3513  * Liveness analysis: verify the lifetime of TEMP_TB temps, and
3514  * reduce them to TEMP_EBB where possible.
3515  */
3516 static void __attribute__((noinline))
3517 liveness_pass_0(TCGContext *s)
3518 {
3519     void * const multiple_ebb = (void *)(uintptr_t)-1;
3520     int nb_temps = s->nb_temps;
3521     TCGOp *op, *ebb;
3522 
3523     for (int i = s->nb_globals; i < nb_temps; ++i) {
3524         s->temps[i].state_ptr = NULL;
3525     }
3526 
3527     /*
3528      * Represent each EBB by the op at which it begins.  In the case of
3529      * the first EBB, this is the first op, otherwise it is a label.
3530      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3531      * within a single EBB, else MULTIPLE_EBB.
3532      */
3533     ebb = QTAILQ_FIRST(&s->ops);
3534     QTAILQ_FOREACH(op, &s->ops, link) {
3535         const TCGOpDef *def;
3536         int nb_oargs, nb_iargs;
3537 
3538         switch (op->opc) {
3539         case INDEX_op_set_label:
3540             ebb = op;
3541             continue;
3542         case INDEX_op_discard:
3543             continue;
3544         case INDEX_op_call:
3545             nb_oargs = TCGOP_CALLO(op);
3546             nb_iargs = TCGOP_CALLI(op);
3547             break;
3548         default:
3549             def = &tcg_op_defs[op->opc];
3550             nb_oargs = def->nb_oargs;
3551             nb_iargs = def->nb_iargs;
3552             break;
3553         }
3554 
3555         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3556             TCGTemp *ts = arg_temp(op->args[i]);
3557 
3558             if (ts->kind != TEMP_TB) {
3559                 continue;
3560             }
3561             if (ts->state_ptr == NULL) {
3562                 ts->state_ptr = ebb;
3563             } else if (ts->state_ptr != ebb) {
3564                 ts->state_ptr = multiple_ebb;
3565             }
3566         }
3567     }
3568 
3569     /*
3570      * For TEMP_TB that turned out not to be used beyond one EBB,
3571      * reduce the liveness to TEMP_EBB.
3572      */
3573     for (int i = s->nb_globals; i < nb_temps; ++i) {
3574         TCGTemp *ts = &s->temps[i];
3575         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3576             ts->kind = TEMP_EBB;
3577         }
3578     }
3579 }
3580 
3581 /* Liveness analysis: update op->life to tell whether a given
3582    input argument is dead. Instructions updating dead
3583    temporaries are removed. */
3584 static void __attribute__((noinline))
3585 liveness_pass_1(TCGContext *s)
3586 {
3587     int nb_globals = s->nb_globals;
3588     int nb_temps = s->nb_temps;
3589     TCGOp *op, *op_prev;
3590     TCGRegSet *prefs;
3591     int i;
3592 
3593     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3594     for (i = 0; i < nb_temps; ++i) {
3595         s->temps[i].state_ptr = prefs + i;
3596     }
3597 
3598     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3599     la_func_end(s, nb_globals, nb_temps);
3600 
3601     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3602         int nb_iargs, nb_oargs;
3603         TCGOpcode opc_new, opc_new2;
3604         bool have_opc_new2;
3605         TCGLifeData arg_life = 0;
3606         TCGTemp *ts;
3607         TCGOpcode opc = op->opc;
3608         const TCGOpDef *def = &tcg_op_defs[opc];
3609 
3610         switch (opc) {
3611         case INDEX_op_call:
3612             {
3613                 const TCGHelperInfo *info = tcg_call_info(op);
3614                 int call_flags = tcg_call_flags(op);
3615 
3616                 nb_oargs = TCGOP_CALLO(op);
3617                 nb_iargs = TCGOP_CALLI(op);
3618 
3619                 /* pure functions can be removed if their result is unused */
3620                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3621                     for (i = 0; i < nb_oargs; i++) {
3622                         ts = arg_temp(op->args[i]);
3623                         if (ts->state != TS_DEAD) {
3624                             goto do_not_remove_call;
3625                         }
3626                     }
3627                     goto do_remove;
3628                 }
3629             do_not_remove_call:
3630 
3631                 /* Output args are dead.  */
3632                 for (i = 0; i < nb_oargs; i++) {
3633                     ts = arg_temp(op->args[i]);
3634                     if (ts->state & TS_DEAD) {
3635                         arg_life |= DEAD_ARG << i;
3636                     }
3637                     if (ts->state & TS_MEM) {
3638                         arg_life |= SYNC_ARG << i;
3639                     }
3640                     ts->state = TS_DEAD;
3641                     la_reset_pref(ts);
3642                 }
3643 
3644                 /* Not used -- outputs are fixed by tcg_target_call_oarg_reg().  */
3645                 memset(op->output_pref, 0, sizeof(op->output_pref));
3646 
3647                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3648                                     TCG_CALL_NO_READ_GLOBALS))) {
3649                     la_global_kill(s, nb_globals);
3650                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3651                     la_global_sync(s, nb_globals);
3652                 }
3653 
3654                 /* Record arguments that die in this helper.  */
3655                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3656                     ts = arg_temp(op->args[i]);
3657                     if (ts->state & TS_DEAD) {
3658                         arg_life |= DEAD_ARG << i;
3659                     }
3660                 }
3661 
3662                 /* For all live registers, remove call-clobbered prefs.  */
3663                 la_cross_call(s, nb_temps);
3664 
3665                 /*
3666                  * Input arguments are live for preceding opcodes.
3667                  *
3668                  * For those arguments that die, and will be allocated in
3669                  * registers, clear the register set for that arg, to be
3670                  * filled in below.  For args that will be on the stack,
3671                  * reset to any available reg.  Process arguments in reverse
3672                  * order so that if a temp is used more than once, the stack
3673                  * reset to max happens before the register reset to 0.
3674                  */
3675                 for (i = nb_iargs - 1; i >= 0; i--) {
3676                     const TCGCallArgumentLoc *loc = &info->in[i];
3677                     ts = arg_temp(op->args[nb_oargs + i]);
3678 
3679                     if (ts->state & TS_DEAD) {
3680                         switch (loc->kind) {
3681                         case TCG_CALL_ARG_NORMAL:
3682                         case TCG_CALL_ARG_EXTEND_U:
3683                         case TCG_CALL_ARG_EXTEND_S:
3684                             if (arg_slot_reg_p(loc->arg_slot)) {
3685                                 *la_temp_pref(ts) = 0;
3686                                 break;
3687                             }
3688                             /* fall through */
3689                         default:
3690                             *la_temp_pref(ts) =
3691                                 tcg_target_available_regs[ts->type];
3692                             break;
3693                         }
3694                         ts->state &= ~TS_DEAD;
3695                     }
3696                 }
3697 
3698                 /*
3699                  * For each input argument, add its input register to prefs.
3700                  * If a temp is used once, this produces a single set bit;
3701                  * if a temp is used multiple times, this produces a set.
3702                  */
3703                 for (i = 0; i < nb_iargs; i++) {
3704                     const TCGCallArgumentLoc *loc = &info->in[i];
3705                     ts = arg_temp(op->args[nb_oargs + i]);
3706 
3707                     switch (loc->kind) {
3708                     case TCG_CALL_ARG_NORMAL:
3709                     case TCG_CALL_ARG_EXTEND_U:
3710                     case TCG_CALL_ARG_EXTEND_S:
3711                         if (arg_slot_reg_p(loc->arg_slot)) {
3712                             tcg_regset_set_reg(*la_temp_pref(ts),
3713                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3714                         }
3715                         break;
3716                     default:
3717                         break;
3718                     }
3719                 }
3720             }
3721             break;
3722         case INDEX_op_insn_start:
3723             break;
3724         case INDEX_op_discard:
3725             /* mark the temporary as dead */
3726             ts = arg_temp(op->args[0]);
3727             ts->state = TS_DEAD;
3728             la_reset_pref(ts);
3729             break;
3730 
3731         case INDEX_op_add2_i32:
3732             opc_new = INDEX_op_add_i32;
3733             goto do_addsub2;
3734         case INDEX_op_sub2_i32:
3735             opc_new = INDEX_op_sub_i32;
3736             goto do_addsub2;
3737         case INDEX_op_add2_i64:
3738             opc_new = INDEX_op_add_i64;
3739             goto do_addsub2;
3740         case INDEX_op_sub2_i64:
3741             opc_new = INDEX_op_sub_i64;
3742         do_addsub2:
3743             nb_iargs = 4;
3744             nb_oargs = 2;
3745             /* Test if the high part of the operation is dead, but not
3746                the low part.  The result can be optimized to a simple
3747                add or sub.  This happens often for the x86_64 guest
3748                when the CPU mode is set to 32-bit.  */
3749             if (arg_temp(op->args[1])->state == TS_DEAD) {
3750                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3751                     goto do_remove;
3752                 }
3753                 /* Replace the opcode and adjust the args in place,
3754                    leaving 3 unused args at the end.  */
3755                 op->opc = opc = opc_new;
3756                 op->args[1] = op->args[2];
3757                 op->args[2] = op->args[4];
3758                 /* Fall through and mark the single-word operation live.  */
3759                 nb_iargs = 2;
3760                 nb_oargs = 1;
3761             }
3762             goto do_not_remove;
3763 
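        /*
         * Concrete example (sketch): for "add2_i32 rl, rh, al, ah, bl, bh"
         * where only rh is dead, the rewrite above yields
         * "add_i32 rl, al, bl", leaving the trailing three args unused.
         */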
3764         case INDEX_op_mulu2_i32:
3765             opc_new = INDEX_op_mul_i32;
3766             opc_new2 = INDEX_op_muluh_i32;
3767             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3768             goto do_mul2;
3769         case INDEX_op_muls2_i32:
3770             opc_new = INDEX_op_mul_i32;
3771             opc_new2 = INDEX_op_mulsh_i32;
3772             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3773             goto do_mul2;
3774         case INDEX_op_mulu2_i64:
3775             opc_new = INDEX_op_mul_i64;
3776             opc_new2 = INDEX_op_muluh_i64;
3777             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3778             goto do_mul2;
3779         case INDEX_op_muls2_i64:
3780             opc_new = INDEX_op_mul_i64;
3781             opc_new2 = INDEX_op_mulsh_i64;
3782             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3783             goto do_mul2;
3784         do_mul2:
3785             nb_iargs = 2;
3786             nb_oargs = 2;
3787             if (arg_temp(op->args[1])->state == TS_DEAD) {
3788                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3789                     /* Both parts of the operation are dead.  */
3790                     goto do_remove;
3791                 }
3792                 /* The high part of the operation is dead; generate the low. */
3793                 op->opc = opc = opc_new;
3794                 op->args[1] = op->args[2];
3795                 op->args[2] = op->args[3];
3796             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3797                 /* The low part of the operation is dead; generate the high. */
3798                 op->opc = opc = opc_new2;
3799                 op->args[0] = op->args[1];
3800                 op->args[1] = op->args[2];
3801                 op->args[2] = op->args[3];
3802             } else {
3803                 goto do_not_remove;
3804             }
3805             /* Mark the single-word operation live.  */
3806             nb_oargs = 1;
3807             goto do_not_remove;
3808 
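        /*
         * Likewise (sketch): "mulu2_i32 rl, rh, a, b" with rh dead becomes
         * "mul_i32 rl, a, b"; with rl dead and muluh available it becomes
         * "muluh_i32 rh, a, b".
         */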
3809         default:
3810             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3811             nb_iargs = def->nb_iargs;
3812             nb_oargs = def->nb_oargs;
3813 
3814             /* Test if the operation can be removed because all
3815                its outputs are dead. We assume that nb_oargs == 0
3816                implies side effects.  */
3817             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3818                 for (i = 0; i < nb_oargs; i++) {
3819                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3820                         goto do_not_remove;
3821                     }
3822                 }
3823                 goto do_remove;
3824             }
3825             goto do_not_remove;
3826 
3827         do_remove:
3828             tcg_op_remove(s, op);
3829             break;
3830 
3831         do_not_remove:
3832             for (i = 0; i < nb_oargs; i++) {
3833                 ts = arg_temp(op->args[i]);
3834 
3835                 /* Remember the preference of the uses that followed.  */
3836                 if (i < ARRAY_SIZE(op->output_pref)) {
3837                     op->output_pref[i] = *la_temp_pref(ts);
3838                 }
3839 
3840                 /* Output args are dead.  */
3841                 if (ts->state & TS_DEAD) {
3842                     arg_life |= DEAD_ARG << i;
3843                 }
3844                 if (ts->state & TS_MEM) {
3845                     arg_life |= SYNC_ARG << i;
3846                 }
3847                 ts->state = TS_DEAD;
3848                 la_reset_pref(ts);
3849             }
3850 
3851             /* If end of basic block, update.  */
3852             if (def->flags & TCG_OPF_BB_EXIT) {
3853                 la_func_end(s, nb_globals, nb_temps);
3854             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3855                 la_bb_sync(s, nb_globals, nb_temps);
3856             } else if (def->flags & TCG_OPF_BB_END) {
3857                 la_bb_end(s, nb_globals, nb_temps);
3858             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3859                 la_global_sync(s, nb_globals);
3860                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3861                     la_cross_call(s, nb_temps);
3862                 }
3863             }
3864 
3865             /* Record arguments that die in this opcode.  */
3866             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3867                 ts = arg_temp(op->args[i]);
3868                 if (ts->state & TS_DEAD) {
3869                     arg_life |= DEAD_ARG << i;
3870                 }
3871             }
3872 
3873             /* Input arguments are live for preceding opcodes.  */
3874             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3875                 ts = arg_temp(op->args[i]);
3876                 if (ts->state & TS_DEAD) {
3877                     /* For operands that were dead, initially allow
3878                        all regs for the type.  */
3879                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3880                     ts->state &= ~TS_DEAD;
3881                 }
3882             }
3883 
3884             /* Incorporate constraints for this opcode's operands.  */
3885             switch (opc) {
3886             case INDEX_op_mov_i32:
3887             case INDEX_op_mov_i64:
3888                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3889                    have proper constraints.  That said, special-case
3890                    moves to propagate preferences backward.  */
3891                 if (IS_DEAD_ARG(1)) {
3892                     *la_temp_pref(arg_temp(op->args[0]))
3893                         = *la_temp_pref(arg_temp(op->args[1]));
3894                 }
3895                 break;
3896 
3897             default:
3898                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3899                     const TCGArgConstraint *ct = &def->args_ct[i];
3900                     TCGRegSet set, *pset;
3901 
3902                     ts = arg_temp(op->args[i]);
3903                     pset = la_temp_pref(ts);
3904                     set = *pset;
3905 
3906                     set &= ct->regs;
3907                     if (ct->ialias) {
3908                         set &= output_pref(op, ct->alias_index);
3909                     }
3910                     /* If the combination is not possible, restart.  */
3911                     if (set == 0) {
3912                         set = ct->regs;
3913                     }
3914                     *pset = set;
3915                 }
3916                 break;
3917             }
3918             break;
3919         }
3920         op->life = arg_life;
3921     }
3922 }
3923 
3924 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3925 static bool __attribute__((noinline))
3926 liveness_pass_2(TCGContext *s)
3927 {
3928     int nb_globals = s->nb_globals;
3929     int nb_temps, i;
3930     bool changes = false;
3931     TCGOp *op, *op_next;
3932 
3933     /* Create a temporary for each indirect global.  */
3934     for (i = 0; i < nb_globals; ++i) {
3935         TCGTemp *its = &s->temps[i];
3936         if (its->indirect_reg) {
3937             TCGTemp *dts = tcg_temp_alloc(s);
3938             dts->type = its->type;
3939             dts->base_type = its->base_type;
3940             dts->temp_subindex = its->temp_subindex;
3941             dts->kind = TEMP_EBB;
3942             its->state_ptr = dts;
3943         } else {
3944             its->state_ptr = NULL;
3945         }
3946         /* All globals begin dead.  */
3947         its->state = TS_DEAD;
3948     }
3949     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3950         TCGTemp *its = &s->temps[i];
3951         its->state_ptr = NULL;
3952         its->state = TS_DEAD;
3953     }
3954 
3955     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3956         TCGOpcode opc = op->opc;
3957         const TCGOpDef *def = &tcg_op_defs[opc];
3958         TCGLifeData arg_life = op->life;
3959         int nb_iargs, nb_oargs, call_flags;
3960         TCGTemp *arg_ts, *dir_ts;
3961 
3962         if (opc == INDEX_op_call) {
3963             nb_oargs = TCGOP_CALLO(op);
3964             nb_iargs = TCGOP_CALLI(op);
3965             call_flags = tcg_call_flags(op);
3966         } else {
3967             nb_iargs = def->nb_iargs;
3968             nb_oargs = def->nb_oargs;
3969 
3970             /* Set flags similar to those that calls require.  */
3971             if (def->flags & TCG_OPF_COND_BRANCH) {
3972                 /* Like reading globals: sync_globals */
3973                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3974             } else if (def->flags & TCG_OPF_BB_END) {
3975                 /* Like writing globals: save_globals */
3976                 call_flags = 0;
3977             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3978                 /* Like reading globals: sync_globals */
3979                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3980             } else {
3981                 /* No effect on globals.  */
3982                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3983                               TCG_CALL_NO_WRITE_GLOBALS);
3984             }
3985         }
3986 
3987         /* Make sure that input arguments are available.  */
3988         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3989             arg_ts = arg_temp(op->args[i]);
3990             dir_ts = arg_ts->state_ptr;
3991             if (dir_ts && arg_ts->state == TS_DEAD) {
3992                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3993                                   ? INDEX_op_ld_i32
3994                                   : INDEX_op_ld_i64);
3995                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3996 
3997                 lop->args[0] = temp_arg(dir_ts);
3998                 lop->args[1] = temp_arg(arg_ts->mem_base);
3999                 lop->args[2] = arg_ts->mem_offset;
4000 
4001                 /* Loaded, but synced with memory.  */
4002                 arg_ts->state = TS_MEM;
4003             }
4004         }
4005 
4006         /* Perform input replacement, and mark inputs that became dead.
4007            No action is required except keeping the temp state up to date
4008            so that we reload when needed.  */
4009         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4010             arg_ts = arg_temp(op->args[i]);
4011             dir_ts = arg_ts->state_ptr;
4012             if (dir_ts) {
4013                 op->args[i] = temp_arg(dir_ts);
4014                 changes = true;
4015                 if (IS_DEAD_ARG(i)) {
4016                     arg_ts->state = TS_DEAD;
4017                 }
4018             }
4019         }
4020 
4021         /* Liveness analysis should ensure that the following are
4022            all correct, for call sites and basic block end points.  */
4023         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4024             /* Nothing to do */
4025         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4026             for (i = 0; i < nb_globals; ++i) {
4027                 /* Liveness should see that globals are synced back,
4028                    that is, either TS_DEAD or TS_MEM.  */
4029                 arg_ts = &s->temps[i];
4030                 tcg_debug_assert(arg_ts->state_ptr == 0
4031                                  || arg_ts->state != 0);
4032             }
4033         } else {
4034             for (i = 0; i < nb_globals; ++i) {
4035                 /* Liveness should see that globals are saved back,
4036                    that is, TS_DEAD, waiting to be reloaded.  */
4037                 arg_ts = &s->temps[i];
4038                 tcg_debug_assert(arg_ts->state_ptr == 0
4039                                  || arg_ts->state == TS_DEAD);
4040             }
4041         }
4042 
4043         /* Outputs become available.  */
4044         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4045             arg_ts = arg_temp(op->args[0]);
4046             dir_ts = arg_ts->state_ptr;
4047             if (dir_ts) {
4048                 op->args[0] = temp_arg(dir_ts);
4049                 changes = true;
4050 
4051                 /* The output is now live and modified.  */
4052                 arg_ts->state = 0;
4053 
4054                 if (NEED_SYNC_ARG(0)) {
4055                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4056                                       ? INDEX_op_st_i32
4057                                       : INDEX_op_st_i64);
4058                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4059                     TCGTemp *out_ts = dir_ts;
4060 
4061                     if (IS_DEAD_ARG(0)) {
4062                         out_ts = arg_temp(op->args[1]);
4063                         arg_ts->state = TS_DEAD;
4064                         tcg_op_remove(s, op);
4065                     } else {
4066                         arg_ts->state = TS_MEM;
4067                     }
4068 
4069                     sop->args[0] = temp_arg(out_ts);
4070                     sop->args[1] = temp_arg(arg_ts->mem_base);
4071                     sop->args[2] = arg_ts->mem_offset;
4072                 } else {
4073                     tcg_debug_assert(!IS_DEAD_ARG(0));
4074                 }
4075             }
4076         } else {
4077             for (i = 0; i < nb_oargs; i++) {
4078                 arg_ts = arg_temp(op->args[i]);
4079                 dir_ts = arg_ts->state_ptr;
4080                 if (!dir_ts) {
4081                     continue;
4082                 }
4083                 op->args[i] = temp_arg(dir_ts);
4084                 changes = true;
4085 
4086                 /* The output is now live and modified.  */
4087                 arg_ts->state = 0;
4088 
4089                 /* Sync outputs upon their last write.  */
4090                 if (NEED_SYNC_ARG(i)) {
4091                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4092                                       ? INDEX_op_st_i32
4093                                       : INDEX_op_st_i64);
4094                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4095 
4096                     sop->args[0] = temp_arg(dir_ts);
4097                     sop->args[1] = temp_arg(arg_ts->mem_base);
4098                     sop->args[2] = arg_ts->mem_offset;
4099 
4100                     arg_ts->state = TS_MEM;
4101                 }
4102                 /* Drop outputs that are dead.  */
4103                 if (IS_DEAD_ARG(i)) {
4104                     arg_ts->state = TS_DEAD;
4105                 }
4106             }
4107         }
4108     }
4109 
4110     return changes;
4111 }
4112 
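/*
 * Example of pass 2 (sketch): for an indirect global whose canonical
 * location is [mem_base + mem_offset], a use while its shadow temp is
 * dead gets "ld_i32 tmp, base, offset" inserted before the op and the
 * argument rewritten to tmp; a write that must be synced gets a matching
 * "st_i32 tmp, base, offset" inserted after the op.
 */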
4113 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4114 {
4115     intptr_t off;
4116     int size, align;
4117 
4118     /* When allocating an object, look at the full type. */
4119     size = tcg_type_size(ts->base_type);
4120     switch (ts->base_type) {
4121     case TCG_TYPE_I32:
4122         align = 4;
4123         break;
4124     case TCG_TYPE_I64:
4125     case TCG_TYPE_V64:
4126         align = 8;
4127         break;
4128     case TCG_TYPE_I128:
4129     case TCG_TYPE_V128:
4130     case TCG_TYPE_V256:
4131         /*
4132          * Note that we do not require aligned storage for V256,
4133          * and that we provide alignment for I128 to match V128,
4134          * even if that's above what the host ABI requires.
4135          */
4136         align = 16;
4137         break;
4138     default:
4139         g_assert_not_reached();
4140     }
4141 
4142     /*
4143      * Assume the stack is sufficiently aligned.
4144      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4145      * and do not require 16 byte vector alignment.  This seems slightly
4146      * easier than fully parameterizing the above switch statement.
4147      */
4148     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4149     off = ROUND_UP(s->current_frame_offset, align);
4150 
4151     /* If we've exhausted the stack frame, restart with a smaller TB. */
4152     if (off + size > s->frame_end) {
4153         tcg_raise_tb_overflow(s);
4154     }
4155     s->current_frame_offset = off + size;
4156 #if defined(__sparc__)
4157     off += TCG_TARGET_STACK_BIAS;
4158 #endif
4159 
4160     /* If the object was subdivided, assign memory to all the parts. */
4161     if (ts->base_type != ts->type) {
4162         int part_size = tcg_type_size(ts->type);
4163         int part_count = size / part_size;
4164 
4165         /*
4166          * Each part is allocated sequentially in tcg_temp_new_internal.
4167          * Jump back to the first part by subtracting the current index.
4168          */
4169         ts -= ts->temp_subindex;
4170         for (int i = 0; i < part_count; ++i) {
4171             ts[i].mem_offset = off + i * part_size;
4172             ts[i].mem_base = s->frame_temp;
4173             ts[i].mem_allocated = 1;
4174         }
4175     } else {
4176         ts->mem_offset = off;
4177         ts->mem_base = s->frame_temp;
4178         ts->mem_allocated = 1;
4179     }
4180 }
4181 
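/*
 * Worked example (sketch, assuming TCG_TARGET_STACK_ALIGN >= 8): with
 * current_frame_offset == 4, allocating a TCG_TYPE_I64 temp rounds the
 * offset up to 8, assigns mem_offset = 8, and advances
 * current_frame_offset to 16.
 */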
4182 /* Assign @reg to @ts, and update reg_to_temp[]. */
4183 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4184 {
4185     if (ts->val_type == TEMP_VAL_REG) {
4186         TCGReg old = ts->reg;
4187         tcg_debug_assert(s->reg_to_temp[old] == ts);
4188         if (old == reg) {
4189             return;
4190         }
4191         s->reg_to_temp[old] = NULL;
4192     }
4193     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4194     s->reg_to_temp[reg] = ts;
4195     ts->val_type = TEMP_VAL_REG;
4196     ts->reg = reg;
4197 }
4198 
4199 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4200 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4201 {
4202     tcg_debug_assert(type != TEMP_VAL_REG);
4203     if (ts->val_type == TEMP_VAL_REG) {
4204         TCGReg reg = ts->reg;
4205         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4206         s->reg_to_temp[reg] = NULL;
4207     }
4208     ts->val_type = type;
4209 }
4210 
4211 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4212 
4213 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4214    mark it free; otherwise mark it dead.  */
4215 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4216 {
4217     TCGTempVal new_type;
4218 
4219     switch (ts->kind) {
4220     case TEMP_FIXED:
4221         return;
4222     case TEMP_GLOBAL:
4223     case TEMP_TB:
4224         new_type = TEMP_VAL_MEM;
4225         break;
4226     case TEMP_EBB:
4227         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4228         break;
4229     case TEMP_CONST:
4230         new_type = TEMP_VAL_CONST;
4231         break;
4232     default:
4233         g_assert_not_reached();
4234     }
4235     set_temp_val_nonreg(s, ts, new_type);
4236 }
4237 
4238 /* Mark a temporary as dead.  */
4239 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4240 {
4241     temp_free_or_dead(s, ts, 1);
4242 }
4243 
4244 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4245    register needs to be allocated to store a constant.  If 'free_or_dead'
4246    is non-zero, subsequently release the temporary; if it is positive, the
4247    temp is dead; if it is negative, the temp is free.  */
4248 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4249                       TCGRegSet preferred_regs, int free_or_dead)
4250 {
4251     if (!temp_readonly(ts) && !ts->mem_coherent) {
4252         if (!ts->mem_allocated) {
4253             temp_allocate_frame(s, ts);
4254         }
4255         switch (ts->val_type) {
4256         case TEMP_VAL_CONST:
4257             /* If we're going to free the temp immediately, then we won't
4258                require it later in a register, so attempt to store the
4259                constant to memory directly.  */
4260             if (free_or_dead
4261                 && tcg_out_sti(s, ts->type, ts->val,
4262                                ts->mem_base->reg, ts->mem_offset)) {
4263                 break;
4264             }
4265             temp_load(s, ts, tcg_target_available_regs[ts->type],
4266                       allocated_regs, preferred_regs);
4267             /* fallthrough */
4268 
4269         case TEMP_VAL_REG:
4270             tcg_out_st(s, ts->type, ts->reg,
4271                        ts->mem_base->reg, ts->mem_offset);
4272             break;
4273 
4274         case TEMP_VAL_MEM:
4275             break;
4276 
4277         case TEMP_VAL_DEAD:
4278         default:
4279             g_assert_not_reached();
4280         }
4281         ts->mem_coherent = 1;
4282     }
4283     if (free_or_dead) {
4284         temp_free_or_dead(s, ts, free_or_dead);
4285     }
4286 }
4287 
4288 /* free register 'reg' by spilling the corresponding temporary if necessary */
4289 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4290 {
4291     TCGTemp *ts = s->reg_to_temp[reg];
4292     if (ts != NULL) {
4293         temp_sync(s, ts, allocated_regs, 0, -1);
4294     }
4295 }
4296 
4297 /**
4298  * tcg_reg_alloc:
4299  * @required_regs: Set of registers in which we must allocate.
4300  * @allocated_regs: Set of registers which must be avoided.
4301  * @preferred_regs: Set of registers we should prefer.
4302  * @rev: True if we search the registers in "indirect" order.
4303  *
4304  * The allocated register must be in @required_regs & ~@allocated_regs,
4305  * but if we can put it in @preferred_regs we may save a move later.
4306  */
4307 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4308                             TCGRegSet allocated_regs,
4309                             TCGRegSet preferred_regs, bool rev)
4310 {
4311     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4312     TCGRegSet reg_ct[2];
4313     const int *order;
4314 
4315     reg_ct[1] = required_regs & ~allocated_regs;
4316     tcg_debug_assert(reg_ct[1] != 0);
4317     reg_ct[0] = reg_ct[1] & preferred_regs;
4318 
4319     /* Skip the preferred_regs option if it cannot be satisfied,
4320        or if the preference made no difference.  */
4321     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4322 
4323     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4324 
4325     /* Try free registers, preferences first.  */
4326     for (j = f; j < 2; j++) {
4327         TCGRegSet set = reg_ct[j];
4328 
4329         if (tcg_regset_single(set)) {
4330             /* One register in the set.  */
4331             TCGReg reg = tcg_regset_first(set);
4332             if (s->reg_to_temp[reg] == NULL) {
4333                 return reg;
4334             }
4335         } else {
4336             for (i = 0; i < n; i++) {
4337                 TCGReg reg = order[i];
4338                 if (s->reg_to_temp[reg] == NULL &&
4339                     tcg_regset_test_reg(set, reg)) {
4340                     return reg;
4341                 }
4342             }
4343         }
4344     }
4345 
4346     /* We must spill something.  */
4347     for (j = f; j < 2; j++) {
4348         TCGRegSet set = reg_ct[j];
4349 
4350         if (tcg_regset_single(set)) {
4351             /* One register in the set.  */
4352             TCGReg reg = tcg_regset_first(set);
4353             tcg_reg_free(s, reg, allocated_regs);
4354             return reg;
4355         } else {
4356             for (i = 0; i < n; i++) {
4357                 TCGReg reg = order[i];
4358                 if (tcg_regset_test_reg(set, reg)) {
4359                     tcg_reg_free(s, reg, allocated_regs);
4360                     return reg;
4361                 }
4362             }
4363         }
4364     }
4365 
4366     g_assert_not_reached();
4367 }
4368 
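/*
 * Example (sketch): with required_regs = {R0,R1}, preferred_regs = {R1},
 * and R1 occupied, the free-register pass finds nothing in the preferred
 * set, so the wider set yields the free R0 rather than spilling R1.
 */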
4369 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4370                                  TCGRegSet allocated_regs,
4371                                  TCGRegSet preferred_regs, bool rev)
4372 {
4373     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4374     TCGRegSet reg_ct[2];
4375     const int *order;
4376 
4377     /* Only consider reg I when neither I nor I+1 is in allocated_regs. */
4378     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4379     tcg_debug_assert(reg_ct[1] != 0);
4380     reg_ct[0] = reg_ct[1] & preferred_regs;
4381 
4382     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4383 
4384     /*
4385      * Skip the preferred_regs option if it cannot be satisfied,
4386      * or if the preference made no difference.
4387      */
4388     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4389 
4390     /*
4391      * Minimize the number of flushes by looking for 2 free registers first,
4392      * then a single flush, then two flushes.
4393      */
4394     for (fmin = 2; fmin >= 0; fmin--) {
4395         for (j = k; j < 2; j++) {
4396             TCGRegSet set = reg_ct[j];
4397 
4398             for (i = 0; i < n; i++) {
4399                 TCGReg reg = order[i];
4400 
4401                 if (tcg_regset_test_reg(set, reg)) {
4402                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4403                     if (f >= fmin) {
4404                         tcg_reg_free(s, reg, allocated_regs);
4405                         tcg_reg_free(s, reg + 1, allocated_regs);
4406                         return reg;
4407                     }
4408                 }
4409             }
4410         }
4411     }
4412     g_assert_not_reached();
4413 }
4414 
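/*
 * Example (sketch): the fmin loop above first scans for a base reg R with
 * both R and R+1 free (f == 2); failing that, it accepts a pair needing a
 * single flush, and only as a last resort flushes both halves.
 */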
4415 /* Make sure the temporary is in a register.  If needed, allocate the register
4416    from DESIRED while avoiding ALLOCATED.  */
4417 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4418                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4419 {
4420     TCGReg reg;
4421 
4422     switch (ts->val_type) {
4423     case TEMP_VAL_REG:
4424         return;
4425     case TEMP_VAL_CONST:
4426         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4427                             preferred_regs, ts->indirect_base);
4428         if (ts->type <= TCG_TYPE_I64) {
4429             tcg_out_movi(s, ts->type, reg, ts->val);
4430         } else {
4431             uint64_t val = ts->val;
4432             MemOp vece = MO_64;
4433 
4434             /*
4435              * Find the minimal vector element that matches the constant.
4436              * The targets will, in general, have to do this search anyway,
4437              * so do it generically here.
4438              */
4439             if (val == dup_const(MO_8, val)) {
4440                 vece = MO_8;
4441             } else if (val == dup_const(MO_16, val)) {
4442                 vece = MO_16;
4443             } else if (val == dup_const(MO_32, val)) {
4444                 vece = MO_32;
4445             }
4446 
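            /*
             * E.g. (sketch): 0x8080808080808080 replicates the byte 0x80
             * and reduces to MO_8, while 0x0000800000008000 only matches
             * at MO_32.
             */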
4447             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4448         }
4449         ts->mem_coherent = 0;
4450         break;
4451     case TEMP_VAL_MEM:
4452         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4453                             preferred_regs, ts->indirect_base);
4454         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4455         ts->mem_coherent = 1;
4456         break;
4457     case TEMP_VAL_DEAD:
4458     default:
4459         g_assert_not_reached();
4460     }
4461     set_temp_val_reg(s, ts, reg);
4462 }
4463 
4464 /* Save a temporary to memory. 'allocated_regs' is used in case a
4465    temporary register needs to be allocated to store a constant.  */
4466 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4467 {
4468     /* The liveness analysis already ensures that globals are back
4469        in memory. Keep a tcg_debug_assert for safety. */
4470     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4471 }
4472 
4473 /* save globals to their canonical location and assume they can be
4474    modified by the following code. 'allocated_regs' is used in case a
4475    temporary register needs to be allocated to store a constant. */
4476 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4477 {
4478     int i, n;
4479 
4480     for (i = 0, n = s->nb_globals; i < n; i++) {
4481         temp_save(s, &s->temps[i], allocated_regs);
4482     }
4483 }
4484 
4485 /* sync globals to their canonical location and assume they can be
4486    read by the following code. 'allocated_regs' is used in case a
4487    temporary register needs to be allocated to store a constant. */
4488 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4489 {
4490     int i, n;
4491 
4492     for (i = 0, n = s->nb_globals; i < n; i++) {
4493         TCGTemp *ts = &s->temps[i];
4494         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4495                          || ts->kind == TEMP_FIXED
4496                          || ts->mem_coherent);
4497     }
4498 }
4499 
4500 /* at the end of a basic block, we assume all temporaries are dead and
4501    all globals are stored at their canonical location. */
4502 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4503 {
4504     int i;
4505 
4506     for (i = s->nb_globals; i < s->nb_temps; i++) {
4507         TCGTemp *ts = &s->temps[i];
4508 
4509         switch (ts->kind) {
4510         case TEMP_TB:
4511             temp_save(s, ts, allocated_regs);
4512             break;
4513         case TEMP_EBB:
4514             /* The liveness analysis already ensures that temps are dead.
4515                Keep a tcg_debug_assert for safety. */
4516             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4517             break;
4518         case TEMP_CONST:
4519             /* Similarly, we should have freed any allocated register. */
4520             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4521             break;
4522         default:
4523             g_assert_not_reached();
4524         }
4525     }
4526 
4527     save_globals(s, allocated_regs);
4528 }
4529 
4530 /*
4531  * At a conditional branch, we assume all temporaries are dead unless
4532  * explicitly live-across-conditional-branch; all globals and local
4533  * temps are synced to their location.
4534  */
4535 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4536 {
4537     sync_globals(s, allocated_regs);
4538 
4539     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4540         TCGTemp *ts = &s->temps[i];
4541         /*
4542          * The liveness analysis already ensures that temps are dead.
4543          * Keep tcg_debug_asserts for safety.
4544          */
4545         switch (ts->kind) {
4546         case TEMP_TB:
4547             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4548             break;
4549         case TEMP_EBB:
4550         case TEMP_CONST:
4551             break;
4552         default:
4553             g_assert_not_reached();
4554         }
4555     }
4556 }
4557 
4558 /*
4559  * Specialized code generation for INDEX_op_mov_* with a constant.
4560  */
4561 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4562                                   tcg_target_ulong val, TCGLifeData arg_life,
4563                                   TCGRegSet preferred_regs)
4564 {
4565     /* ENV should not be modified.  */
4566     tcg_debug_assert(!temp_readonly(ots));
4567 
4568     /* The movi is not explicitly generated here.  */
4569     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4570     ots->val = val;
4571     ots->mem_coherent = 0;
4572     if (NEED_SYNC_ARG(0)) {
4573         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4574     } else if (IS_DEAD_ARG(0)) {
4575         temp_dead(s, ots);
4576     }
4577 }
4578 
4579 /*
4580  * Specialized code generation for INDEX_op_mov_*.
4581  */
4582 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4583 {
4584     const TCGLifeData arg_life = op->life;
4585     TCGRegSet allocated_regs, preferred_regs;
4586     TCGTemp *ts, *ots;
4587     TCGType otype, itype;
4588     TCGReg oreg, ireg;
4589 
4590     allocated_regs = s->reserved_regs;
4591     preferred_regs = output_pref(op, 0);
4592     ots = arg_temp(op->args[0]);
4593     ts = arg_temp(op->args[1]);
4594 
4595     /* ENV should not be modified.  */
4596     tcg_debug_assert(!temp_readonly(ots));
4597 
4598     /* Note that otype != itype for no-op truncation.  */
4599     otype = ots->type;
4600     itype = ts->type;
4601 
4602     if (ts->val_type == TEMP_VAL_CONST) {
4603         /* propagate constant or generate sti */
4604         tcg_target_ulong val = ts->val;
4605         if (IS_DEAD_ARG(1)) {
4606             temp_dead(s, ts);
4607         }
4608         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4609         return;
4610     }
4611     /* If the source value is in memory, we're going to be forced
4612        to have it in a register in order to perform the copy.  Copy
4613        the SOURCE value into its own register first; that way we
4614        the SOURCE value into its own register first, that way we
4615        don't have to reload SOURCE the next time it is used. */
4616     if (ts->val_type == TEMP_VAL_MEM) {
4617         temp_load(s, ts, tcg_target_available_regs[itype],
4618                   allocated_regs, preferred_regs);
4619     }
4620     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4621     ireg = ts->reg;
4622 
4623     if (IS_DEAD_ARG(0)) {
4624         /* mov to a non-saved dead register makes no sense (even with
4625            liveness analysis disabled). */
4626         tcg_debug_assert(NEED_SYNC_ARG(0));
4627         if (!ots->mem_allocated) {
4628             temp_allocate_frame(s, ots);
4629         }
4630         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4631         if (IS_DEAD_ARG(1)) {
4632             temp_dead(s, ts);
4633         }
4634         temp_dead(s, ots);
4635         return;
4636     }
4637 
4638     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4639         /*
4640          * The mov can be suppressed.  Kill input first, so that it
4641          * is unlinked from reg_to_temp, then set the output to the
4642          * reg that we saved from the input.
4643          */
4644         temp_dead(s, ts);
4645         oreg = ireg;
4646     } else {
4647         if (ots->val_type == TEMP_VAL_REG) {
4648             oreg = ots->reg;
4649         } else {
4650             /* Make sure to not spill the input register during allocation. */
4651             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4652                                  allocated_regs | ((TCGRegSet)1 << ireg),
4653                                  preferred_regs, ots->indirect_base);
4654         }
4655         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4656             /*
4657              * Cross register class move not supported.
4658              * Store the source register into the destination slot
4659              * and leave the destination temp as TEMP_VAL_MEM.
4660              */
4661             assert(!temp_readonly(ots));
4662             if (!ots->mem_allocated) {
4663                 temp_allocate_frame(s, ots);
4664             }
4665             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4666             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4667             ots->mem_coherent = 1;
4668             return;
4669         }
4670     }
4671     set_temp_val_reg(s, ots, oreg);
4672     ots->mem_coherent = 0;
4673 
4674     if (NEED_SYNC_ARG(0)) {
4675         temp_sync(s, ots, allocated_regs, 0, 0);
4676     }
4677 }
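
/*
 * Illustration (assumed scenario): for "mov_i32 t1, t0" where t0 dies
 * at this op and is not TEMP_FIXED, the code above renames t0's host
 * register to t1 via temp_dead() + set_temp_val_reg() and emits no host
 * code at all.  Only when t0 stays live, or is a fixed register, does
 * tcg_out_mov() produce an actual register-to-register copy.
 */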
4678 
4679 /*
4680  * Specialized code generation for INDEX_op_dup_vec.
4681  */
4682 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4683 {
4684     const TCGLifeData arg_life = op->life;
4685     TCGRegSet dup_out_regs, dup_in_regs;
4686     TCGTemp *its, *ots;
4687     TCGType itype, vtype;
4688     unsigned vece;
4689     int lowpart_ofs;
4690     bool ok;
4691 
4692     ots = arg_temp(op->args[0]);
4693     its = arg_temp(op->args[1]);
4694 
4695     /* ENV should not be modified.  */
4696     tcg_debug_assert(!temp_readonly(ots));
4697 
4698     itype = its->type;
4699     vece = TCGOP_VECE(op);
4700     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4701 
4702     if (its->val_type == TEMP_VAL_CONST) {
4703         /* Propagate constant via movi -> dupi.  */
4704         tcg_target_ulong val = its->val;
4705         if (IS_DEAD_ARG(1)) {
4706             temp_dead(s, its);
4707         }
4708         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4709         return;
4710     }
4711 
4712     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4713     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4714 
4715     /* Allocate the output register now.  */
4716     if (ots->val_type != TEMP_VAL_REG) {
4717         TCGRegSet allocated_regs = s->reserved_regs;
4718         TCGReg oreg;
4719 
4720         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4721             /* Make sure to not spill the input register. */
4722             tcg_regset_set_reg(allocated_regs, its->reg);
4723         }
4724         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4725                              output_pref(op, 0), ots->indirect_base);
4726         set_temp_val_reg(s, ots, oreg);
4727     }
4728 
4729     switch (its->val_type) {
4730     case TEMP_VAL_REG:
4731         /*
4732          * The dup constraints must be broad, covering all possible VECE.
4733          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4734          * to fail, indicating that extra moves are required for that case.
4735          */
4736         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4737             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4738                 goto done;
4739             }
4740             /* Try again from memory or a vector input register.  */
4741         }
4742         if (!its->mem_coherent) {
4743             /*
4744              * The input register is not synced, and so an extra store
4745              * would be required to use memory.  Attempt an integer-vector
4746              * register move first.  We do not have a TCGRegSet for this.
4747              */
4748             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4749                 break;
4750             }
4751             /* Sync the temp back to its slot and load from there.  */
4752             temp_sync(s, its, s->reserved_regs, 0, 0);
4753         }
4754         /* fall through */
4755 
4756     case TEMP_VAL_MEM:
4757         lowpart_ofs = 0;
4758         if (HOST_BIG_ENDIAN) {
4759             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4760         }
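        /*
         * Worked example (illustrative): on a big-endian host with
         * itype == TCG_TYPE_I64 and vece == MO_8, the least significant
         * byte sits at the highest address of the 8-byte slot, so
         * lowpart_ofs = tcg_type_size(TCG_TYPE_I64) - (1 << MO_8)
         *             = 8 - 1 = 7.
         * A little-endian host reads the low part at offset 0.
         */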
4761         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4762                              its->mem_offset + lowpart_ofs)) {
4763             goto done;
4764         }
4765         /* Load the input into the destination vector register. */
4766         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4767         break;
4768 
4769     default:
4770         g_assert_not_reached();
4771     }
4772 
4773     /* We now have a vector input register, so dup must succeed. */
4774     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4775     tcg_debug_assert(ok);
4776 
4777  done:
4778     ots->mem_coherent = 0;
4779     if (IS_DEAD_ARG(1)) {
4780         temp_dead(s, its);
4781     }
4782     if (NEED_SYNC_ARG(0)) {
4783         temp_sync(s, ots, s->reserved_regs, 0, 0);
4784     }
4785     if (IS_DEAD_ARG(0)) {
4786         temp_dead(s, ots);
4787     }
4788 }
4789 
4790 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4791 {
4792     const TCGLifeData arg_life = op->life;
4793     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4794     TCGRegSet i_allocated_regs;
4795     TCGRegSet o_allocated_regs;
4796     int i, k, nb_iargs, nb_oargs;
4797     TCGReg reg;
4798     TCGArg arg;
4799     const TCGArgConstraint *arg_ct;
4800     TCGTemp *ts;
4801     TCGArg new_args[TCG_MAX_OP_ARGS];
4802     int const_args[TCG_MAX_OP_ARGS];
4803     TCGCond op_cond;
4804 
4805     nb_oargs = def->nb_oargs;
4806     nb_iargs = def->nb_iargs;
4807 
4808     /* copy constants */
4809     memcpy(new_args + nb_oargs + nb_iargs,
4810            op->args + nb_oargs + nb_iargs,
4811            sizeof(TCGArg) * def->nb_cargs);
4812 
4813     i_allocated_regs = s->reserved_regs;
4814     o_allocated_regs = s->reserved_regs;
4815 
4816     switch (op->opc) {
4817     case INDEX_op_brcond_i32:
4818     case INDEX_op_brcond_i64:
4819         op_cond = op->args[2];
4820         break;
4821     case INDEX_op_setcond_i32:
4822     case INDEX_op_setcond_i64:
4823     case INDEX_op_negsetcond_i32:
4824     case INDEX_op_negsetcond_i64:
4825     case INDEX_op_cmp_vec:
4826         op_cond = op->args[3];
4827         break;
4828     case INDEX_op_brcond2_i32:
4829         op_cond = op->args[4];
4830         break;
4831     case INDEX_op_movcond_i32:
4832     case INDEX_op_movcond_i64:
4833     case INDEX_op_setcond2_i32:
4834     case INDEX_op_cmpsel_vec:
4835         op_cond = op->args[5];
4836         break;
4837     default:
4838         /* No condition within opcode. */
4839         op_cond = TCG_COND_ALWAYS;
4840         break;
4841     }
4842 
4843     /* satisfy input constraints */
4844     for (k = 0; k < nb_iargs; k++) {
4845         TCGRegSet i_preferred_regs, i_required_regs;
4846         bool allocate_new_reg, copyto_new_reg;
4847         TCGTemp *ts2;
4848         int i1, i2;
4849 
4850         i = def->args_ct[nb_oargs + k].sort_index;
4851         arg = op->args[i];
4852         arg_ct = &def->args_ct[i];
4853         ts = arg_temp(arg);
4854 
4855         if (ts->val_type == TEMP_VAL_CONST
4856             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
4857                                       op_cond, TCGOP_VECE(op))) {
4858             /* constant is OK for instruction */
4859             const_args[i] = 1;
4860             new_args[i] = ts->val;
4861             continue;
4862         }
4863 
4864         reg = ts->reg;
4865         i_preferred_regs = 0;
4866         i_required_regs = arg_ct->regs;
4867         allocate_new_reg = false;
4868         copyto_new_reg = false;
4869 
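        /*
         * Hedged summary of the pair encoding handled below: pair == 0
         * is an unpaired argument; pair == 1 is the first register of a
         * pair, allocated together with reg + 1; pair == 2 is the second
         * register, fixed at the first's register plus one; pair == 3 is
         * an input aliased to the second output of a pair with no first
         * input.  E.g. a 32-bit host implementing a 64-bit operation in
         * lo/hi halves would constrain the halves with pair 1 and 2.
         */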
4870         switch (arg_ct->pair) {
4871         case 0: /* not paired */
4872             if (arg_ct->ialias) {
4873                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4874 
4875                 /*
4876                  * If the input is readonly, then it cannot also be an
4877                  * output and aliased to itself.  If the input is not
4878                  * dead after the instruction, we must allocate a new
4879                  * register and move it.
4880                  */
4881                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4882                     || def->args_ct[arg_ct->alias_index].newreg) {
4883                     allocate_new_reg = true;
4884                 } else if (ts->val_type == TEMP_VAL_REG) {
4885                     /*
4886                      * Check if the current register has already been
4887                      * allocated for another input.
4888                      */
4889                     allocate_new_reg =
4890                         tcg_regset_test_reg(i_allocated_regs, reg);
4891                 }
4892             }
4893             if (!allocate_new_reg) {
4894                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4895                           i_preferred_regs);
4896                 reg = ts->reg;
4897                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4898             }
4899             if (allocate_new_reg) {
4900                 /*
4901                  * Allocate a new register matching the constraint
4902                  * and move the temporary register into it.
4903                  */
4904                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4905                           i_allocated_regs, 0);
4906                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4907                                     i_preferred_regs, ts->indirect_base);
4908                 copyto_new_reg = true;
4909             }
4910             break;
4911 
4912         case 1:
4913             /* First of an input pair; if i1 == i2, the second is an output. */
4914             i1 = i;
4915             i2 = arg_ct->pair_index;
4916             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4917 
4918             /*
4919              * It is easier to default to allocating a new pair
4920              * and to identify a few cases where it's not required.
4921              */
4922             if (arg_ct->ialias) {
4923                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4924                 if (IS_DEAD_ARG(i1) &&
4925                     IS_DEAD_ARG(i2) &&
4926                     !temp_readonly(ts) &&
4927                     ts->val_type == TEMP_VAL_REG &&
4928                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4929                     tcg_regset_test_reg(i_required_regs, reg) &&
4930                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4931                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4932                     (ts2
4933                      ? ts2->val_type == TEMP_VAL_REG &&
4934                        ts2->reg == reg + 1 &&
4935                        !temp_readonly(ts2)
4936                      : s->reg_to_temp[reg + 1] == NULL)) {
4937                     break;
4938                 }
4939             } else {
4940                 /* Without aliasing, the pair must also be an input. */
4941                 tcg_debug_assert(ts2);
4942                 if (ts->val_type == TEMP_VAL_REG &&
4943                     ts2->val_type == TEMP_VAL_REG &&
4944                     ts2->reg == reg + 1 &&
4945                     tcg_regset_test_reg(i_required_regs, reg)) {
4946                     break;
4947                 }
4948             }
4949             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4950                                      0, ts->indirect_base);
4951             goto do_pair;
4952 
4953         case 2: /* pair second */
4954             reg = new_args[arg_ct->pair_index] + 1;
4955             goto do_pair;
4956 
4957         case 3: /* ialias with second output, no first input */
4958             tcg_debug_assert(arg_ct->ialias);
4959             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4960 
4961             if (IS_DEAD_ARG(i) &&
4962                 !temp_readonly(ts) &&
4963                 ts->val_type == TEMP_VAL_REG &&
4964                 reg > 0 &&
4965                 s->reg_to_temp[reg - 1] == NULL &&
4966                 tcg_regset_test_reg(i_required_regs, reg) &&
4967                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4968                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4969                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4970                 break;
4971             }
4972             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4973                                      i_allocated_regs, 0,
4974                                      ts->indirect_base);
4975             tcg_regset_set_reg(i_allocated_regs, reg);
4976             reg += 1;
4977             goto do_pair;
4978 
4979         do_pair:
4980             /*
4981              * If an aliased input is not dead after the instruction,
4982              * we must allocate a new register and move it.
4983              */
4984             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4985                 TCGRegSet t_allocated_regs = i_allocated_regs;
4986 
4987                 /*
4988                  * Because of the alias, and the continued life, make sure
4989                  * that the temp is somewhere *other* than the reg pair,
4990                  * and we get a copy in reg.
4991                  */
4992                 tcg_regset_set_reg(t_allocated_regs, reg);
4993                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4994                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4995                     /* If ts was already in reg, copy it somewhere else. */
4996                     TCGReg nr;
4997                     bool ok;
4998 
4999                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5000                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5001                                        t_allocated_regs, 0, ts->indirect_base);
5002                     ok = tcg_out_mov(s, ts->type, nr, reg);
5003                     tcg_debug_assert(ok);
5004 
5005                     set_temp_val_reg(s, ts, nr);
5006                 } else {
5007                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5008                               t_allocated_regs, 0);
5009                     copyto_new_reg = true;
5010                 }
5011             } else {
5012                 /* Preferably allocate to reg, otherwise copy. */
5013                 i_required_regs = (TCGRegSet)1 << reg;
5014                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5015                           i_preferred_regs);
5016                 copyto_new_reg = ts->reg != reg;
5017             }
5018             break;
5019 
5020         default:
5021             g_assert_not_reached();
5022         }
5023 
5024         if (copyto_new_reg) {
5025             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5026                 /*
5027                  * Cross register class move not supported.  Sync the
5028                  * temp back to its slot and load from there.
5029                  */
5030                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5031                 tcg_out_ld(s, ts->type, reg,
5032                            ts->mem_base->reg, ts->mem_offset);
5033             }
5034         }
5035         new_args[i] = reg;
5036         const_args[i] = 0;
5037         tcg_regset_set_reg(i_allocated_regs, reg);
5038     }
5039 
5040     /* mark dead temporaries and free the associated registers */
5041     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5042         if (IS_DEAD_ARG(i)) {
5043             temp_dead(s, arg_temp(op->args[i]));
5044         }
5045     }
5046 
5047     if (def->flags & TCG_OPF_COND_BRANCH) {
5048         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5049     } else if (def->flags & TCG_OPF_BB_END) {
5050         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5051     } else {
5052         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5053             /* XXX: permit a generic clobber register list? */
5054             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5055                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5056                     tcg_reg_free(s, i, i_allocated_regs);
5057                 }
5058             }
5059         }
5060         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5061             /* sync globals if the op has side effects and might trigger
5062                an exception. */
5063             sync_globals(s, i_allocated_regs);
5064         }
5065 
5066         /* satisfy the output constraints */
5067         for (k = 0; k < nb_oargs; k++) {
5068             i = def->args_ct[k].sort_index;
5069             arg = op->args[i];
5070             arg_ct = &def->args_ct[i];
5071             ts = arg_temp(arg);
5072 
5073             /* ENV should not be modified.  */
5074             tcg_debug_assert(!temp_readonly(ts));
5075 
5076             switch (arg_ct->pair) {
5077             case 0: /* not paired */
5078                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5079                     reg = new_args[arg_ct->alias_index];
5080                 } else if (arg_ct->newreg) {
5081                     reg = tcg_reg_alloc(s, arg_ct->regs,
5082                                         i_allocated_regs | o_allocated_regs,
5083                                         output_pref(op, k), ts->indirect_base);
5084                 } else {
5085                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5086                                         output_pref(op, k), ts->indirect_base);
5087                 }
5088                 break;
5089 
5090             case 1: /* first of pair */
5091                 if (arg_ct->oalias) {
5092                     reg = new_args[arg_ct->alias_index];
5093                 } else if (arg_ct->newreg) {
5094                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5095                                              i_allocated_regs | o_allocated_regs,
5096                                              output_pref(op, k),
5097                                              ts->indirect_base);
5098                 } else {
5099                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5100                                              output_pref(op, k),
5101                                              ts->indirect_base);
5102                 }
5103                 break;
5104 
5105             case 2: /* second of pair */
5106                 if (arg_ct->oalias) {
5107                     reg = new_args[arg_ct->alias_index];
5108                 } else {
5109                     reg = new_args[arg_ct->pair_index] + 1;
5110                 }
5111                 break;
5112 
5113             case 3: /* first of pair, aliasing with a second input */
5114                 tcg_debug_assert(!arg_ct->newreg);
5115                 reg = new_args[arg_ct->pair_index] - 1;
5116                 break;
5117 
5118             default:
5119                 g_assert_not_reached();
5120             }
5121             tcg_regset_set_reg(o_allocated_regs, reg);
5122             set_temp_val_reg(s, ts, reg);
5123             ts->mem_coherent = 0;
5124             new_args[i] = reg;
5125         }
5126     }
5127 
5128     /* emit instruction */
5129     switch (op->opc) {
5130     case INDEX_op_ext8s_i32:
5131         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5132         break;
5133     case INDEX_op_ext8s_i64:
5134         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5135         break;
5136     case INDEX_op_ext8u_i32:
5137     case INDEX_op_ext8u_i64:
5138         tcg_out_ext8u(s, new_args[0], new_args[1]);
5139         break;
5140     case INDEX_op_ext16s_i32:
5141         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5142         break;
5143     case INDEX_op_ext16s_i64:
5144         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5145         break;
5146     case INDEX_op_ext16u_i32:
5147     case INDEX_op_ext16u_i64:
5148         tcg_out_ext16u(s, new_args[0], new_args[1]);
5149         break;
5150     case INDEX_op_ext32s_i64:
5151         tcg_out_ext32s(s, new_args[0], new_args[1]);
5152         break;
5153     case INDEX_op_ext32u_i64:
5154         tcg_out_ext32u(s, new_args[0], new_args[1]);
5155         break;
5156     case INDEX_op_ext_i32_i64:
5157         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5158         break;
5159     case INDEX_op_extu_i32_i64:
5160         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5161         break;
5162     case INDEX_op_extrl_i64_i32:
5163         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5164         break;
5165     default:
5166         if (def->flags & TCG_OPF_VECTOR) {
5167             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5168                            new_args, const_args);
5169         } else {
5170             tcg_out_op(s, op->opc, new_args, const_args);
5171         }
5172         break;
5173     }
5174 
5175     /* move the outputs into the correct registers if needed */
5176     for (i = 0; i < nb_oargs; i++) {
5177         ts = arg_temp(op->args[i]);
5178 
5179         /* ENV should not be modified.  */
5180         tcg_debug_assert(!temp_readonly(ts));
5181 
5182         if (NEED_SYNC_ARG(i)) {
5183             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5184         } else if (IS_DEAD_ARG(i)) {
5185             temp_dead(s, ts);
5186         }
5187     }
5188 }
5189 
5190 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5191 {
5192     const TCGLifeData arg_life = op->life;
5193     TCGTemp *ots, *itsl, *itsh;
5194     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5195 
5196     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5197     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5198     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5199 
5200     ots = arg_temp(op->args[0]);
5201     itsl = arg_temp(op->args[1]);
5202     itsh = arg_temp(op->args[2]);
5203 
5204     /* ENV should not be modified.  */
5205     tcg_debug_assert(!temp_readonly(ots));
5206 
5207     /* Allocate the output register now.  */
5208     if (ots->val_type != TEMP_VAL_REG) {
5209         TCGRegSet allocated_regs = s->reserved_regs;
5210         TCGRegSet dup_out_regs =
5211             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5212         TCGReg oreg;
5213 
5214         /* Make sure to not spill the input registers. */
5215         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5216             tcg_regset_set_reg(allocated_regs, itsl->reg);
5217         }
5218         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5219             tcg_regset_set_reg(allocated_regs, itsh->reg);
5220         }
5221 
5222         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5223                              output_pref(op, 0), ots->indirect_base);
5224         set_temp_val_reg(s, ots, oreg);
5225     }
5226 
5227     /* Promote dup2 of immediates to dupi_vec. */
5228     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5229         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5230         MemOp vece = MO_64;
5231 
5232         if (val == dup_const(MO_8, val)) {
5233             vece = MO_8;
5234         } else if (val == dup_const(MO_16, val)) {
5235             vece = MO_16;
5236         } else if (val == dup_const(MO_32, val)) {
5237             vece = MO_32;
5238         }
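        /*
         * Worked example (illustrative values): itsl->val = 0x01020102
         * and itsh->val = 0x01020102 combine via deposit64() into
         * 0x0102010201020102.  dup_const(MO_8, val) yields
         * 0x0202020202020202, which does not match, while
         * dup_const(MO_16, val) reproduces the value exactly, so the
         * immediate is narrowed to a 16-bit broadcast with vece = MO_16.
         */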
5239 
5240         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5241         goto done;
5242     }
5243 
5244     /* If the two inputs form one 64-bit value, try dupm_vec. */
5245     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5246         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5247         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5248         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5249 
5250         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5251         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5252 
5253         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5254                              its->mem_base->reg, its->mem_offset)) {
5255             goto done;
5256         }
5257     }
5258 
5259     /* Fall back to generic expansion. */
5260     return false;
5261 
5262  done:
5263     ots->mem_coherent = 0;
5264     if (IS_DEAD_ARG(1)) {
5265         temp_dead(s, itsl);
5266     }
5267     if (IS_DEAD_ARG(2)) {
5268         temp_dead(s, itsh);
5269     }
5270     if (NEED_SYNC_ARG(0)) {
5271         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5272     } else if (IS_DEAD_ARG(0)) {
5273         temp_dead(s, ots);
5274     }
5275     return true;
5276 }
5277 
5278 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5279                          TCGRegSet allocated_regs)
5280 {
5281     if (ts->val_type == TEMP_VAL_REG) {
5282         if (ts->reg != reg) {
5283             tcg_reg_free(s, reg, allocated_regs);
5284             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5285                 /*
5286                  * Cross register class move not supported.  Sync the
5287                  * temp back to its slot and load from there.
5288                  */
5289                 temp_sync(s, ts, allocated_regs, 0, 0);
5290                 tcg_out_ld(s, ts->type, reg,
5291                            ts->mem_base->reg, ts->mem_offset);
5292             }
5293         }
5294     } else {
5295         TCGRegSet arg_set = 0;
5296 
5297         tcg_reg_free(s, reg, allocated_regs);
5298         tcg_regset_set_reg(arg_set, reg);
5299         temp_load(s, ts, arg_set, allocated_regs, 0);
5300     }
5301 }
5302 
5303 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5304                          TCGRegSet allocated_regs)
5305 {
5306     /*
5307      * When the destination is on the stack, load up the temp and store.
5308      * If there are many call-saved registers, the temp might live to
5309      * see another use; otherwise it'll be discarded.
5310      */
5311     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5312     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5313                arg_slot_stk_ofs(arg_slot));
5314 }
5315 
5316 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5317                             TCGTemp *ts, TCGRegSet *allocated_regs)
5318 {
5319     if (arg_slot_reg_p(l->arg_slot)) {
5320         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5321         load_arg_reg(s, reg, ts, *allocated_regs);
5322         tcg_regset_set_reg(*allocated_regs, reg);
5323     } else {
5324         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5325     }
5326 }
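
/*
 * Sketch of the slot mapping (assuming, as elsewhere in this file, that
 * arg_slot_reg_p(n) is true exactly for the slots that fit in
 * tcg_target_call_iarg_regs): on a host ABI with six integer argument
 * registers, slots 0..5 resolve to tcg_target_call_iarg_regs[0..5] and
 * are filled by load_arg_reg(), while slot 6 and above are stored at
 * arg_slot_stk_ofs(slot) from TCG_REG_CALL_STACK by load_arg_stk().
 */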
5327 
5328 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5329                          intptr_t ref_off, TCGRegSet *allocated_regs)
5330 {
5331     TCGReg reg;
5332 
5333     if (arg_slot_reg_p(arg_slot)) {
5334         reg = tcg_target_call_iarg_regs[arg_slot];
5335         tcg_reg_free(s, reg, *allocated_regs);
5336         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5337         tcg_regset_set_reg(*allocated_regs, reg);
5338     } else {
5339         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5340                             *allocated_regs, 0, false);
5341         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5342         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5343                    arg_slot_stk_ofs(arg_slot));
5344     }
5345 }
5346 
5347 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5348 {
5349     const int nb_oargs = TCGOP_CALLO(op);
5350     const int nb_iargs = TCGOP_CALLI(op);
5351     const TCGLifeData arg_life = op->life;
5352     const TCGHelperInfo *info = tcg_call_info(op);
5353     TCGRegSet allocated_regs = s->reserved_regs;
5354     int i;
5355 
5356     /*
5357      * Move inputs into place in reverse order,
5358      * so that we place stacked arguments first.
5359      */
5360     for (i = nb_iargs - 1; i >= 0; --i) {
5361         const TCGCallArgumentLoc *loc = &info->in[i];
5362         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5363 
5364         switch (loc->kind) {
5365         case TCG_CALL_ARG_NORMAL:
5366         case TCG_CALL_ARG_EXTEND_U:
5367         case TCG_CALL_ARG_EXTEND_S:
5368             load_arg_normal(s, loc, ts, &allocated_regs);
5369             break;
5370         case TCG_CALL_ARG_BY_REF:
5371             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5372             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5373                          arg_slot_stk_ofs(loc->ref_slot),
5374                          &allocated_regs);
5375             break;
5376         case TCG_CALL_ARG_BY_REF_N:
5377             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5378             break;
5379         default:
5380             g_assert_not_reached();
5381         }
5382     }
5383 
5384     /* Mark dead temporaries and free the associated registers.  */
5385     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5386         if (IS_DEAD_ARG(i)) {
5387             temp_dead(s, arg_temp(op->args[i]));
5388         }
5389     }
5390 
5391     /* Clobber call registers.  */
5392     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5393         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5394             tcg_reg_free(s, i, allocated_regs);
5395         }
5396     }
5397 
5398     /*
5399      * Save globals if they might be written by the helper,
5400      * sync them if they might be read.
5401      */
5402     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5403         /* Nothing to do */
5404     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5405         sync_globals(s, allocated_regs);
5406     } else {
5407         save_globals(s, allocated_regs);
5408     }
5409 
5410     /*
5411      * If the ABI passes a pointer to the returned struct as the first
5412      * argument, load that now.  Pass a pointer to the output home slot.
5413      */
5414     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5415         TCGTemp *ts = arg_temp(op->args[0]);
5416 
5417         if (!ts->mem_allocated) {
5418             temp_allocate_frame(s, ts);
5419         }
5420         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5421     }
5422 
5423     tcg_out_call(s, tcg_call_func(op), info);
5424 
5425     /* Assign output registers and emit moves if needed.  */
5426     switch (info->out_kind) {
5427     case TCG_CALL_RET_NORMAL:
5428         for (i = 0; i < nb_oargs; i++) {
5429             TCGTemp *ts = arg_temp(op->args[i]);
5430             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5431 
5432             /* ENV should not be modified.  */
5433             tcg_debug_assert(!temp_readonly(ts));
5434 
5435             set_temp_val_reg(s, ts, reg);
5436             ts->mem_coherent = 0;
5437         }
5438         break;
5439 
5440     case TCG_CALL_RET_BY_VEC:
5441         {
5442             TCGTemp *ts = arg_temp(op->args[0]);
5443 
5444             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5445             tcg_debug_assert(ts->temp_subindex == 0);
5446             if (!ts->mem_allocated) {
5447                 temp_allocate_frame(s, ts);
5448             }
5449             tcg_out_st(s, TCG_TYPE_V128,
5450                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5451                        ts->mem_base->reg, ts->mem_offset);
5452         }
5453         /* fall through to mark all parts in memory */
5454 
5455     case TCG_CALL_RET_BY_REF:
5456         /* The callee has performed a write through the reference. */
5457         for (i = 0; i < nb_oargs; i++) {
5458             TCGTemp *ts = arg_temp(op->args[i]);
5459             ts->val_type = TEMP_VAL_MEM;
5460         }
5461         break;
5462 
5463     default:
5464         g_assert_not_reached();
5465     }
5466 
5467     /* Flush or discard output registers as needed. */
5468     for (i = 0; i < nb_oargs; i++) {
5469         TCGTemp *ts = arg_temp(op->args[i]);
5470         if (NEED_SYNC_ARG(i)) {
5471             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5472         } else if (IS_DEAD_ARG(i)) {
5473             temp_dead(s, ts);
5474         }
5475     }
5476 }
5477 
5478 /**
5479  * atom_and_align_for_opc:
5480  * @s: tcg context
5481  * @opc: memory operation code
5482  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5483  * @allow_two_ops: true if we are prepared to issue two operations
5484  *
5485  * Return the alignment and atomicity to use for the inline fast path
5486  * for the given memory operation.  The alignment may be larger than
5487  * that specified in @opc, and the correct alignment will be diagnosed
5488  * by the slow path helper.
5489  *
5490  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5491  * and issue two loads or stores for subalignment.
5492  */
5493 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5494                                            MemOp host_atom, bool allow_two_ops)
5495 {
5496     MemOp align = get_alignment_bits(opc);
5497     MemOp size = opc & MO_SIZE;
5498     MemOp half = size ? size - 1 : 0;
5499     MemOp atom = opc & MO_ATOM_MASK;
5500     MemOp atmax;
5501 
5502     switch (atom) {
5503     case MO_ATOM_NONE:
5504         /* The operation requires no specific atomicity. */
5505         atmax = MO_8;
5506         break;
5507 
5508     case MO_ATOM_IFALIGN:
5509         atmax = size;
5510         break;
5511 
5512     case MO_ATOM_IFALIGN_PAIR:
5513         atmax = half;
5514         break;
5515 
5516     case MO_ATOM_WITHIN16:
5517         atmax = size;
5518         if (size == MO_128) {
5519             /* Misalignment implies !within16, and therefore no atomicity. */
5520         } else if (host_atom != MO_ATOM_WITHIN16) {
5521             /* The host does not implement within16, so require alignment. */
5522             align = MAX(align, size);
5523         }
5524         break;
5525 
5526     case MO_ATOM_WITHIN16_PAIR:
5527         atmax = size;
5528         /*
5529          * Misalignment implies !within16, and therefore half atomicity.
5530          * Any host prepared for two operations can implement this with
5531          * half alignment.
5532          */
5533         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5534             align = MAX(align, half);
5535         }
5536         break;
5537 
5538     case MO_ATOM_SUBALIGN:
5539         atmax = size;
5540         if (host_atom != MO_ATOM_SUBALIGN) {
5541             /* If unaligned but not odd, there are subobjects up to half. */
5542             if (allow_two_ops) {
5543                 align = MAX(align, half);
5544             } else {
5545                 align = MAX(align, size);
5546             }
5547         }
5548         break;
5549 
5550     default:
5551         g_assert_not_reached();
5552     }
5553 
5554     return (TCGAtomAlign){ .atom = atmax, .align = align };
5555 }
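
/*
 * Worked examples (illustrative): for an 8-byte load tagged
 * MO_64 | MO_ATOM_WITHIN16 on a host whose accesses are only atomic
 * when aligned (host_atom == MO_ATOM_IFALIGN), align is raised to
 * MO_64 so the fast path may assume single-copy atomicity, and
 * atom = MO_64 is reported.  For MO_64 | MO_ATOM_SUBALIGN with
 * allow_two_ops, only half alignment (MO_32) is forced, since two
 * 4-byte operations cover the remaining subobjects.
 */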
5556 
5557 /*
5558  * Similarly for qemu_ld/st slow path helpers.
5559  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5560  * using only the provided backend tcg_out_* functions.
5561  */
5562 
5563 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5564 {
5565     int ofs = arg_slot_stk_ofs(slot);
5566 
5567     /*
5568      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does
5569      * not require extension to uint64_t, adjust the offset for uint32_t.
5570      */
5571     if (HOST_BIG_ENDIAN &&
5572         TCG_TARGET_REG_BITS == 64 &&
5573         type == TCG_TYPE_I32) {
5574         ofs += 4;
5575     }
5576     return ofs;
5577 }
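
/*
 * Worked example (illustrative, assuming arg_slot_stk_ofs(slot) is the
 * usual TCG_TARGET_CALL_STACK_OFFSET + slot * sizeof(tcg_target_long)):
 * a TCG_TYPE_I32 argument in slot 2 on a big-endian 64-bit host gets
 * ofs = base + 16 + 4, i.e. the least-significant half of the 8-byte
 * slot, which is where such a callee reads a uint32_t stack argument.
 * Little-endian hosts need no adjustment.
 */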
5578 
5579 static void tcg_out_helper_load_slots(TCGContext *s,
5580                                       unsigned nmov, TCGMovExtend *mov,
5581                                       const TCGLdstHelperParam *parm)
5582 {
5583     unsigned i;
5584     TCGReg dst3;
5585 
5586     /*
5587      * Start from the end, storing to the stack first.
5588      * This frees those registers, so we need not consider overlap.
5589      */
5590     for (i = nmov; i-- > 0; ) {
5591         unsigned slot = mov[i].dst;
5592 
5593         if (arg_slot_reg_p(slot)) {
5594             goto found_reg;
5595         }
5596 
5597         TCGReg src = mov[i].src;
5598         TCGType dst_type = mov[i].dst_type;
5599         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5600 
5601         /* The argument is going onto the stack; extend into scratch. */
5602         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5603             tcg_debug_assert(parm->ntmp != 0);
5604             mov[i].dst = src = parm->tmp[0];
5605             tcg_out_movext1(s, &mov[i]);
5606         }
5607 
5608         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5609                    tcg_out_helper_stk_ofs(dst_type, slot));
5610     }
5611     return;
5612 
5613  found_reg:
5614     /*
5615      * The remaining arguments are in registers.
5616      * Convert slot numbers to argument registers.
5617      */
5618     nmov = i + 1;
5619     for (i = 0; i < nmov; ++i) {
5620         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5621     }
5622 
5623     switch (nmov) {
5624     case 4:
5625         /* The backend must have provided enough temps for the worst case. */
5626         tcg_debug_assert(parm->ntmp >= 2);
5627 
5628         dst3 = mov[3].dst;
5629         for (unsigned j = 0; j < 3; ++j) {
5630             if (dst3 == mov[j].src) {
5631                 /*
5632                  * Conflict. Copy the source to a temporary, perform the
5633                  * remaining moves, then the extension from our scratch
5634                  * on the way out.
5635                  */
5636                 TCGReg scratch = parm->tmp[1];
5637 
5638                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5639                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5640                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5641                 return;
5642             }
5643         }
5644 
5645         /* No conflicts: perform this move and continue. */
5646         tcg_out_movext1(s, &mov[3]);
5647         /* fall through */
5648 
5649     case 3:
5650         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5651                         parm->ntmp ? parm->tmp[0] : -1);
5652         break;
5653     case 2:
5654         tcg_out_movext2(s, mov, mov + 1,
5655                         parm->ntmp ? parm->tmp[0] : -1);
5656         break;
5657     case 1:
5658         tcg_out_movext1(s, mov);
5659         break;
5660     default:
5661         g_assert_not_reached();
5662     }
5663 }
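
/*
 * Illustration of the four-move conflict case above (hypothetical host
 * registers): with mov[] = { r1<-r2, r3<-r4, r5<-r6, r2<-r7 }, the last
 * destination r2 is still needed as the source of mov[0], so r7 is
 * copied to the scratch register first, the three leading moves are
 * resolved by tcg_out_movext3(), and the final extension into r2 is
 * performed from scratch on the way out.
 */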
5664 
5665 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5666                                     TCGType type, tcg_target_long imm,
5667                                     const TCGLdstHelperParam *parm)
5668 {
5669     if (arg_slot_reg_p(slot)) {
5670         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5671     } else {
5672         int ofs = tcg_out_helper_stk_ofs(type, slot);
5673         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5674             tcg_debug_assert(parm->ntmp != 0);
5675             tcg_out_movi(s, type, parm->tmp[0], imm);
5676             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5677         }
5678     }
5679 }
5680 
5681 static void tcg_out_helper_load_common_args(TCGContext *s,
5682                                             const TCGLabelQemuLdst *ldst,
5683                                             const TCGLdstHelperParam *parm,
5684                                             const TCGHelperInfo *info,
5685                                             unsigned next_arg)
5686 {
5687     TCGMovExtend ptr_mov = {
5688         .dst_type = TCG_TYPE_PTR,
5689         .src_type = TCG_TYPE_PTR,
5690         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5691     };
5692     const TCGCallArgumentLoc *loc = &info->in[0];
5693     TCGType type;
5694     unsigned slot;
5695     tcg_target_ulong imm;
5696 
5697     /*
5698      * Handle env, which is always first.
5699      */
5700     ptr_mov.dst = loc->arg_slot;
5701     ptr_mov.src = TCG_AREG0;
5702     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5703 
5704     /*
5705      * Handle oi.
5706      */
5707     imm = ldst->oi;
5708     loc = &info->in[next_arg];
5709     type = TCG_TYPE_I32;
5710     switch (loc->kind) {
5711     case TCG_CALL_ARG_NORMAL:
5712         break;
5713     case TCG_CALL_ARG_EXTEND_U:
5714     case TCG_CALL_ARG_EXTEND_S:
5715         /* No extension required for MemOpIdx. */
5716         tcg_debug_assert(imm <= INT32_MAX);
5717         type = TCG_TYPE_REG;
5718         break;
5719     default:
5720         g_assert_not_reached();
5721     }
5722     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5723     next_arg++;
5724 
5725     /*
5726      * Handle ra.
5727      */
5728     loc = &info->in[next_arg];
5729     slot = loc->arg_slot;
5730     if (parm->ra_gen) {
5731         int arg_reg = -1;
5732         TCGReg ra_reg;
5733 
5734         if (arg_slot_reg_p(slot)) {
5735             arg_reg = tcg_target_call_iarg_regs[slot];
5736         }
5737         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5738 
5739         ptr_mov.dst = slot;
5740         ptr_mov.src = ra_reg;
5741         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5742     } else {
5743         imm = (uintptr_t)ldst->raddr;
5744         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5745     }
5746 }
5747 
5748 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5749                                        const TCGCallArgumentLoc *loc,
5750                                        TCGType dst_type, TCGType src_type,
5751                                        TCGReg lo, TCGReg hi)
5752 {
5753     MemOp reg_mo;
5754 
5755     if (dst_type <= TCG_TYPE_REG) {
5756         MemOp src_ext;
5757 
5758         switch (loc->kind) {
5759         case TCG_CALL_ARG_NORMAL:
5760             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5761             break;
5762         case TCG_CALL_ARG_EXTEND_U:
5763             dst_type = TCG_TYPE_REG;
5764             src_ext = MO_UL;
5765             break;
5766         case TCG_CALL_ARG_EXTEND_S:
5767             dst_type = TCG_TYPE_REG;
5768             src_ext = MO_SL;
5769             break;
5770         default:
5771             g_assert_not_reached();
5772         }
5773 
5774         mov[0].dst = loc->arg_slot;
5775         mov[0].dst_type = dst_type;
5776         mov[0].src = lo;
5777         mov[0].src_type = src_type;
5778         mov[0].src_ext = src_ext;
5779         return 1;
5780     }
5781 
5782     if (TCG_TARGET_REG_BITS == 32) {
5783         assert(dst_type == TCG_TYPE_I64);
5784         reg_mo = MO_32;
5785     } else {
5786         assert(dst_type == TCG_TYPE_I128);
5787         reg_mo = MO_64;
5788     }
5789 
5790     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5791     mov[0].src = lo;
5792     mov[0].dst_type = TCG_TYPE_REG;
5793     mov[0].src_type = TCG_TYPE_REG;
5794     mov[0].src_ext = reg_mo;
5795 
5796     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5797     mov[1].src = hi;
5798     mov[1].dst_type = TCG_TYPE_REG;
5799     mov[1].src_type = TCG_TYPE_REG;
5800     mov[1].src_ext = reg_mo;
5801 
5802     return 2;
5803 }
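
/*
 * Sketch (illustrative): on a 32-bit host passing a TCG_TYPE_I64 value,
 * the two TCGMovExtend entries built above send the low word to
 * loc[HOST_BIG_ENDIAN].arg_slot and the high word to the other slot.
 * A little-endian host thus passes (lo, hi) in consecutive argument
 * slots while a big-endian host passes (hi, lo), matching the memory
 * layout of a 64-bit helper parameter.
 */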
5804 
5805 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5806                                    const TCGLdstHelperParam *parm)
5807 {
5808     const TCGHelperInfo *info;
5809     const TCGCallArgumentLoc *loc;
5810     TCGMovExtend mov[2];
5811     unsigned next_arg, nmov;
5812     MemOp mop = get_memop(ldst->oi);
5813 
5814     switch (mop & MO_SIZE) {
5815     case MO_8:
5816     case MO_16:
5817     case MO_32:
5818         info = &info_helper_ld32_mmu;
5819         break;
5820     case MO_64:
5821         info = &info_helper_ld64_mmu;
5822         break;
5823     case MO_128:
5824         info = &info_helper_ld128_mmu;
5825         break;
5826     default:
5827         g_assert_not_reached();
5828     }
5829 
5830     /* Defer env argument. */
5831     next_arg = 1;
5832 
5833     loc = &info->in[next_arg];
5834     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5835         /*
5836          * 32-bit host with 32-bit guest: zero-extend the guest address
5837          * to 64-bits for the helper by storing the low part, then
5838          * load a zero for the high part.
5839          */
5840         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5841                                TCG_TYPE_I32, TCG_TYPE_I32,
5842                                ldst->addrlo_reg, -1);
5843         tcg_out_helper_load_slots(s, 1, mov, parm);
5844 
5845         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5846                                 TCG_TYPE_I32, 0, parm);
5847         next_arg += 2;
5848     } else {
5849         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5850                                       ldst->addrlo_reg, ldst->addrhi_reg);
5851         tcg_out_helper_load_slots(s, nmov, mov, parm);
5852         next_arg += nmov;
5853     }
5854 
5855     switch (info->out_kind) {
5856     case TCG_CALL_RET_NORMAL:
5857     case TCG_CALL_RET_BY_VEC:
5858         break;
5859     case TCG_CALL_RET_BY_REF:
5860         /*
5861          * The return reference is in the first argument slot.
5862          * We need memory in which to return: re-use the top of stack.
5863          */
5864         {
5865             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5866 
5867             if (arg_slot_reg_p(0)) {
5868                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5869                                  TCG_REG_CALL_STACK, ofs_slot0);
5870             } else {
5871                 tcg_debug_assert(parm->ntmp != 0);
5872                 tcg_out_addi_ptr(s, parm->tmp[0],
5873                                  TCG_REG_CALL_STACK, ofs_slot0);
5874                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5875                            TCG_REG_CALL_STACK, ofs_slot0);
5876             }
5877         }
5878         break;
5879     default:
5880         g_assert_not_reached();
5881     }
5882 
5883     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5884 }
5885 
5886 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5887                                   bool load_sign,
5888                                   const TCGLdstHelperParam *parm)
5889 {
5890     MemOp mop = get_memop(ldst->oi);
5891     TCGMovExtend mov[2];
5892     int ofs_slot0;
5893 
5894     switch (ldst->type) {
5895     case TCG_TYPE_I64:
5896         if (TCG_TARGET_REG_BITS == 32) {
5897             break;
5898         }
5899         /* fall through */
5900 
5901     case TCG_TYPE_I32:
5902         mov[0].dst = ldst->datalo_reg;
5903         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5904         mov[0].dst_type = ldst->type;
5905         mov[0].src_type = TCG_TYPE_REG;
5906 
5907         /*
5908          * If load_sign, then we allowed the helper to perform the
5909          * appropriate sign extension to tcg_target_ulong, and all
5910          * we need now is a plain move.
5911          *
5912          * If not, then we expect the relevant extension
5913          * instruction to be no more expensive than a move, and
5914          * we thus save the icache etc by only using one of two
5915          * helper functions.
5916          */
5917         if (load_sign || !(mop & MO_SIGN)) {
5918             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5919                 mov[0].src_ext = MO_32;
5920             } else {
5921                 mov[0].src_ext = MO_64;
5922             }
5923         } else {
5924             mov[0].src_ext = mop & MO_SSIZE;
5925         }
5926         tcg_out_movext1(s, mov);
5927         return;
5928 
5929     case TCG_TYPE_I128:
5930         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5931         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5932         switch (TCG_TARGET_CALL_RET_I128) {
5933         case TCG_CALL_RET_NORMAL:
5934             break;
5935         case TCG_CALL_RET_BY_VEC:
5936             tcg_out_st(s, TCG_TYPE_V128,
5937                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5938                        TCG_REG_CALL_STACK, ofs_slot0);
5939             /* fall through */
5940         case TCG_CALL_RET_BY_REF:
5941             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5942                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5943             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5944                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5945             return;
5946         default:
5947             g_assert_not_reached();
5948         }
5949         break;
5950 
5951     default:
5952         g_assert_not_reached();
5953     }
5954 
5955     mov[0].dst = ldst->datalo_reg;
5956     mov[0].src =
5957         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5958     mov[0].dst_type = TCG_TYPE_REG;
5959     mov[0].src_type = TCG_TYPE_REG;
5960     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5961 
5962     mov[1].dst = ldst->datahi_reg;
5963     mov[1].src =
5964         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5965     mov[1].dst_type = TCG_TYPE_REG;
5966     mov[1].src_type = TCG_TYPE_REG;
5967     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5968 
5969     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5970 }
5971 
5972 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5973                                    const TCGLdstHelperParam *parm)
5974 {
5975     const TCGHelperInfo *info;
5976     const TCGCallArgumentLoc *loc;
5977     TCGMovExtend mov[4];
5978     TCGType data_type;
5979     unsigned next_arg, nmov, n;
5980     MemOp mop = get_memop(ldst->oi);
5981 
5982     switch (mop & MO_SIZE) {
5983     case MO_8:
5984     case MO_16:
5985     case MO_32:
5986         info = &info_helper_st32_mmu;
5987         data_type = TCG_TYPE_I32;
5988         break;
5989     case MO_64:
5990         info = &info_helper_st64_mmu;
5991         data_type = TCG_TYPE_I64;
5992         break;
5993     case MO_128:
5994         info = &info_helper_st128_mmu;
5995         data_type = TCG_TYPE_I128;
5996         break;
5997     default:
5998         g_assert_not_reached();
5999     }
6000 
6001     /* Defer env argument. */
6002     next_arg = 1;
6003     nmov = 0;
6004 
6005     /* Handle addr argument. */
6006     loc = &info->in[next_arg];
6007     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6008         /*
6009          * 32-bit host with 32-bit guest: zero-extend the guest address
6010          * to 64-bits for the helper by storing the low part.  Later,
6011          * after we have processed the register inputs, we will load a
6012          * zero for the high part.
6013          */
6014         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6015                                TCG_TYPE_I32, TCG_TYPE_I32,
6016                                ldst->addrlo_reg, -1);
6017         next_arg += 2;
6018         nmov += 1;
6019     } else {
6020         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6021                                    ldst->addrlo_reg, ldst->addrhi_reg);
6022         next_arg += n;
6023         nmov += n;
6024     }
6025 
6026     /* Handle data argument. */
6027     loc = &info->in[next_arg];
6028     switch (loc->kind) {
6029     case TCG_CALL_ARG_NORMAL:
6030     case TCG_CALL_ARG_EXTEND_U:
6031     case TCG_CALL_ARG_EXTEND_S:
6032         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6033                                    ldst->datalo_reg, ldst->datahi_reg);
6034         next_arg += n;
6035         nmov += n;
6036         tcg_out_helper_load_slots(s, nmov, mov, parm);
6037         break;
6038 
6039     case TCG_CALL_ARG_BY_REF:
6040         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6041         tcg_debug_assert(data_type == TCG_TYPE_I128);
6042         tcg_out_st(s, TCG_TYPE_I64,
6043                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6044                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6045         tcg_out_st(s, TCG_TYPE_I64,
6046                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6047                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6048 
6049         tcg_out_helper_load_slots(s, nmov, mov, parm);
6050 
6051         if (arg_slot_reg_p(loc->arg_slot)) {
6052             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6053                              TCG_REG_CALL_STACK,
6054                              arg_slot_stk_ofs(loc->ref_slot));
6055         } else {
6056             tcg_debug_assert(parm->ntmp != 0);
6057             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6058                              arg_slot_stk_ofs(loc->ref_slot));
6059             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6060                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6061         }
6062         next_arg += 2;
6063         break;
6064 
6065     default:
6066         g_assert_not_reached();
6067     }
6068 
6069     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6070         /* Zero extend the address by loading a zero for the high part. */
6071         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6072         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6073     }
6074 
6075     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6076 }
6077 
6078 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6079 {
6080     int i, start_words, num_insns;
6081     TCGOp *op;
6082 
6083     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6084                  && qemu_log_in_addr_range(pc_start))) {
6085         FILE *logfile = qemu_log_trylock();
6086         if (logfile) {
6087             fprintf(logfile, "OP:\n");
6088             tcg_dump_ops(s, logfile, false);
6089             fprintf(logfile, "\n");
6090             qemu_log_unlock(logfile);
6091         }
6092     }
6093 
6094 #ifdef CONFIG_DEBUG_TCG
6095     /* Ensure all labels referenced have been emitted.  */
6096     {
6097         TCGLabel *l;
6098         bool error = false;
6099 
6100         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6101             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6102                 qemu_log_mask(CPU_LOG_TB_OP,
6103                               "$L%d referenced but not present.\n", l->id);
6104                 error = true;
6105             }
6106         }
6107         assert(!error);
6108     }
6109 #endif
6110 
6111     tcg_optimize(s);
6112 
6113     reachable_code_pass(s);
6114     liveness_pass_0(s);
6115     liveness_pass_1(s);
6116 
6117     if (s->nb_indirects > 0) {
6118         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6119                      && qemu_log_in_addr_range(pc_start))) {
6120             FILE *logfile = qemu_log_trylock();
6121             if (logfile) {
6122                 fprintf(logfile, "OP before indirect lowering:\n");
6123                 tcg_dump_ops(s, logfile, false);
6124                 fprintf(logfile, "\n");
6125                 qemu_log_unlock(logfile);
6126             }
6127         }
6128 
6129         /* Replace indirect temps with direct temps.  */
6130         if (liveness_pass_2(s)) {
6131             /* If changes were made, re-run liveness.  */
6132             liveness_pass_1(s);
6133         }
6134     }
6135 
6136     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6137                  && qemu_log_in_addr_range(pc_start))) {
6138         FILE *logfile = qemu_log_trylock();
6139         if (logfile) {
6140             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6141             tcg_dump_ops(s, logfile, true);
6142             fprintf(logfile, "\n");
6143             qemu_log_unlock(logfile);
6144         }
6145     }
6146 
6147     /* Initialize goto_tb jump offsets. */
6148     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6149     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6150     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6151     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6152 
6153     tcg_reg_alloc_start(s);
6154 
6155     /*
6156      * Reset the buffer pointers when restarting after overflow.
6157      * TODO: Move this into translate-all.c with the rest of the
6158      * buffer management.  Having only this done here is confusing.
6159      */
6160     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6161     s->code_ptr = s->code_buf;
6162 
6163 #ifdef TCG_TARGET_NEED_LDST_LABELS
6164     QSIMPLEQ_INIT(&s->ldst_labels);
6165 #endif
6166 #ifdef TCG_TARGET_NEED_POOL_LABELS
6167     s->pool_labels = NULL;
6168 #endif
6169 
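    /*
     * Reserve one row of insn_start_words uint64_t values per guest
     * instruction; the rows are filled in at each INDEX_op_insn_start
     * below.
     */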
6170     start_words = s->insn_start_words;
6171     s->gen_insn_data =
6172         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6173 
6174     tcg_out_tb_start(s);
6175 
6176     num_insns = -1;
6177     QTAILQ_FOREACH(op, &s->ops, link) {
6178         TCGOpcode opc = op->opc;
6179 
6180         switch (opc) {
6181         case INDEX_op_mov_i32:
6182         case INDEX_op_mov_i64:
6183         case INDEX_op_mov_vec:
6184             tcg_reg_alloc_mov(s, op);
6185             break;
6186         case INDEX_op_dup_vec:
6187             tcg_reg_alloc_dup(s, op);
6188             break;
6189         case INDEX_op_insn_start:
6190             if (num_insns >= 0) {
6191                 size_t off = tcg_current_code_size(s);
6192                 s->gen_insn_end_off[num_insns] = off;
6193                 /* Assert that we do not overflow our stored offset.  */
6194                 assert(s->gen_insn_end_off[num_insns] == off);
6195             }
6196             num_insns++;
6197             for (i = 0; i < start_words; ++i) {
6198                 s->gen_insn_data[num_insns * start_words + i] =
6199                     tcg_get_insn_start_param(op, i);
6200             }
6201             break;
6202         case INDEX_op_discard:
6203             temp_dead(s, arg_temp(op->args[0]));
6204             break;
6205         case INDEX_op_set_label:
6206             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6207             tcg_out_label(s, arg_label(op->args[0]));
6208             break;
6209         case INDEX_op_call:
6210             tcg_reg_alloc_call(s, op);
6211             break;
6212         case INDEX_op_exit_tb:
6213             tcg_out_exit_tb(s, op->args[0]);
6214             break;
6215         case INDEX_op_goto_tb:
6216             tcg_out_goto_tb(s, op->args[0]);
6217             break;
6218         case INDEX_op_dup2_vec:
6219             if (tcg_reg_alloc_dup2(s, op)) {
6220                 break;
6221             }
6222             /* fall through */
6223         default:
6224             /* Sanity check that we've not introduced any unhandled opcodes. */
6225             tcg_debug_assert(tcg_op_supported(opc));
6226             /* Note: it would be much faster to have specialized
6227                register allocator functions for some common
6228                argument patterns.  */
6229             tcg_reg_alloc_op(s, op);
6230             break;
6231         }
6232         /* Test for (pending) buffer overflow.  The assumption is that any
6233            one operation beginning below the high water mark cannot overrun
6234            the buffer completely.  Thus we can test for overflow after
6235            generating code without having to check during generation.  */
6236         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6237             return -1;
6238         }
6239         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6240         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6241             return -2;
6242         }
6243     }
6244     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6245     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6246 
6247     /* Generate TB finalization at the end of block */
6248 #ifdef TCG_TARGET_NEED_LDST_LABELS
6249     i = tcg_out_ldst_finalize(s);
6250     if (i < 0) {
6251         return i;
6252     }
6253 #endif
6254 #ifdef TCG_TARGET_NEED_POOL_LABELS
6255     i = tcg_out_pool_finalize(s);
6256     if (i < 0) {
6257         return i;
6258     }
6259 #endif
6260     if (!tcg_resolve_relocs(s)) {
6261         return -2;
6262     }
6263 
6264 #ifndef CONFIG_TCG_INTERPRETER
6265     /* flush instruction cache */
6266     /* Flush the instruction cache; the RX and RW views of the buffer may be distinct mappings. */
6267                         (uintptr_t)s->code_buf,
6268                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6269 #endif
6270 
6271     return tcg_current_code_size(s);
6272 }
6273 
6274 #ifdef ELF_HOST_MACHINE
6275 /* In order to use this feature, the backend needs to do three things:
6276 
6277    (1) Define ELF_HOST_MACHINE to indicate both the value to put
6278        into the ELF image and support for the feature.
6279 
6280    (2) Define tcg_register_jit.  This should create a buffer containing
6281        the contents of a .debug_frame section that describes the post-
6282        prologue unwind info for the tcg machine; see the sketch below.
6283 
6284    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6285 */
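/*
 * For illustration only, a minimal backend implementation might follow
 * the disabled sketch below.  The DebugFrame layout, the CFA opcode
 * bytes, and the return_column value are assumptions for a
 * hypothetical host; a real backend must describe its actual prologue.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];                  /* hypothetical CFA rule */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4,  /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,                /* sleb128 -8 */
    .h.cie.return_column = 30,               /* hypothetical link register */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
    /* func_start and func_len are filled in by tcg_register_jit_int.  */

    .fde_def_cfa = {
        12, 31,                              /* DW_CFA_def_cfa reg 31, ... */
        0x90, 0x02,                          /* ... uleb128 offset 272 */
    },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif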
6286 
6287 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6288 typedef enum {
6289     JIT_NOACTION = 0,
6290     JIT_REGISTER_FN,
6291     JIT_UNREGISTER_FN
6292 } jit_actions_t;
6293 
6294 struct jit_code_entry {
6295     struct jit_code_entry *next_entry;
6296     struct jit_code_entry *prev_entry;
6297     const void *symfile_addr;
6298     uint64_t symfile_size;
6299 };
6300 
6301 struct jit_descriptor {
6302     uint32_t version;
6303     uint32_t action_flag;
6304     struct jit_code_entry *relevant_entry;
6305     struct jit_code_entry *first_entry;
6306 };
6307 
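/*
 * GDB sets a breakpoint on this function to be notified of new JIT
 * registrations; the noinline attribute and the empty asm keep the
 * compiler from discarding calls to it.
 */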
6308 void __jit_debug_register_code(void) __attribute__((noinline));
6309 void __jit_debug_register_code(void)
6310 {
6311     asm("");
6312 }
6313 
6314 /* Must statically initialize the version, because GDB may check
6315    the version before we can set it.  */
6316 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6317 
6318 /* End GDB interface.  */
6319 
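/*
 * Return the offset of @str within the nul-separated string table
 * @strtab.  There is no failure path: callers only look up strings
 * known to be present in img_template.str below.
 */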
6320 static int find_string(const char *strtab, const char *str)
6321 {
6322     const char *p = strtab + 1;
6323 
6324     while (1) {
6325         if (strcmp(p, str) == 0) {
6326             return p - strtab;
6327         }
6328         p += strlen(p) + 1;
6329     }
6330 }
6331 
6332 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6333                                  const void *debug_frame,
6334                                  size_t debug_frame_size)
6335 {
6336     struct __attribute__((packed)) DebugInfo {
6337         uint32_t  len;
6338         uint16_t  version;
6339         uint32_t  abbrev;
6340         uint8_t   ptr_size;
6341         uint8_t   cu_die;
6342         uint16_t  cu_lang;
6343         uintptr_t cu_low_pc;
6344         uintptr_t cu_high_pc;
6345         uint8_t   fn_die;
6346         char      fn_name[16];
6347         uintptr_t fn_low_pc;
6348         uintptr_t fn_high_pc;
6349         uint8_t   cu_eoc;
6350     };
6351 
6352     struct ElfImage {
6353         ElfW(Ehdr) ehdr;
6354         ElfW(Phdr) phdr;
6355         ElfW(Shdr) shdr[7];
6356         ElfW(Sym)  sym[2];
6357         struct DebugInfo di;
6358         uint8_t    da[24];
6359         char       str[80];
6360     };
6361 
6362     struct ElfImage *img;
6363 
6364     static const struct ElfImage img_template = {
6365         .ehdr = {
6366             .e_ident[EI_MAG0] = ELFMAG0,
6367             .e_ident[EI_MAG1] = ELFMAG1,
6368             .e_ident[EI_MAG2] = ELFMAG2,
6369             .e_ident[EI_MAG3] = ELFMAG3,
6370             .e_ident[EI_CLASS] = ELF_CLASS,
6371             .e_ident[EI_DATA] = ELF_DATA,
6372             .e_ident[EI_VERSION] = EV_CURRENT,
6373             .e_type = ET_EXEC,
6374             .e_machine = ELF_HOST_MACHINE,
6375             .e_version = EV_CURRENT,
6376             .e_phoff = offsetof(struct ElfImage, phdr),
6377             .e_shoff = offsetof(struct ElfImage, shdr),
6378             .e_ehsize = sizeof(ElfW(Ehdr)),
6379             .e_phentsize = sizeof(ElfW(Phdr)),
6380             .e_phnum = 1,
6381             .e_shentsize = sizeof(ElfW(Shdr)),
6382             .e_shnum = ARRAY_SIZE(img->shdr),
6383             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6384 #ifdef ELF_HOST_FLAGS
6385             .e_flags = ELF_HOST_FLAGS,
6386 #endif
6387 #ifdef ELF_OSABI
6388             .e_ident[EI_OSABI] = ELF_OSABI,
6389 #endif
6390         },
6391         .phdr = {
6392             .p_type = PT_LOAD,
6393             .p_flags = PF_X,
6394         },
6395         .shdr = {
6396             [0] = { .sh_type = SHT_NULL },
6397             /* Trick: The contents of code_gen_buffer are not present in
6398                this fake ELF file; that got allocated elsewhere.  Therefore
6399                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6400                will not look for contents.  We can record any address.  */
6401             [1] = { /* .text */
6402                 .sh_type = SHT_NOBITS,
6403                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6404             },
6405             [2] = { /* .debug_info */
6406                 .sh_type = SHT_PROGBITS,
6407                 .sh_offset = offsetof(struct ElfImage, di),
6408                 .sh_size = sizeof(struct DebugInfo),
6409             },
6410             [3] = { /* .debug_abbrev */
6411                 .sh_type = SHT_PROGBITS,
6412                 .sh_offset = offsetof(struct ElfImage, da),
6413                 .sh_size = sizeof(img->da),
6414             },
6415             [4] = { /* .debug_frame */
6416                 .sh_type = SHT_PROGBITS,
6417                 .sh_offset = sizeof(struct ElfImage),
6418             },
6419             [5] = { /* .symtab */
6420                 .sh_type = SHT_SYMTAB,
6421                 .sh_offset = offsetof(struct ElfImage, sym),
6422                 .sh_size = sizeof(img->sym),
6423                 .sh_info = 1,
6424                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6425                 .sh_entsize = sizeof(ElfW(Sym)),
6426             },
6427             [6] = { /* .strtab */
6428                 .sh_type = SHT_STRTAB,
6429                 .sh_offset = offsetof(struct ElfImage, str),
6430                 .sh_size = sizeof(img->str),
6431             }
6432         },
6433         .sym = {
6434             [1] = { /* code_gen_buffer */
6435                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6436                 .st_shndx = 1,
6437             }
6438         },
6439         .di = {
6440             .len = sizeof(struct DebugInfo) - 4,
6441             .version = 2,
6442             .ptr_size = sizeof(void *),
6443             .cu_die = 1,
6444             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6445             .fn_die = 2,
6446             .fn_name = "code_gen_buffer"
6447         },
6448         .da = {
6449             1,          /* abbrev number (the cu) */
6450             0x11, 1,    /* DW_TAG_compile_unit, has children */
6451             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6452             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6453             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6454             0, 0,       /* end of abbrev */
6455             2,          /* abbrev number (the fn) */
6456             0x2e, 0,    /* DW_TAG_subprogram, no children */
6457             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6458             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6459             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6460             0, 0,       /* end of abbrev */
6461             0           /* no more abbrev */
6462         },
6463         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6464                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6465     };
6466 
6467     /* We only need a single jit entry; statically allocate it.  */
6468     static struct jit_code_entry one_entry;
6469 
6470     uintptr_t buf = (uintptr_t)buf_ptr;
6471     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6472     DebugFrameHeader *dfh;
6473 
6474     img = g_malloc(img_size);
6475     *img = img_template;
6476 
6477     img->phdr.p_vaddr = buf;
6478     img->phdr.p_paddr = buf;
6479     img->phdr.p_memsz = buf_size;
6480 
6481     img->shdr[1].sh_name = find_string(img->str, ".text");
6482     img->shdr[1].sh_addr = buf;
6483     img->shdr[1].sh_size = buf_size;
6484 
6485     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6486     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6487 
6488     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6489     img->shdr[4].sh_size = debug_frame_size;
6490 
6491     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6492     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6493 
6494     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6495     img->sym[1].st_value = buf;
6496     img->sym[1].st_size = buf_size;
6497 
6498     img->di.cu_low_pc = buf;
6499     img->di.cu_high_pc = buf + buf_size;
6500     img->di.fn_low_pc = buf;
6501     img->di.fn_high_pc = buf + buf_size;
6502 
6503     dfh = (DebugFrameHeader *)(img + 1);
6504     memcpy(dfh, debug_frame, debug_frame_size);
6505     dfh->fde.func_start = buf;
6506     dfh->fde.func_len = buf_size;
6507 
6508 #ifdef DEBUG_JIT
6509     /* Enable this block to debug the ELF image file creation.
6510        One can use readelf, objdump, or other inspection utilities.  */
6511     {
6512         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6513         FILE *f = fopen(jit, "w+b");
6514         if (f) {
6515             if (fwrite(img, img_size, 1, f) != 1) {
6516                 /* Consume the return value to avoid a warning for fwrite.  */
6517             }
6518             fclose(f);
6519         }
6520     }
6521 #endif
6522 
6523     one_entry.symfile_addr = img;
6524     one_entry.symfile_size = img_size;
6525 
6526     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6527     __jit_debug_descriptor.relevant_entry = &one_entry;
6528     __jit_debug_descriptor.first_entry = &one_entry;
6529     __jit_debug_register_code();
6530 }
6531 #else
6532 /* No support for the feature.  Provide the entry point expected by exec.c,
6533    and implement the internal function we declared earlier.  */
6534 
6535 static void tcg_register_jit_int(const void *buf, size_t size,
6536                                  const void *debug_frame,
6537                                  size_t debug_frame_size)
6538 {
6539 }
6540 
6541 void tcg_register_jit(const void *buf, size_t buf_size)
6542 {
6543 }
6544 #endif /* ELF_HOST_MACHINE */
6545 
6546 #if !TCG_TARGET_MAYBE_vec
6547 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6548 {
6549     g_assert_not_reached();
6550 }
6551 #endif
6552