/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
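
/*
 * For instance, on a typical x86_64 Linux host UINTPTR_MAX == UINT64_MAX
 * and HOST_BIG_ENDIAN is 0, so the JIT debug image below is tagged
 * ELFCLASS64 / ELFDATA2LSB.
 */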

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
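
/*
 * Backends that need an out-of-line slow path queue one such record per
 * qemu_ld/st; tcg_out_ldst_finalize() (declared below) then walks the
 * queue after the main body of the TB has been emitted.
 */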

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
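
/*
 * For example, a 4-byte access that must be single-copy atomic and 4-byte
 * aligned would be described as { .atom = MO_32, .align = MO_32 }, both
 * fields holding lg2(4) = 2.
 */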

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = NULL;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
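
/*
 * Illustrative sketch: a backend whose insn unit is 4 bytes (e.g. aarch64)
 * emits one instruction per call,
 *
 *     tcg_out32(s, 0xd503201f);    (an AArch64 NOP)
 *
 * while a backend with 1-byte units (e.g. i386) composes each instruction
 * from several tcg_out8/tcg_out16/tcg_out32 calls.
 */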

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
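
/*
 * A sketch of the usual lifecycle, with R_EXAMPLE standing in for a
 * backend-specific relocation type:
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);   forward branch site
 *     ... emit more code ...
 *     tcg_out_label(s, l);                              bind the target
 *
 * Relocations recorded against the label are patched afterwards by
 * tcg_resolve_relocs() below.
 */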

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
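
/*
 * For example, sign-extending a 16-bit value held in an i32 source into
 * an i64 destination would be requested as
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src);
 *
 * which dispatches to tcg_out_ext16s() above.
 */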

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
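
/*
 * E.g. with hypothetical registers i1 = {dst = R0, src = R1} and
 * i2 = {dst = R1, src = R0}, the moves form a cycle: the data is either
 * swapped in place via tcg_out_xchg(), or R1 is parked in @scratch first
 * when the backend cannot xchg.
 */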

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
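
/*
 * As an example of the expansion: an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h becomes the enumerator c_o1_i2_r_r_ri here, and
 * the second include of the same header below produces the matching
 * constraint strings { .args_ct_str = { "r", "r", "ri" } }.
 */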

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
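
/*
 * Sketch of the allocation pattern: per-TB data is carved out of the pool
 * and released wholesale between translations,
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(*r));
 *     ...
 *     tcg_pool_reset(s);
 *
 * which frees the large chunks and recycles the small ones.
 */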

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
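
/*
 * Worked example: for info_helper_ld64_mmu above, the typemask packs five
 * 3-bit typecodes (return i64, then env, i64, i32, ptr).  Discarding the
 * return code with ">> 3" leaves the last argument's field as the highest
 * non-zero bits, so nargs = DIV_ROUND_UP(32 - clz32(typemask >> 3), 3) = 4.
 */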

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
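
/*
 * E.g. on a hypothetical host with 6 integer argument registers and an
 * 8-byte tcg_target_long, arg_slot 0..5 are register slots, while
 * arg_slot 8 is stack slot 2, at TCG_TARGET_CALL_STACK_OFFSET + 16.
 */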

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
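
/*
 * On a 64-bit host an Int128 argument thus consumes one regular argument
 * slot (the pointer) plus n = 2 ref slots for the callee-visible copy;
 * the second word is a TCG_CALL_ARG_BY_REF_N location that occupies no
 * regular slot of its own.
 */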

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
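
/*
 * For instance, with a 64-byte icache line and a hypothetical 0x158-byte
 * TranslationBlock, a tb placed at offset 0x40 ends at 0x198, so the
 * translated code begins at the next line boundary, offset 0x1c0.
 */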
1388 
1389 void tcg_prologue_init(TCGContext *s)
1390 {
1391     size_t prologue_size;
1392 
1393     s->code_ptr = s->code_gen_ptr;
1394     s->code_buf = s->code_gen_ptr;
1395     s->data_gen_ptr = NULL;
1396 
1397 #ifndef CONFIG_TCG_INTERPRETER
1398     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1399 #endif
1400 
1401 #ifdef TCG_TARGET_NEED_POOL_LABELS
1402     s->pool_labels = NULL;
1403 #endif
1404 
1405     qemu_thread_jit_write();
1406     /* Generate the prologue.  */
1407     tcg_target_qemu_prologue(s);
1408 
1409 #ifdef TCG_TARGET_NEED_POOL_LABELS
1410     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1411     {
1412         int result = tcg_out_pool_finalize(s);
1413         tcg_debug_assert(result == 0);
1414     }
1415 #endif
1416 
1417     prologue_size = tcg_current_code_size(s);
1418     perf_report_prologue(s->code_gen_ptr, prologue_size);
1419 
1420 #ifndef CONFIG_TCG_INTERPRETER
1421     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1422                         (uintptr_t)s->code_buf, prologue_size);
1423 #endif
1424 
1425     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1426         FILE *logfile = qemu_log_trylock();
1427         if (logfile) {
1428             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1429             if (s->data_gen_ptr) {
1430                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1431                 size_t data_size = prologue_size - code_size;
1432                 size_t i;
1433 
1434                 disas(logfile, s->code_gen_ptr, code_size);
1435 
1436                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1437                     if (sizeof(tcg_target_ulong) == 8) {
1438                         fprintf(logfile,
1439                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1440                                 (uintptr_t)s->data_gen_ptr + i,
1441                                 *(uint64_t *)(s->data_gen_ptr + i));
1442                     } else {
1443                         fprintf(logfile,
1444                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1445                                 (uintptr_t)s->data_gen_ptr + i,
1446                                 *(uint32_t *)(s->data_gen_ptr + i));
1447                     }
1448                 }
1449             } else {
1450                 disas(logfile, s->code_gen_ptr, prologue_size);
1451             }
1452             fprintf(logfile, "\n");
1453             qemu_log_unlock(logfile);
1454         }
1455     }
1456 
1457 #ifndef CONFIG_TCG_INTERPRETER
1458     /*
1459      * Assert that goto_ptr is implemented completely, setting an epilogue.
1460      * For tci, we use NULL as the signal to return from the interpreter,
1461      * so skip this check.
1462      */
1463     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1464 #endif
1465 
1466     tcg_region_prologue_set(s);
1467 }
1468 
1469 void tcg_func_start(TCGContext *s)
1470 {
1471     tcg_pool_reset(s);
1472     s->nb_temps = s->nb_globals;
1473 
1474     /* No temps have been previously allocated for size or locality.  */
1475     memset(s->free_temps, 0, sizeof(s->free_temps));
1476 
1477     /* No constant temps have been previously allocated. */
1478     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1479         if (s->const_table[i]) {
1480             g_hash_table_remove_all(s->const_table[i]);
1481         }
1482     }
1483 
1484     s->nb_ops = 0;
1485     s->nb_labels = 0;
1486     s->current_frame_offset = s->frame_start;
1487 
1488 #ifdef CONFIG_DEBUG_TCG
1489     s->goto_tb_issue_mask = 0;
1490 #endif
1491 
1492     QTAILQ_INIT(&s->ops);
1493     QTAILQ_INIT(&s->free_ops);
1494     QSIMPLEQ_INIT(&s->labels);
1495 
1496     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1497                      s->addr_type == TCG_TYPE_I64);
1498 
1499 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1500     tcg_debug_assert(s->tlb_fast_offset < 0);
1501     tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1502 #endif
1503 
1504     tcg_debug_assert(s->insn_start_words > 0);
1505 }
1506 
1507 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1508 {
1509     int n = s->nb_temps++;
1510 
1511     if (n >= TCG_MAX_TEMPS) {
1512         tcg_raise_tb_overflow(s);
1513     }
1514     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1515 }
1516 
1517 static TCGTemp *tcg_global_alloc(TCGContext *s)
1518 {
1519     TCGTemp *ts;
1520 
1521     tcg_debug_assert(s->nb_globals == s->nb_temps);
1522     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1523     s->nb_globals++;
1524     ts = tcg_temp_alloc(s);
1525     ts->kind = TEMP_GLOBAL;
1526 
1527     return ts;
1528 }
1529 
1530 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1531                                             TCGReg reg, const char *name)
1532 {
1533     TCGTemp *ts;
1534 
1535     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1536 
1537     ts = tcg_global_alloc(s);
1538     ts->base_type = type;
1539     ts->type = type;
1540     ts->kind = TEMP_FIXED;
1541     ts->reg = reg;
1542     ts->name = name;
1543     tcg_regset_set_reg(s->reserved_regs, reg);
1544 
1545     return ts;
1546 }
1547 
1548 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1549 {
1550     s->frame_start = start;
1551     s->frame_end = start + size;
1552     s->frame_temp
1553         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1554 }
1555 
1556 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1557                                      intptr_t offset, const char *name)
1558 {
1559     TCGContext *s = tcg_ctx;
1560     TCGTemp *base_ts = tcgv_ptr_temp(base);
1561     TCGTemp *ts = tcg_global_alloc(s);
1562     int indirect_reg = 0;
1563 
1564     switch (base_ts->kind) {
1565     case TEMP_FIXED:
1566         break;
1567     case TEMP_GLOBAL:
1568         /* We do not support double-indirect registers.  */
1569         tcg_debug_assert(!base_ts->indirect_reg);
1570         base_ts->indirect_base = 1;
1571         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1572                             ? 2 : 1);
1573         indirect_reg = 1;
1574         break;
1575     default:
1576         g_assert_not_reached();
1577     }
1578 
1579     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1580         TCGTemp *ts2 = tcg_global_alloc(s);
1581         char buf[64];
1582 
1583         ts->base_type = TCG_TYPE_I64;
1584         ts->type = TCG_TYPE_I32;
1585         ts->indirect_reg = indirect_reg;
1586         ts->mem_allocated = 1;
1587         ts->mem_base = base_ts;
1588         ts->mem_offset = offset;
1589         pstrcpy(buf, sizeof(buf), name);
1590         pstrcat(buf, sizeof(buf), "_0");
1591         ts->name = strdup(buf);
1592 
1593         tcg_debug_assert(ts2 == ts + 1);
1594         ts2->base_type = TCG_TYPE_I64;
1595         ts2->type = TCG_TYPE_I32;
1596         ts2->indirect_reg = indirect_reg;
1597         ts2->mem_allocated = 1;
1598         ts2->mem_base = base_ts;
1599         ts2->mem_offset = offset + 4;
1600         ts2->temp_subindex = 1;
1601         pstrcpy(buf, sizeof(buf), name);
1602         pstrcat(buf, sizeof(buf), "_1");
1603         ts2->name = strdup(buf);
1604     } else {
1605         ts->base_type = type;
1606         ts->type = type;
1607         ts->indirect_reg = indirect_reg;
1608         ts->mem_allocated = 1;
1609         ts->mem_base = base_ts;
1610         ts->mem_offset = offset;
1611         ts->name = name;
1612     }
1613     return ts;
1614 }
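
/*
 * This is how front ends expose CPUArchState fields as named globals,
 * e.g. (field and variable names illustrative):
 *
 *     cpu_pc = tcg_global_mem_new_i32(cpu_env,
 *                                     offsetof(CPUArchState, pc), "pc");
 */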
1615 
1616 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1617 {
1618     TCGContext *s = tcg_ctx;
1619     TCGTemp *ts;
1620     int n;
1621 
1622     if (kind == TEMP_EBB) {
1623         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1624 
1625         if (idx < TCG_MAX_TEMPS) {
1626             /* There is already an available temp with the right type.  */
1627             clear_bit(idx, s->free_temps[type].l);
1628 
1629             ts = &s->temps[idx];
1630             ts->temp_allocated = 1;
1631             tcg_debug_assert(ts->base_type == type);
1632             tcg_debug_assert(ts->kind == kind);
1633             return ts;
1634         }
1635     } else {
1636         tcg_debug_assert(kind == TEMP_TB);
1637     }
1638 
1639     switch (type) {
1640     case TCG_TYPE_I32:
1641     case TCG_TYPE_V64:
1642     case TCG_TYPE_V128:
1643     case TCG_TYPE_V256:
1644         n = 1;
1645         break;
1646     case TCG_TYPE_I64:
1647         n = 64 / TCG_TARGET_REG_BITS;
1648         break;
1649     case TCG_TYPE_I128:
1650         n = 128 / TCG_TARGET_REG_BITS;
1651         break;
1652     default:
1653         g_assert_not_reached();
1654     }
1655 
1656     ts = tcg_temp_alloc(s);
1657     ts->base_type = type;
1658     ts->temp_allocated = 1;
1659     ts->kind = kind;
1660 
1661     if (n == 1) {
1662         ts->type = type;
1663     } else {
1664         ts->type = TCG_TYPE_REG;
1665 
1666         for (int i = 1; i < n; ++i) {
1667             TCGTemp *ts2 = tcg_temp_alloc(s);
1668 
1669             tcg_debug_assert(ts2 == ts + i);
1670             ts2->base_type = type;
1671             ts2->type = TCG_TYPE_REG;
1672             ts2->temp_allocated = 1;
1673             ts2->temp_subindex = i;
1674             ts2->kind = kind;
1675         }
1676     }
1677     return ts;
1678 }
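
/*
 * Note the piecewise allocation above: on a 32-bit host a TCG_TYPE_I64
 * temp occupies two consecutive TCGTemps of type TCG_TYPE_REG with
 * temp_subindex 0 and 1, and TCG_TYPE_I128 likewise spans
 * 128 / TCG_TARGET_REG_BITS consecutive temps.
 */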
1679 
1680 TCGv_vec tcg_temp_new_vec(TCGType type)
1681 {
1682     TCGTemp *t;
1683 
1684 #ifdef CONFIG_DEBUG_TCG
1685     switch (type) {
1686     case TCG_TYPE_V64:
1687         assert(TCG_TARGET_HAS_v64);
1688         break;
1689     case TCG_TYPE_V128:
1690         assert(TCG_TARGET_HAS_v128);
1691         break;
1692     case TCG_TYPE_V256:
1693         assert(TCG_TARGET_HAS_v256);
1694         break;
1695     default:
1696         g_assert_not_reached();
1697     }
1698 #endif
1699 
1700     t = tcg_temp_new_internal(type, TEMP_EBB);
1701     return temp_tcgv_vec(t);
1702 }
1703 
1704 /* Create a new temp of the same type as an existing temp.  */
1705 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1706 {
1707     TCGTemp *t = tcgv_vec_temp(match);
1708 
1709     tcg_debug_assert(t->temp_allocated != 0);
1710 
1711     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1712     return temp_tcgv_vec(t);
1713 }
1714 
1715 void tcg_temp_free_internal(TCGTemp *ts)
1716 {
1717     TCGContext *s = tcg_ctx;
1718 
1719     switch (ts->kind) {
1720     case TEMP_CONST:
1721     case TEMP_TB:
1722         /* Silently ignore free. */
1723         break;
1724     case TEMP_EBB:
1725         tcg_debug_assert(ts->temp_allocated != 0);
1726         ts->temp_allocated = 0;
1727         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1728         break;
1729     default:
1730         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1731         g_assert_not_reached();
1732     }
1733 }
1734 
1735 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1736 {
1737     TCGContext *s = tcg_ctx;
1738     GHashTable *h = s->const_table[type];
1739     TCGTemp *ts;
1740 
1741     if (h == NULL) {
1742         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1743         s->const_table[type] = h;
1744     }
1745 
1746     ts = g_hash_table_lookup(h, &val);
1747     if (ts == NULL) {
1748         int64_t *val_ptr;
1749 
1750         ts = tcg_temp_alloc(s);
1751 
1752         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1753             TCGTemp *ts2 = tcg_temp_alloc(s);
1754 
1755             tcg_debug_assert(ts2 == ts + 1);
1756 
1757             ts->base_type = TCG_TYPE_I64;
1758             ts->type = TCG_TYPE_I32;
1759             ts->kind = TEMP_CONST;
1760             ts->temp_allocated = 1;
1761 
1762             ts2->base_type = TCG_TYPE_I64;
1763             ts2->type = TCG_TYPE_I32;
1764             ts2->kind = TEMP_CONST;
1765             ts2->temp_allocated = 1;
1766             ts2->temp_subindex = 1;
1767 
1768             /*
1769              * Retain the full value of the 64-bit constant in the low
1770              * part, so that the hash table works.  Actual uses will
1771              * truncate the value to the low part.
1772              */
1773             ts[HOST_BIG_ENDIAN].val = val;
1774             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1775             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1776         } else {
1777             ts->base_type = type;
1778             ts->type = type;
1779             ts->kind = TEMP_CONST;
1780             ts->temp_allocated = 1;
1781             ts->val = val;
1782             val_ptr = &ts->val;
1783         }
1784         g_hash_table_insert(h, val_ptr, ts);
1785     }
1786 
1787     return ts;
1788 }
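
/*
 * Constants are interned per (type, value) within one translation, so
 * repeated lookups return the same temp:
 *
 *     TCGTemp *a = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     TCGTemp *b = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     // a == b; TEMP_CONST temps are never freed within the TB
 */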
1789 
1790 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1791 {
1792     val = dup_const(vece, val);
1793     return temp_tcgv_vec(tcg_constant_internal(type, val));
1794 }
1795 
1796 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1797 {
1798     TCGTemp *t = tcgv_vec_temp(match);
1799 
1800     tcg_debug_assert(t->temp_allocated != 0);
1801     return tcg_constant_vec(t->base_type, vece, val);
1802 }
1803 
1804 #ifdef CONFIG_DEBUG_TCG
1805 size_t temp_idx(TCGTemp *ts)
1806 {
1807     ptrdiff_t n = ts - tcg_ctx->temps;
1808     assert(n >= 0 && n < tcg_ctx->nb_temps);
1809     return n;
1810 }
1811 
1812 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1813 {
1814     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1815 
1816     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1817     assert(o % sizeof(TCGTemp) == 0);
1818 
1819     return (void *)tcg_ctx + (uintptr_t)v;
1820 }
1821 #endif /* CONFIG_DEBUG_TCG */
1822 
1823 /* Return true if OP may appear in the opcode stream.
1824    Test the runtime variable that controls each opcode.  */
1825 bool tcg_op_supported(TCGOpcode op)
1826 {
1827     const bool have_vec
1828         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1829 
1830     switch (op) {
1831     case INDEX_op_discard:
1832     case INDEX_op_set_label:
1833     case INDEX_op_call:
1834     case INDEX_op_br:
1835     case INDEX_op_mb:
1836     case INDEX_op_insn_start:
1837     case INDEX_op_exit_tb:
1838     case INDEX_op_goto_tb:
1839     case INDEX_op_goto_ptr:
1840     case INDEX_op_qemu_ld_a32_i32:
1841     case INDEX_op_qemu_ld_a64_i32:
1842     case INDEX_op_qemu_st_a32_i32:
1843     case INDEX_op_qemu_st_a64_i32:
1844     case INDEX_op_qemu_ld_a32_i64:
1845     case INDEX_op_qemu_ld_a64_i64:
1846     case INDEX_op_qemu_st_a32_i64:
1847     case INDEX_op_qemu_st_a64_i64:
1848         return true;
1849 
1850     case INDEX_op_qemu_st8_a32_i32:
1851     case INDEX_op_qemu_st8_a64_i32:
1852         return TCG_TARGET_HAS_qemu_st8_i32;
1853 
1854     case INDEX_op_qemu_ld_a32_i128:
1855     case INDEX_op_qemu_ld_a64_i128:
1856     case INDEX_op_qemu_st_a32_i128:
1857     case INDEX_op_qemu_st_a64_i128:
1858         return TCG_TARGET_HAS_qemu_ldst_i128;
1859 
1860     case INDEX_op_mov_i32:
1861     case INDEX_op_setcond_i32:
1862     case INDEX_op_brcond_i32:
1863     case INDEX_op_ld8u_i32:
1864     case INDEX_op_ld8s_i32:
1865     case INDEX_op_ld16u_i32:
1866     case INDEX_op_ld16s_i32:
1867     case INDEX_op_ld_i32:
1868     case INDEX_op_st8_i32:
1869     case INDEX_op_st16_i32:
1870     case INDEX_op_st_i32:
1871     case INDEX_op_add_i32:
1872     case INDEX_op_sub_i32:
1873     case INDEX_op_mul_i32:
1874     case INDEX_op_and_i32:
1875     case INDEX_op_or_i32:
1876     case INDEX_op_xor_i32:
1877     case INDEX_op_shl_i32:
1878     case INDEX_op_shr_i32:
1879     case INDEX_op_sar_i32:
1880         return true;
1881 
1882     case INDEX_op_movcond_i32:
1883         return TCG_TARGET_HAS_movcond_i32;
1884     case INDEX_op_div_i32:
1885     case INDEX_op_divu_i32:
1886         return TCG_TARGET_HAS_div_i32;
1887     case INDEX_op_rem_i32:
1888     case INDEX_op_remu_i32:
1889         return TCG_TARGET_HAS_rem_i32;
1890     case INDEX_op_div2_i32:
1891     case INDEX_op_divu2_i32:
1892         return TCG_TARGET_HAS_div2_i32;
1893     case INDEX_op_rotl_i32:
1894     case INDEX_op_rotr_i32:
1895         return TCG_TARGET_HAS_rot_i32;
1896     case INDEX_op_deposit_i32:
1897         return TCG_TARGET_HAS_deposit_i32;
1898     case INDEX_op_extract_i32:
1899         return TCG_TARGET_HAS_extract_i32;
1900     case INDEX_op_sextract_i32:
1901         return TCG_TARGET_HAS_sextract_i32;
1902     case INDEX_op_extract2_i32:
1903         return TCG_TARGET_HAS_extract2_i32;
1904     case INDEX_op_add2_i32:
1905         return TCG_TARGET_HAS_add2_i32;
1906     case INDEX_op_sub2_i32:
1907         return TCG_TARGET_HAS_sub2_i32;
1908     case INDEX_op_mulu2_i32:
1909         return TCG_TARGET_HAS_mulu2_i32;
1910     case INDEX_op_muls2_i32:
1911         return TCG_TARGET_HAS_muls2_i32;
1912     case INDEX_op_muluh_i32:
1913         return TCG_TARGET_HAS_muluh_i32;
1914     case INDEX_op_mulsh_i32:
1915         return TCG_TARGET_HAS_mulsh_i32;
1916     case INDEX_op_ext8s_i32:
1917         return TCG_TARGET_HAS_ext8s_i32;
1918     case INDEX_op_ext16s_i32:
1919         return TCG_TARGET_HAS_ext16s_i32;
1920     case INDEX_op_ext8u_i32:
1921         return TCG_TARGET_HAS_ext8u_i32;
1922     case INDEX_op_ext16u_i32:
1923         return TCG_TARGET_HAS_ext16u_i32;
1924     case INDEX_op_bswap16_i32:
1925         return TCG_TARGET_HAS_bswap16_i32;
1926     case INDEX_op_bswap32_i32:
1927         return TCG_TARGET_HAS_bswap32_i32;
1928     case INDEX_op_not_i32:
1929         return TCG_TARGET_HAS_not_i32;
1930     case INDEX_op_neg_i32:
1931         return TCG_TARGET_HAS_neg_i32;
1932     case INDEX_op_andc_i32:
1933         return TCG_TARGET_HAS_andc_i32;
1934     case INDEX_op_orc_i32:
1935         return TCG_TARGET_HAS_orc_i32;
1936     case INDEX_op_eqv_i32:
1937         return TCG_TARGET_HAS_eqv_i32;
1938     case INDEX_op_nand_i32:
1939         return TCG_TARGET_HAS_nand_i32;
1940     case INDEX_op_nor_i32:
1941         return TCG_TARGET_HAS_nor_i32;
1942     case INDEX_op_clz_i32:
1943         return TCG_TARGET_HAS_clz_i32;
1944     case INDEX_op_ctz_i32:
1945         return TCG_TARGET_HAS_ctz_i32;
1946     case INDEX_op_ctpop_i32:
1947         return TCG_TARGET_HAS_ctpop_i32;
1948 
1949     case INDEX_op_brcond2_i32:
1950     case INDEX_op_setcond2_i32:
1951         return TCG_TARGET_REG_BITS == 32;
1952 
1953     case INDEX_op_mov_i64:
1954     case INDEX_op_setcond_i64:
1955     case INDEX_op_brcond_i64:
1956     case INDEX_op_ld8u_i64:
1957     case INDEX_op_ld8s_i64:
1958     case INDEX_op_ld16u_i64:
1959     case INDEX_op_ld16s_i64:
1960     case INDEX_op_ld32u_i64:
1961     case INDEX_op_ld32s_i64:
1962     case INDEX_op_ld_i64:
1963     case INDEX_op_st8_i64:
1964     case INDEX_op_st16_i64:
1965     case INDEX_op_st32_i64:
1966     case INDEX_op_st_i64:
1967     case INDEX_op_add_i64:
1968     case INDEX_op_sub_i64:
1969     case INDEX_op_mul_i64:
1970     case INDEX_op_and_i64:
1971     case INDEX_op_or_i64:
1972     case INDEX_op_xor_i64:
1973     case INDEX_op_shl_i64:
1974     case INDEX_op_shr_i64:
1975     case INDEX_op_sar_i64:
1976     case INDEX_op_ext_i32_i64:
1977     case INDEX_op_extu_i32_i64:
1978         return TCG_TARGET_REG_BITS == 64;
1979 
1980     case INDEX_op_movcond_i64:
1981         return TCG_TARGET_HAS_movcond_i64;
1982     case INDEX_op_div_i64:
1983     case INDEX_op_divu_i64:
1984         return TCG_TARGET_HAS_div_i64;
1985     case INDEX_op_rem_i64:
1986     case INDEX_op_remu_i64:
1987         return TCG_TARGET_HAS_rem_i64;
1988     case INDEX_op_div2_i64:
1989     case INDEX_op_divu2_i64:
1990         return TCG_TARGET_HAS_div2_i64;
1991     case INDEX_op_rotl_i64:
1992     case INDEX_op_rotr_i64:
1993         return TCG_TARGET_HAS_rot_i64;
1994     case INDEX_op_deposit_i64:
1995         return TCG_TARGET_HAS_deposit_i64;
1996     case INDEX_op_extract_i64:
1997         return TCG_TARGET_HAS_extract_i64;
1998     case INDEX_op_sextract_i64:
1999         return TCG_TARGET_HAS_sextract_i64;
2000     case INDEX_op_extract2_i64:
2001         return TCG_TARGET_HAS_extract2_i64;
2002     case INDEX_op_extrl_i64_i32:
2003     case INDEX_op_extrh_i64_i32:
2004         return TCG_TARGET_HAS_extr_i64_i32;
2005     case INDEX_op_ext8s_i64:
2006         return TCG_TARGET_HAS_ext8s_i64;
2007     case INDEX_op_ext16s_i64:
2008         return TCG_TARGET_HAS_ext16s_i64;
2009     case INDEX_op_ext32s_i64:
2010         return TCG_TARGET_HAS_ext32s_i64;
2011     case INDEX_op_ext8u_i64:
2012         return TCG_TARGET_HAS_ext8u_i64;
2013     case INDEX_op_ext16u_i64:
2014         return TCG_TARGET_HAS_ext16u_i64;
2015     case INDEX_op_ext32u_i64:
2016         return TCG_TARGET_HAS_ext32u_i64;
2017     case INDEX_op_bswap16_i64:
2018         return TCG_TARGET_HAS_bswap16_i64;
2019     case INDEX_op_bswap32_i64:
2020         return TCG_TARGET_HAS_bswap32_i64;
2021     case INDEX_op_bswap64_i64:
2022         return TCG_TARGET_HAS_bswap64_i64;
2023     case INDEX_op_not_i64:
2024         return TCG_TARGET_HAS_not_i64;
2025     case INDEX_op_neg_i64:
2026         return TCG_TARGET_HAS_neg_i64;
2027     case INDEX_op_andc_i64:
2028         return TCG_TARGET_HAS_andc_i64;
2029     case INDEX_op_orc_i64:
2030         return TCG_TARGET_HAS_orc_i64;
2031     case INDEX_op_eqv_i64:
2032         return TCG_TARGET_HAS_eqv_i64;
2033     case INDEX_op_nand_i64:
2034         return TCG_TARGET_HAS_nand_i64;
2035     case INDEX_op_nor_i64:
2036         return TCG_TARGET_HAS_nor_i64;
2037     case INDEX_op_clz_i64:
2038         return TCG_TARGET_HAS_clz_i64;
2039     case INDEX_op_ctz_i64:
2040         return TCG_TARGET_HAS_ctz_i64;
2041     case INDEX_op_ctpop_i64:
2042         return TCG_TARGET_HAS_ctpop_i64;
2043     case INDEX_op_add2_i64:
2044         return TCG_TARGET_HAS_add2_i64;
2045     case INDEX_op_sub2_i64:
2046         return TCG_TARGET_HAS_sub2_i64;
2047     case INDEX_op_mulu2_i64:
2048         return TCG_TARGET_HAS_mulu2_i64;
2049     case INDEX_op_muls2_i64:
2050         return TCG_TARGET_HAS_muls2_i64;
2051     case INDEX_op_muluh_i64:
2052         return TCG_TARGET_HAS_muluh_i64;
2053     case INDEX_op_mulsh_i64:
2054         return TCG_TARGET_HAS_mulsh_i64;
2055 
2056     case INDEX_op_mov_vec:
2057     case INDEX_op_dup_vec:
2058     case INDEX_op_dupm_vec:
2059     case INDEX_op_ld_vec:
2060     case INDEX_op_st_vec:
2061     case INDEX_op_add_vec:
2062     case INDEX_op_sub_vec:
2063     case INDEX_op_and_vec:
2064     case INDEX_op_or_vec:
2065     case INDEX_op_xor_vec:
2066     case INDEX_op_cmp_vec:
2067         return have_vec;
2068     case INDEX_op_dup2_vec:
2069         return have_vec && TCG_TARGET_REG_BITS == 32;
2070     case INDEX_op_not_vec:
2071         return have_vec && TCG_TARGET_HAS_not_vec;
2072     case INDEX_op_neg_vec:
2073         return have_vec && TCG_TARGET_HAS_neg_vec;
2074     case INDEX_op_abs_vec:
2075         return have_vec && TCG_TARGET_HAS_abs_vec;
2076     case INDEX_op_andc_vec:
2077         return have_vec && TCG_TARGET_HAS_andc_vec;
2078     case INDEX_op_orc_vec:
2079         return have_vec && TCG_TARGET_HAS_orc_vec;
2080     case INDEX_op_nand_vec:
2081         return have_vec && TCG_TARGET_HAS_nand_vec;
2082     case INDEX_op_nor_vec:
2083         return have_vec && TCG_TARGET_HAS_nor_vec;
2084     case INDEX_op_eqv_vec:
2085         return have_vec && TCG_TARGET_HAS_eqv_vec;
2086     case INDEX_op_mul_vec:
2087         return have_vec && TCG_TARGET_HAS_mul_vec;
2088     case INDEX_op_shli_vec:
2089     case INDEX_op_shri_vec:
2090     case INDEX_op_sari_vec:
2091         return have_vec && TCG_TARGET_HAS_shi_vec;
2092     case INDEX_op_shls_vec:
2093     case INDEX_op_shrs_vec:
2094     case INDEX_op_sars_vec:
2095         return have_vec && TCG_TARGET_HAS_shs_vec;
2096     case INDEX_op_shlv_vec:
2097     case INDEX_op_shrv_vec:
2098     case INDEX_op_sarv_vec:
2099         return have_vec && TCG_TARGET_HAS_shv_vec;
2100     case INDEX_op_rotli_vec:
2101         return have_vec && TCG_TARGET_HAS_roti_vec;
2102     case INDEX_op_rotls_vec:
2103         return have_vec && TCG_TARGET_HAS_rots_vec;
2104     case INDEX_op_rotlv_vec:
2105     case INDEX_op_rotrv_vec:
2106         return have_vec && TCG_TARGET_HAS_rotv_vec;
2107     case INDEX_op_ssadd_vec:
2108     case INDEX_op_usadd_vec:
2109     case INDEX_op_sssub_vec:
2110     case INDEX_op_ussub_vec:
2111         return have_vec && TCG_TARGET_HAS_sat_vec;
2112     case INDEX_op_smin_vec:
2113     case INDEX_op_umin_vec:
2114     case INDEX_op_smax_vec:
2115     case INDEX_op_umax_vec:
2116         return have_vec && TCG_TARGET_HAS_minmax_vec;
2117     case INDEX_op_bitsel_vec:
2118         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2119     case INDEX_op_cmpsel_vec:
2120         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2121 
2122     default:
2123         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2124         return true;
2125     }
2126 }
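
/*
 * Expanders use this to choose between a native opcode and a fallback,
 * e.g. (sketch):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit INDEX_op_ctpop_i32 directly
 *     } else {
 *         // expand via simpler ops or a helper call
 *     }
 */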
2127 
2128 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2129 
2130 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2131 {
2132     TCGv_i64 extend_free[MAX_CALL_IARGS];
2133     int n_extend = 0;
2134     TCGOp *op;
2135     int i, n, pi = 0, total_args;
2136 
2137     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2138         init_call_layout(info);
2139         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2140     }
2141 
2142     total_args = info->nr_out + info->nr_in + 2;
2143     op = tcg_op_alloc(INDEX_op_call, total_args);
2144 
2145 #ifdef CONFIG_PLUGIN
2146     /* Flag helpers that may affect guest state */
2147     if (tcg_ctx->plugin_insn &&
2148         !(info->flags & TCG_CALL_PLUGIN) &&
2149         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2150         tcg_ctx->plugin_insn->calls_helpers = true;
2151     }
2152 #endif
2153 
2154     TCGOP_CALLO(op) = n = info->nr_out;
2155     switch (n) {
2156     case 0:
2157         tcg_debug_assert(ret == NULL);
2158         break;
2159     case 1:
2160         tcg_debug_assert(ret != NULL);
2161         op->args[pi++] = temp_arg(ret);
2162         break;
2163     case 2:
2164     case 4:
2165         tcg_debug_assert(ret != NULL);
2166         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2167         tcg_debug_assert(ret->temp_subindex == 0);
2168         for (i = 0; i < n; ++i) {
2169             op->args[pi++] = temp_arg(ret + i);
2170         }
2171         break;
2172     default:
2173         g_assert_not_reached();
2174     }
2175 
2176     TCGOP_CALLI(op) = n = info->nr_in;
2177     for (i = 0; i < n; i++) {
2178         const TCGCallArgumentLoc *loc = &info->in[i];
2179         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2180 
2181         switch (loc->kind) {
2182         case TCG_CALL_ARG_NORMAL:
2183         case TCG_CALL_ARG_BY_REF:
2184         case TCG_CALL_ARG_BY_REF_N:
2185             op->args[pi++] = temp_arg(ts);
2186             break;
2187 
2188         case TCG_CALL_ARG_EXTEND_U:
2189         case TCG_CALL_ARG_EXTEND_S:
2190             {
2191                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2192                 TCGv_i32 orig = temp_tcgv_i32(ts);
2193 
2194                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2195                     tcg_gen_ext_i32_i64(temp, orig);
2196                 } else {
2197                     tcg_gen_extu_i32_i64(temp, orig);
2198                 }
2199                 op->args[pi++] = tcgv_i64_arg(temp);
2200                 extend_free[n_extend++] = temp;
2201             }
2202             break;
2203 
2204         default:
2205             g_assert_not_reached();
2206         }
2207     }
2208     op->args[pi++] = (uintptr_t)info->func;
2209     op->args[pi++] = (uintptr_t)info;
2210     tcg_debug_assert(pi == total_args);
2211 
2212     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2213 
2214     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2215     for (i = 0; i < n_extend; ++i) {
2216         tcg_temp_free_i64(extend_free[i]);
2217     }
2218 }
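
/*
 * The call op built above is laid out as: nr_out output arguments,
 * then nr_in input arguments, then two constant slots holding the
 * function pointer and the TCGHelperInfo pointer -- which is why
 * total_args is nr_out + nr_in + 2.
 */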
2219 
2220 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2221 {
2222     tcg_gen_callN(info, ret, NULL);
2223 }
2224 
2225 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2226 {
2227     tcg_gen_callN(info, ret, &t1);
2228 }
2229 
2230 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2231 {
2232     TCGTemp *args[2] = { t1, t2 };
2233     tcg_gen_callN(info, ret, args);
2234 }
2235 
2236 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2237                    TCGTemp *t2, TCGTemp *t3)
2238 {
2239     TCGTemp *args[3] = { t1, t2, t3 };
2240     tcg_gen_callN(info, ret, args);
2241 }
2242 
2243 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2244                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2245 {
2246     TCGTemp *args[4] = { t1, t2, t3, t4 };
2247     tcg_gen_callN(info, ret, args);
2248 }
2249 
2250 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2251                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2252 {
2253     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2254     tcg_gen_callN(info, ret, args);
2255 }
2256 
2257 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2258                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2259 {
2260     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2261     tcg_gen_callN(info, ret, args);
2262 }
2263 
2264 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2265                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2266                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2267 {
2268     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2269     tcg_gen_callN(info, ret, args);
2270 }
2271 
2272 static void tcg_reg_alloc_start(TCGContext *s)
2273 {
2274     int i, n;
2275 
2276     for (i = 0, n = s->nb_temps; i < n; i++) {
2277         TCGTemp *ts = &s->temps[i];
2278         TCGTempVal val = TEMP_VAL_MEM;
2279 
2280         switch (ts->kind) {
2281         case TEMP_CONST:
2282             val = TEMP_VAL_CONST;
2283             break;
2284         case TEMP_FIXED:
2285             val = TEMP_VAL_REG;
2286             break;
2287         case TEMP_GLOBAL:
2288             break;
2289         case TEMP_EBB:
2290             val = TEMP_VAL_DEAD;
2291             /* fall through */
2292         case TEMP_TB:
2293             ts->mem_allocated = 0;
2294             break;
2295         default:
2296             g_assert_not_reached();
2297         }
2298         ts->val_type = val;
2299     }
2300 
2301     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2302 }
2303 
2304 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2305                                  TCGTemp *ts)
2306 {
2307     int idx = temp_idx(ts);
2308 
2309     switch (ts->kind) {
2310     case TEMP_FIXED:
2311     case TEMP_GLOBAL:
2312         pstrcpy(buf, buf_size, ts->name);
2313         break;
2314     case TEMP_TB:
2315         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2316         break;
2317     case TEMP_EBB:
2318         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2319         break;
2320     case TEMP_CONST:
2321         switch (ts->type) {
2322         case TCG_TYPE_I32:
2323             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2324             break;
2325 #if TCG_TARGET_REG_BITS > 32
2326         case TCG_TYPE_I64:
2327             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2328             break;
2329 #endif
2330         case TCG_TYPE_V64:
2331         case TCG_TYPE_V128:
2332         case TCG_TYPE_V256:
2333             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2334                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2335             break;
2336         default:
2337             g_assert_not_reached();
2338         }
2339         break;
2340     }
2341     return buf;
2342 }
2343 
2344 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2345                              int buf_size, TCGArg arg)
2346 {
2347     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2348 }
2349 
2350 static const char * const cond_name[] =
2351 {
2352     [TCG_COND_NEVER] = "never",
2353     [TCG_COND_ALWAYS] = "always",
2354     [TCG_COND_EQ] = "eq",
2355     [TCG_COND_NE] = "ne",
2356     [TCG_COND_LT] = "lt",
2357     [TCG_COND_GE] = "ge",
2358     [TCG_COND_LE] = "le",
2359     [TCG_COND_GT] = "gt",
2360     [TCG_COND_LTU] = "ltu",
2361     [TCG_COND_GEU] = "geu",
2362     [TCG_COND_LEU] = "leu",
2363     [TCG_COND_GTU] = "gtu"
2364 };
2365 
2366 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2367 {
2368     [MO_UB]   = "ub",
2369     [MO_SB]   = "sb",
2370     [MO_LEUW] = "leuw",
2371     [MO_LESW] = "lesw",
2372     [MO_LEUL] = "leul",
2373     [MO_LESL] = "lesl",
2374     [MO_LEUQ] = "leq",
2375     [MO_BEUW] = "beuw",
2376     [MO_BESW] = "besw",
2377     [MO_BEUL] = "beul",
2378     [MO_BESL] = "besl",
2379     [MO_BEUQ] = "beq",
2380     [MO_128 + MO_BE] = "beo",
2381     [MO_128 + MO_LE] = "leo",
2382 };
2383 
2384 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2385     [MO_UNALN >> MO_ASHIFT]    = "un+",
2386     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2387     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2388     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2389     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2390     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2391     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2392     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2393 };
2394 
2395 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2396     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2397     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2398     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2399     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2400     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2401     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2402 };
2403 
2404 static const char bswap_flag_name[][6] = {
2405     [TCG_BSWAP_IZ] = "iz",
2406     [TCG_BSWAP_OZ] = "oz",
2407     [TCG_BSWAP_OS] = "os",
2408     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2409     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2410 };
2411 
2412 static inline bool tcg_regset_single(TCGRegSet d)
2413 {
2414     return (d & (d - 1)) == 0;
2415 }
2416 
2417 static inline TCGReg tcg_regset_first(TCGRegSet d)
2418 {
2419     if (TCG_TARGET_NB_REGS <= 32) {
2420         return ctz32(d);
2421     } else {
2422         return ctz64(d);
2423     }
2424 }
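
/*
 * E.g. tcg_regset_single(0x10) is true (exactly one register, reg 4)
 * and tcg_regset_first(0x18) is 3 (the lowest set bit).  Note that
 * the empty set also satisfies tcg_regset_single().
 */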
2425 
2426 /* Return only the number of characters output -- no error return. */
2427 #define ne_fprintf(...) \
2428     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2429 
2430 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2431 {
2432     char buf[128];
2433     TCGOp *op;
2434 
2435     QTAILQ_FOREACH(op, &s->ops, link) {
2436         int i, k, nb_oargs, nb_iargs, nb_cargs;
2437         const TCGOpDef *def;
2438         TCGOpcode c;
2439         int col = 0;
2440 
2441         c = op->opc;
2442         def = &tcg_op_defs[c];
2443 
2444         if (c == INDEX_op_insn_start) {
2445             nb_oargs = 0;
2446             col += ne_fprintf(f, "\n ----");
2447 
2448             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2449                 col += ne_fprintf(f, " %016" PRIx64,
2450                                   tcg_get_insn_start_param(op, i));
2451             }
2452         } else if (c == INDEX_op_call) {
2453             const TCGHelperInfo *info = tcg_call_info(op);
2454             void *func = tcg_call_func(op);
2455 
2456             /* variable number of arguments */
2457             nb_oargs = TCGOP_CALLO(op);
2458             nb_iargs = TCGOP_CALLI(op);
2459             nb_cargs = def->nb_cargs;
2460 
2461             col += ne_fprintf(f, " %s ", def->name);
2462 
2463             /*
2464              * Print the function name from TCGHelperInfo, if available.
2465              * Note that plugins have a template function for the info,
2466              * but the actual function pointer comes from the plugin.
2467              */
2468             if (func == info->func) {
2469                 col += ne_fprintf(f, "%s", info->name);
2470             } else {
2471                 col += ne_fprintf(f, "plugin(%p)", func);
2472             }
2473 
2474             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2475             for (i = 0; i < nb_oargs; i++) {
2476                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2477                                                             op->args[i]));
2478             }
2479             for (i = 0; i < nb_iargs; i++) {
2480                 TCGArg arg = op->args[nb_oargs + i];
2481                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2482                 col += ne_fprintf(f, ",%s", t);
2483             }
2484         } else {
2485             col += ne_fprintf(f, " %s ", def->name);
2486 
2487             nb_oargs = def->nb_oargs;
2488             nb_iargs = def->nb_iargs;
2489             nb_cargs = def->nb_cargs;
2490 
2491             if (def->flags & TCG_OPF_VECTOR) {
2492                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2493                                   8 << TCGOP_VECE(op));
2494             }
2495 
2496             k = 0;
2497             for (i = 0; i < nb_oargs; i++) {
2498                 const char *sep =  k ? "," : "";
2499                 col += ne_fprintf(f, "%s%s", sep,
2500                                   tcg_get_arg_str(s, buf, sizeof(buf),
2501                                                   op->args[k++]));
2502             }
2503             for (i = 0; i < nb_iargs; i++) {
2504                 const char *sep =  k ? "," : "";
2505                 col += ne_fprintf(f, "%s%s", sep,
2506                                   tcg_get_arg_str(s, buf, sizeof(buf),
2507                                                   op->args[k++]));
2508             }
2509             switch (c) {
2510             case INDEX_op_brcond_i32:
2511             case INDEX_op_setcond_i32:
2512             case INDEX_op_movcond_i32:
2513             case INDEX_op_brcond2_i32:
2514             case INDEX_op_setcond2_i32:
2515             case INDEX_op_brcond_i64:
2516             case INDEX_op_setcond_i64:
2517             case INDEX_op_movcond_i64:
2518             case INDEX_op_cmp_vec:
2519             case INDEX_op_cmpsel_vec:
2520                 if (op->args[k] < ARRAY_SIZE(cond_name)
2521                     && cond_name[op->args[k]]) {
2522                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2523                 } else {
2524                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2525                 }
2526                 i = 1;
2527                 break;
2528             case INDEX_op_qemu_ld_a32_i32:
2529             case INDEX_op_qemu_ld_a64_i32:
2530             case INDEX_op_qemu_st_a32_i32:
2531             case INDEX_op_qemu_st_a64_i32:
2532             case INDEX_op_qemu_st8_a32_i32:
2533             case INDEX_op_qemu_st8_a64_i32:
2534             case INDEX_op_qemu_ld_a32_i64:
2535             case INDEX_op_qemu_ld_a64_i64:
2536             case INDEX_op_qemu_st_a32_i64:
2537             case INDEX_op_qemu_st_a64_i64:
2538             case INDEX_op_qemu_ld_a32_i128:
2539             case INDEX_op_qemu_ld_a64_i128:
2540             case INDEX_op_qemu_st_a32_i128:
2541             case INDEX_op_qemu_st_a64_i128:
2542                 {
2543                     const char *s_al, *s_op, *s_at;
2544                     MemOpIdx oi = op->args[k++];
2545                     MemOp op = get_memop(oi);
2546                     unsigned ix = get_mmuidx(oi);
2547 
2548                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2549                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2550                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2551                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2552 
2553                     /* If all fields are accounted for, print symbolically. */
2554                     if (!op && s_al && s_op && s_at) {
2555                         col += ne_fprintf(f, ",%s%s%s,%u",
2556                                           s_at, s_al, s_op, ix);
2557                     } else {
2558                         op = get_memop(oi);
2559                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2560                     }
2561                     i = 1;
2562                 }
2563                 break;
2564             case INDEX_op_bswap16_i32:
2565             case INDEX_op_bswap16_i64:
2566             case INDEX_op_bswap32_i32:
2567             case INDEX_op_bswap32_i64:
2568             case INDEX_op_bswap64_i64:
2569                 {
2570                     TCGArg flags = op->args[k];
2571                     const char *name = NULL;
2572 
2573                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2574                         name = bswap_flag_name[flags];
2575                     }
2576                     if (name) {
2577                         col += ne_fprintf(f, ",%s", name);
2578                     } else {
2579                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2580                     }
2581                     i = k = 1;
2582                 }
2583                 break;
2584             default:
2585                 i = 0;
2586                 break;
2587             }
2588             switch (c) {
2589             case INDEX_op_set_label:
2590             case INDEX_op_br:
2591             case INDEX_op_brcond_i32:
2592             case INDEX_op_brcond_i64:
2593             case INDEX_op_brcond2_i32:
2594                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2595                                   arg_label(op->args[k])->id);
2596                 i++, k++;
2597                 break;
2598             case INDEX_op_mb:
2599                 {
2600                     TCGBar membar = op->args[k];
2601                     const char *b_op, *m_op;
2602 
2603                     switch (membar & TCG_BAR_SC) {
2604                     case 0:
2605                         b_op = "none";
2606                         break;
2607                     case TCG_BAR_LDAQ:
2608                         b_op = "acq";
2609                         break;
2610                     case TCG_BAR_STRL:
2611                         b_op = "rel";
2612                         break;
2613                     case TCG_BAR_SC:
2614                         b_op = "seq";
2615                         break;
2616                     default:
2617                         g_assert_not_reached();
2618                     }
2619 
2620                     switch (membar & TCG_MO_ALL) {
2621                     case 0:
2622                         m_op = "none";
2623                         break;
2624                     case TCG_MO_LD_LD:
2625                         m_op = "rr";
2626                         break;
2627                     case TCG_MO_LD_ST:
2628                         m_op = "rw";
2629                         break;
2630                     case TCG_MO_ST_LD:
2631                         m_op = "wr";
2632                         break;
2633                     case TCG_MO_ST_ST:
2634                         m_op = "ww";
2635                         break;
2636                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2637                         m_op = "rr+rw";
2638                         break;
2639                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2640                         m_op = "rr+wr";
2641                         break;
2642                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2643                         m_op = "rr+ww";
2644                         break;
2645                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2646                         m_op = "rw+wr";
2647                         break;
2648                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2649                         m_op = "rw+ww";
2650                         break;
2651                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2652                         m_op = "wr+ww";
2653                         break;
2654                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2655                         m_op = "rr+rw+wr";
2656                         break;
2657                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2658                         m_op = "rr+rw+ww";
2659                         break;
2660                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2661                         m_op = "rr+wr+ww";
2662                         break;
2663                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2664                         m_op = "rw+wr+ww";
2665                         break;
2666                     case TCG_MO_ALL:
2667                         m_op = "all";
2668                         break;
2669                     default:
2670                         g_assert_not_reached();
2671                     }
2672 
2673                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2674                     i++, k++;
2675                 }
2676                 break;
2677             default:
2678                 break;
2679             }
2680             for (; i < nb_cargs; i++, k++) {
2681                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2682                                   op->args[k]);
2683             }
2684         }
2685 
2686         if (have_prefs || op->life) {
2687             for (; col < 40; ++col) {
2688                 putc(' ', f);
2689             }
2690         }
2691 
2692         if (op->life) {
2693             unsigned life = op->life;
2694 
2695             if (life & (SYNC_ARG * 3)) {
2696                 ne_fprintf(f, "  sync:");
2697                 for (i = 0; i < 2; ++i) {
2698                     if (life & (SYNC_ARG << i)) {
2699                         ne_fprintf(f, " %d", i);
2700                     }
2701                 }
2702             }
2703             life /= DEAD_ARG;
2704             if (life) {
2705                 ne_fprintf(f, "  dead:");
2706                 for (i = 0; life; ++i, life >>= 1) {
2707                     if (life & 1) {
2708                         ne_fprintf(f, " %d", i);
2709                     }
2710                 }
2711             }
2712         }
2713 
2714         if (have_prefs) {
2715             for (i = 0; i < nb_oargs; ++i) {
2716                 TCGRegSet set = output_pref(op, i);
2717 
2718                 if (i == 0) {
2719                     ne_fprintf(f, "  pref=");
2720                 } else {
2721                     ne_fprintf(f, ",");
2722                 }
2723                 if (set == 0) {
2724                     ne_fprintf(f, "none");
2725                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2726                     ne_fprintf(f, "all");
2727 #ifdef CONFIG_DEBUG_TCG
2728                 } else if (tcg_regset_single(set)) {
2729                     TCGReg reg = tcg_regset_first(set);
2730                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2731 #endif
2732                 } else if (TCG_TARGET_NB_REGS <= 32) {
2733                     ne_fprintf(f, "0x%x", (uint32_t)set);
2734                 } else {
2735                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2736                 }
2737             }
2738         }
2739 
2740         putc('\n', f);
2741     }
2742 }
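
/*
 * Sample -d op output from the loop above (illustrative; exact
 * columns depend on have_prefs and liveness data):
 *
 *  ---- 0000000000401000 0000000000000000
 *   mov_i32 tmp0,var
 *   add_i32 tmp0,tmp0,$0x1                  dead: 1 2
 *   brcond_i32 tmp0,$0x0,eq,$L0             dead: 0 1
 */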
2743 
2744 /* We give more priority to constraints with fewer registers. */
2745 static int get_constraint_priority(const TCGOpDef *def, int k)
2746 {
2747     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2748     int n = ctpop64(arg_ct->regs);
2749 
2750     /*
2751      * Sort constraints of a single register first, which includes output
2752      * aliases (which must exactly match the input already allocated).
2753      */
2754     if (n == 1 || arg_ct->oalias) {
2755         return INT_MAX;
2756     }
2757 
2758     /*
2759      * Sort register pairs next, first then second immediately after.
2760      * Arbitrarily sort multiple pairs by the index of the first reg;
2761      * there shouldn't be many pairs.
2762      */
2763     switch (arg_ct->pair) {
2764     case 1:
2765     case 3:
2766         return (k + 1) * 2;
2767     case 2:
2768         return (arg_ct->pair_index + 1) * 2 - 1;
2769     }
2770 
2771     /* Finally, sort by decreasing register count. */
2772     assert(n > 1);
2773     return -n;
2774 }
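
/*
 * The resulting order: single-register constraints and output aliases
 * sort first (INT_MAX), register pairs next, and broader register
 * sets last, with larger sets after smaller ones (-n).
 */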
2775 
2776 /* sort from highest priority to lowest */
2777 static void sort_constraints(TCGOpDef *def, int start, int n)
2778 {
2779     int i, j;
2780     TCGArgConstraint *a = def->args_ct;
2781 
2782     for (i = 0; i < n; i++) {
2783         a[start + i].sort_index = start + i;
2784     }
2785     if (n <= 1) {
2786         return;
2787     }
2788     for (i = 0; i < n - 1; i++) {
2789         for (j = i + 1; j < n; j++) {
2790             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2791             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2792             if (p1 < p2) {
2793                 int tmp = a[start + i].sort_index;
2794                 a[start + i].sort_index = a[start + j].sort_index;
2795                 a[start + j].sort_index = tmp;
2796             }
2797         }
2798     }
2799 }
2800 
2801 static void process_op_defs(TCGContext *s)
2802 {
2803     TCGOpcode op;
2804 
2805     for (op = 0; op < NB_OPS; op++) {
2806         TCGOpDef *def = &tcg_op_defs[op];
2807         const TCGTargetOpDef *tdefs;
2808         bool saw_alias_pair = false;
2809         int i, o, i2, o2, nb_args;
2810 
2811         if (def->flags & TCG_OPF_NOT_PRESENT) {
2812             continue;
2813         }
2814 
2815         nb_args = def->nb_iargs + def->nb_oargs;
2816         if (nb_args == 0) {
2817             continue;
2818         }
2819 
2820         /*
2821          * Macro magic should make it impossible, but double-check that
2822          * the array index is in range.  Since the signedness of an enum
2823          * is implementation defined, force the result to unsigned.
2824          */
2825         unsigned con_set = tcg_target_op_def(op);
2826         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2827         tdefs = &constraint_sets[con_set];
2828 
2829         for (i = 0; i < nb_args; i++) {
2830             const char *ct_str = tdefs->args_ct_str[i];
2831             bool input_p = i >= def->nb_oargs;
2832 
2833             /* Incomplete TCGTargetOpDef entry. */
2834             tcg_debug_assert(ct_str != NULL);
2835 
2836             switch (*ct_str) {
2837             case '0' ... '9':
2838                 o = *ct_str - '0';
2839                 tcg_debug_assert(input_p);
2840                 tcg_debug_assert(o < def->nb_oargs);
2841                 tcg_debug_assert(def->args_ct[o].regs != 0);
2842                 tcg_debug_assert(!def->args_ct[o].oalias);
2843                 def->args_ct[i] = def->args_ct[o];
2844                 /* The output sets oalias.  */
2845                 def->args_ct[o].oalias = 1;
2846                 def->args_ct[o].alias_index = i;
2847                 /* The input sets ialias. */
2848                 def->args_ct[i].ialias = 1;
2849                 def->args_ct[i].alias_index = o;
2850                 if (def->args_ct[i].pair) {
2851                     saw_alias_pair = true;
2852                 }
2853                 tcg_debug_assert(ct_str[1] == '\0');
2854                 continue;
2855 
2856             case '&':
2857                 tcg_debug_assert(!input_p);
2858                 def->args_ct[i].newreg = true;
2859                 ct_str++;
2860                 break;
2861 
2862             case 'p': /* plus */
2863                 /* Allocate to the register after the previous. */
2864                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2865                 o = i - 1;
2866                 tcg_debug_assert(!def->args_ct[o].pair);
2867                 tcg_debug_assert(!def->args_ct[o].ct);
2868                 def->args_ct[i] = (TCGArgConstraint){
2869                     .pair = 2,
2870                     .pair_index = o,
2871                     .regs = def->args_ct[o].regs << 1,
2872                 };
2873                 def->args_ct[o].pair = 1;
2874                 def->args_ct[o].pair_index = i;
2875                 tcg_debug_assert(ct_str[1] == '\0');
2876                 continue;
2877 
2878             case 'm': /* minus */
2879                 /* Allocate to the register before the previous. */
2880                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2881                 o = i - 1;
2882                 tcg_debug_assert(!def->args_ct[o].pair);
2883                 tcg_debug_assert(!def->args_ct[o].ct);
2884                 def->args_ct[i] = (TCGArgConstraint){
2885                     .pair = 1,
2886                     .pair_index = o,
2887                     .regs = def->args_ct[o].regs >> 1,
2888                 };
2889                 def->args_ct[o].pair = 2;
2890                 def->args_ct[o].pair_index = i;
2891                 tcg_debug_assert(ct_str[1] == '\0');
2892                 continue;
2893             }
2894 
2895             do {
2896                 switch (*ct_str) {
2897                 case 'i':
2898                     def->args_ct[i].ct |= TCG_CT_CONST;
2899                     break;
2900 
2901                 /* Include all of the target-specific constraints. */
2902 
2903 #undef CONST
2904 #define CONST(CASE, MASK) \
2905     case CASE: def->args_ct[i].ct |= MASK; break;
2906 #define REGS(CASE, MASK) \
2907     case CASE: def->args_ct[i].regs |= MASK; break;
2908 
2909 #include "tcg-target-con-str.h"
2910 
2911 #undef REGS
2912 #undef CONST
2913                 default:
2914                 case '0' ... '9':
2915                 case '&':
2916                 case 'p':
2917                 case 'm':
2918                     /* Typo in TCGTargetOpDef constraint. */
2919                     g_assert_not_reached();
2920                 }
2921             } while (*++ct_str != '\0');
2922         }
2923 
2924         /* TCGTargetOpDef entry with too much information? */
2925         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2926 
2927         /*
2928          * Fix up output pairs that are aliased with inputs.
2929          * When we created the alias, we copied pair from the output.
2930          * There are three cases:
2931          *    (1a) Pairs of inputs alias pairs of outputs.
2932          *    (1b) One input aliases the first of a pair of outputs.
2933          *    (2)  One input aliases the second of a pair of outputs.
2934          *
2935          * Case 1a is handled by making sure that the pair_index'es are
2936          * properly updated so that they appear the same as a pair of inputs.
2937          *
2938          * Case 1b is handled by setting the pair_index of the input to
2939          * itself, simply so it doesn't point to an unrelated argument.
2940          * Since we don't encounter the "second" during the input allocation
2941          * phase, nothing happens with the second half of the input pair.
2942          *
2943          * Case 2 is handled by setting the second input to pair=3, the
2944          * first output to pair=3, and the pair_index'es to match.
2945          */
2946         if (saw_alias_pair) {
2947             for (i = def->nb_oargs; i < nb_args; i++) {
2948                 /*
2949                  * Since [0-9pm] must be alone in the constraint string,
2950                  * the only way they can both be set is if the pair comes
2951                  * from the output alias.
2952                  */
2953                 if (!def->args_ct[i].ialias) {
2954                     continue;
2955                 }
2956                 switch (def->args_ct[i].pair) {
2957                 case 0:
2958                     break;
2959                 case 1:
2960                     o = def->args_ct[i].alias_index;
2961                     o2 = def->args_ct[o].pair_index;
2962                     tcg_debug_assert(def->args_ct[o].pair == 1);
2963                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2964                     if (def->args_ct[o2].oalias) {
2965                         /* Case 1a */
2966                         i2 = def->args_ct[o2].alias_index;
2967                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2968                         def->args_ct[i2].pair_index = i;
2969                         def->args_ct[i].pair_index = i2;
2970                     } else {
2971                         /* Case 1b */
2972                         def->args_ct[i].pair_index = i;
2973                     }
2974                     break;
2975                 case 2:
2976                     o = def->args_ct[i].alias_index;
2977                     o2 = def->args_ct[o].pair_index;
2978                     tcg_debug_assert(def->args_ct[o].pair == 2);
2979                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2980                     if (def->args_ct[o2].oalias) {
2981                         /* Case 1a */
2982                         i2 = def->args_ct[o2].alias_index;
2983                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2984                         def->args_ct[i2].pair_index = i;
2985                         def->args_ct[i].pair_index = i2;
2986                     } else {
2987                         /* Case 2 */
2988                         def->args_ct[i].pair = 3;
2989                         def->args_ct[o2].pair = 3;
2990                         def->args_ct[i].pair_index = o2;
2991                         def->args_ct[o2].pair_index = i;
2992                     }
2993                     break;
2994                 default:
2995                     g_assert_not_reached();
2996                 }
2997             }
2998         }
2999 
3000         /* Sort the constraints (XXX: this is just a heuristic). */
3001         sort_constraints(def, 0, def->nb_oargs);
3002         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3003     }
3004 }
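
/*
 * For example (illustrative), a target constraint set { "r", "r", "ri" }
 * for add_i32 decodes as: output in any register, first input in any
 * register, second input in a register or an immediate ('i' sets
 * TCG_CT_CONST); a string such as "0" would instead alias an input
 * to output 0.
 */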
3005 
3006 static void remove_label_use(TCGOp *op, int idx)
3007 {
3008     TCGLabel *label = arg_label(op->args[idx]);
3009     TCGLabelUse *use;
3010 
3011     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3012         if (use->op == op) {
3013             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3014             return;
3015         }
3016     }
3017     g_assert_not_reached();
3018 }
3019 
3020 void tcg_op_remove(TCGContext *s, TCGOp *op)
3021 {
3022     switch (op->opc) {
3023     case INDEX_op_br:
3024         remove_label_use(op, 0);
3025         break;
3026     case INDEX_op_brcond_i32:
3027     case INDEX_op_brcond_i64:
3028         remove_label_use(op, 3);
3029         break;
3030     case INDEX_op_brcond2_i32:
3031         remove_label_use(op, 5);
3032         break;
3033     default:
3034         break;
3035     }
3036 
3037     QTAILQ_REMOVE(&s->ops, op, link);
3038     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3039     s->nb_ops--;
3040 }
3041 
3042 void tcg_remove_ops_after(TCGOp *op)
3043 {
3044     TCGContext *s = tcg_ctx;
3045 
3046     while (true) {
3047         TCGOp *last = tcg_last_op();
3048         if (last == op) {
3049             return;
3050         }
3051         tcg_op_remove(s, last);
3052     }
3053 }
3054 
3055 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3056 {
3057     TCGContext *s = tcg_ctx;
3058     TCGOp *op = NULL;
3059 
3060     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3061         QTAILQ_FOREACH(op, &s->free_ops, link) {
3062             if (nargs <= op->nargs) {
3063                 QTAILQ_REMOVE(&s->free_ops, op, link);
3064                 nargs = op->nargs;
3065                 goto found;
3066             }
3067         }
3068     }
3069 
3070     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3071     nargs = MAX(4, nargs);
3072     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3073 
3074  found:
3075     memset(op, 0, offsetof(TCGOp, link));
3076     op->opc = opc;
3077     op->nargs = nargs;
3078 
3079     /* Check for bitfield overflow. */
3080     tcg_debug_assert(op->nargs == nargs);
3081 
3082     s->nb_ops++;
3083     return op;
3084 }
3085 
3086 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3087 {
3088     TCGOp *op = tcg_op_alloc(opc, nargs);
3089     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3090     return op;
3091 }
3092 
3093 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3094                             TCGOpcode opc, unsigned nargs)
3095 {
3096     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3097     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3098     return new_op;
3099 }
3100 
3101 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3102                            TCGOpcode opc, unsigned nargs)
3103 {
3104     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3105     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3106     return new_op;
3107 }
3108 
3109 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3110 {
3111     TCGLabelUse *u;
3112 
3113     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3114         TCGOp *op = u->op;
3115         switch (op->opc) {
3116         case INDEX_op_br:
3117             op->args[0] = label_arg(to);
3118             break;
3119         case INDEX_op_brcond_i32:
3120         case INDEX_op_brcond_i64:
3121             op->args[3] = label_arg(to);
3122             break;
3123         case INDEX_op_brcond2_i32:
3124             op->args[5] = label_arg(to);
3125             break;
3126         default:
3127             g_assert_not_reached();
3128         }
3129     }
3130 
3131     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3132 }
3133 
3134 /* Reachability analysis: remove unreachable code.  */
3135 static void __attribute__((noinline))
3136 reachable_code_pass(TCGContext *s)
3137 {
3138     TCGOp *op, *op_next, *op_prev;
3139     bool dead = false;
3140 
3141     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3142         bool remove = dead;
3143         TCGLabel *label;
3144 
3145         switch (op->opc) {
3146         case INDEX_op_set_label:
3147             label = arg_label(op->args[0]);
3148 
3149             /*
3150              * Note that the first op in the TB is always a load,
3151              * so there is always something before a label.
3152              */
3153             op_prev = QTAILQ_PREV(op, link);
3154 
3155             /*
3156              * If we find two sequential labels, move all branches to
3157              * reference the second label and remove the first label.
3158              * Do this before branch to next optimization, so that the
3159              * middle label is out of the way.
3160              */
3161             if (op_prev->opc == INDEX_op_set_label) {
3162                 move_label_uses(label, arg_label(op_prev->args[0]));
3163                 tcg_op_remove(s, op_prev);
3164                 op_prev = QTAILQ_PREV(op, link);
3165             }
3166 
3167             /*
3168              * Optimization can fold conditional branches to unconditional.
3169              * If we find a label which is preceded by an unconditional
3170              * branch to next, remove the branch.  We couldn't do this when
3171              * processing the branch because any dead code between the branch
3172              * and label had not yet been removed.
3173              */
3174             if (op_prev->opc == INDEX_op_br &&
3175                 label == arg_label(op_prev->args[0])) {
3176                 tcg_op_remove(s, op_prev);
3177                 /* Fall through means insns become live again.  */
3178                 dead = false;
3179             }
3180 
3181             if (QSIMPLEQ_EMPTY(&label->branches)) {
3182                 /*
3183                  * While there is an occasional backward branch, virtually
3184                  * all branches generated by the translators are forward,
3185                  * which means that generally we will have already removed
3186                  * all the references to the label that there will ever be,
3187                  * and there is little to be gained by iterating.
3188                  */
3189                 remove = true;
3190             } else {
3191                 /* Once we see a label, insns become live again.  */
3192                 dead = false;
3193                 remove = false;
3194             }
3195             break;
3196 
3197         case INDEX_op_br:
3198         case INDEX_op_exit_tb:
3199         case INDEX_op_goto_ptr:
3200             /* Unconditional branches; everything following is dead.  */
3201             dead = true;
3202             break;
3203 
3204         case INDEX_op_call:
3205             /* Notice noreturn helper calls, raising exceptions.  */
3206             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3207                 dead = true;
3208             }
3209             break;
3210 
3211         case INDEX_op_insn_start:
3212             /* Never remove -- we need to keep these for unwind.  */
3213             remove = false;
3214             break;
3215 
3216         default:
3217             break;
3218         }
3219 
3220         if (remove) {
3221             tcg_op_remove(s, op);
3222         }
3223     }
3224 }
3225 
3226 #define TS_DEAD  1
3227 #define TS_MEM   2
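/*
 * Temp state bits for the liveness passes: TS_DEAD means the temp has
 * no further use at this point of the backward scan; TS_MEM means its
 * canonical value is (or must be placed) in memory.  E.g. a global at
 * the end of the TB is TS_DEAD | TS_MEM: synced back, not in a register.
 */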
3228 
3229 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3230 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3231 
3232 /* For liveness_pass_1, the register preferences for a given temp.  */
3233 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3234 {
3235     return ts->state_ptr;
3236 }
3237 
3238 /* For liveness_pass_1, reset the preferences for a given temp to the
3239  * maximal regset for its type.
3240  */
3241 static inline void la_reset_pref(TCGTemp *ts)
3242 {
3243     *la_temp_pref(ts)
3244         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3245 }
3246 
3247 /* liveness analysis: end of function: all temps are dead, and globals
3248    should be in memory. */
3249 static void la_func_end(TCGContext *s, int ng, int nt)
3250 {
3251     int i;
3252 
3253     for (i = 0; i < ng; ++i) {
3254         s->temps[i].state = TS_DEAD | TS_MEM;
3255         la_reset_pref(&s->temps[i]);
3256     }
3257     for (i = ng; i < nt; ++i) {
3258         s->temps[i].state = TS_DEAD;
3259         la_reset_pref(&s->temps[i]);
3260     }
3261 }
3262 
3263 /* liveness analysis: end of basic block: all temps are dead, globals
3264    and local temps should be in memory. */
3265 static void la_bb_end(TCGContext *s, int ng, int nt)
3266 {
3267     int i;
3268 
3269     for (i = 0; i < nt; ++i) {
3270         TCGTemp *ts = &s->temps[i];
3271         int state;
3272 
3273         switch (ts->kind) {
3274         case TEMP_FIXED:
3275         case TEMP_GLOBAL:
3276         case TEMP_TB:
3277             state = TS_DEAD | TS_MEM;
3278             break;
3279         case TEMP_EBB:
3280         case TEMP_CONST:
3281             state = TS_DEAD;
3282             break;
3283         default:
3284             g_assert_not_reached();
3285         }
3286         ts->state = state;
3287         la_reset_pref(ts);
3288     }
3289 }
3290 
3291 /* liveness analysis: sync globals back to memory.  */
3292 static void la_global_sync(TCGContext *s, int ng)
3293 {
3294     int i;
3295 
3296     for (i = 0; i < ng; ++i) {
3297         int state = s->temps[i].state;
3298         s->temps[i].state = state | TS_MEM;
3299         if (state == TS_DEAD) {
3300             /* If the global was previously dead, reset prefs.  */
3301             la_reset_pref(&s->temps[i]);
3302         }
3303     }
3304 }
3305 
3306 /*
3307  * liveness analysis: conditional branch: all temps are dead unless
3308  * explicitly live-across-conditional-branch, globals and local temps
3309  * should be synced.
3310  */
3311 static void la_bb_sync(TCGContext *s, int ng, int nt)
3312 {
3313     la_global_sync(s, ng);
3314 
3315     for (int i = ng; i < nt; ++i) {
3316         TCGTemp *ts = &s->temps[i];
3317         int state;
3318 
3319         switch (ts->kind) {
3320         case TEMP_TB:
3321             state = ts->state;
3322             ts->state = state | TS_MEM;
3323             if (state != TS_DEAD) {
3324                 continue;
3325             }
3326             break;
3327         case TEMP_EBB:
3328         case TEMP_CONST:
3329             continue;
3330         default:
3331             g_assert_not_reached();
3332         }
3333         la_reset_pref(&s->temps[i]);
3334     }
3335 }
3336 
3337 /* liveness analysis: sync globals back to memory and kill.  */
3338 static void la_global_kill(TCGContext *s, int ng)
3339 {
3340     int i;
3341 
3342     for (i = 0; i < ng; i++) {
3343         s->temps[i].state = TS_DEAD | TS_MEM;
3344         la_reset_pref(&s->temps[i]);
3345     }
3346 }
3347 
3348 /* liveness analysis: note live globals crossing calls.  */
3349 static void la_cross_call(TCGContext *s, int nt)
3350 {
3351     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3352     int i;
3353 
3354     for (i = 0; i < nt; i++) {
3355         TCGTemp *ts = &s->temps[i];
3356         if (!(ts->state & TS_DEAD)) {
3357             TCGRegSet *pset = la_temp_pref(ts);
3358             TCGRegSet set = *pset;
3359 
3360             set &= mask;
3361             /* If the combination is not possible, restart.  */
3362             if (set == 0) {
3363                 set = tcg_target_available_regs[ts->type] & mask;
3364             }
3365             *pset = set;
3366         }
3367     }
3368 }
3369 
3370 /*
3371  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3372  * to TEMP_EBB, if possible.
3373  */
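/*
 * For example, if every use of a given TEMP_TB temp falls within one
 * EBB (between one label and the next), nothing can branch back to a
 * point where its value is still needed, so it may be demoted to
 * TEMP_EBB; la_bb_end can then consider it dead instead of requiring
 * a sync to its stack slot.
 */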
3374 static void __attribute__((noinline))
3375 liveness_pass_0(TCGContext *s)
3376 {
3377     void * const multiple_ebb = (void *)(uintptr_t)-1;
3378     int nb_temps = s->nb_temps;
3379     TCGOp *op, *ebb;
3380 
3381     for (int i = s->nb_globals; i < nb_temps; ++i) {
3382         s->temps[i].state_ptr = NULL;
3383     }
3384 
3385     /*
3386      * Represent each EBB by the op at which it begins.  In the case of
3387      * the first EBB, this is the first op, otherwise it is a label.
3388      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3389      * within a single EBB, else MULTIPLE_EBB.
3390      */
3391     ebb = QTAILQ_FIRST(&s->ops);
3392     QTAILQ_FOREACH(op, &s->ops, link) {
3393         const TCGOpDef *def;
3394         int nb_oargs, nb_iargs;
3395 
3396         switch (op->opc) {
3397         case INDEX_op_set_label:
3398             ebb = op;
3399             continue;
3400         case INDEX_op_discard:
3401             continue;
3402         case INDEX_op_call:
3403             nb_oargs = TCGOP_CALLO(op);
3404             nb_iargs = TCGOP_CALLI(op);
3405             break;
3406         default:
3407             def = &tcg_op_defs[op->opc];
3408             nb_oargs = def->nb_oargs;
3409             nb_iargs = def->nb_iargs;
3410             break;
3411         }
3412 
3413         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3414             TCGTemp *ts = arg_temp(op->args[i]);
3415 
3416             if (ts->kind != TEMP_TB) {
3417                 continue;
3418             }
3419             if (ts->state_ptr == NULL) {
3420                 ts->state_ptr = ebb;
3421             } else if (ts->state_ptr != ebb) {
3422                 ts->state_ptr = multiple_ebb;
3423             }
3424         }
3425     }
3426 
3427     /*
3428      * For TEMP_TB that turned out not to be used beyond one EBB,
3429      * reduce the liveness to TEMP_EBB.
3430      */
3431     for (int i = s->nb_globals; i < nb_temps; ++i) {
3432         TCGTemp *ts = &s->temps[i];
3433         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3434             ts->kind = TEMP_EBB;
3435         }
3436     }
3437 }
3438 
3439 /* Liveness analysis: update the opc_arg_life array to tell if a
3440    given input argument is dead. Instructions updating dead
3441    temporaries are removed. */
3442 static void __attribute__((noinline))
3443 liveness_pass_1(TCGContext *s)
3444 {
3445     int nb_globals = s->nb_globals;
3446     int nb_temps = s->nb_temps;
3447     TCGOp *op, *op_prev;
3448     TCGRegSet *prefs;
3449     int i;
3450 
3451     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3452     for (i = 0; i < nb_temps; ++i) {
3453         s->temps[i].state_ptr = prefs + i;
3454     }
3455 
3456     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3457     la_func_end(s, nb_globals, nb_temps);
3458 
3459     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3460         int nb_iargs, nb_oargs;
3461         TCGOpcode opc_new, opc_new2;
3462         bool have_opc_new2;
3463         TCGLifeData arg_life = 0;
3464         TCGTemp *ts;
3465         TCGOpcode opc = op->opc;
3466         const TCGOpDef *def = &tcg_op_defs[opc];
3467 
3468         switch (opc) {
3469         case INDEX_op_call:
3470             {
3471                 const TCGHelperInfo *info = tcg_call_info(op);
3472                 int call_flags = tcg_call_flags(op);
3473 
3474                 nb_oargs = TCGOP_CALLO(op);
3475                 nb_iargs = TCGOP_CALLI(op);
3476 
3477                 /* pure functions can be removed if their result is unused */
3478                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3479                     for (i = 0; i < nb_oargs; i++) {
3480                         ts = arg_temp(op->args[i]);
3481                         if (ts->state != TS_DEAD) {
3482                             goto do_not_remove_call;
3483                         }
3484                     }
3485                     goto do_remove;
3486                 }
3487             do_not_remove_call:
3488 
3489                 /* Output args are dead.  */
3490                 for (i = 0; i < nb_oargs; i++) {
3491                     ts = arg_temp(op->args[i]);
3492                     if (ts->state & TS_DEAD) {
3493                         arg_life |= DEAD_ARG << i;
3494                     }
3495                     if (ts->state & TS_MEM) {
3496                         arg_life |= SYNC_ARG << i;
3497                     }
3498                     ts->state = TS_DEAD;
3499                     la_reset_pref(ts);
3500                 }
3501 
3502                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3503                 memset(op->output_pref, 0, sizeof(op->output_pref));
3504 
3505                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3506                                     TCG_CALL_NO_READ_GLOBALS))) {
3507                     la_global_kill(s, nb_globals);
3508                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3509                     la_global_sync(s, nb_globals);
3510                 }
3511 
3512                 /* Record arguments that die in this helper.  */
3513                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3514                     ts = arg_temp(op->args[i]);
3515                     if (ts->state & TS_DEAD) {
3516                         arg_life |= DEAD_ARG << i;
3517                     }
3518                 }
3519 
3520                 /* For all live registers, remove call-clobbered prefs.  */
3521                 la_cross_call(s, nb_temps);
3522 
3523                 /*
3524                  * Input arguments are live for preceding opcodes.
3525                  *
3526                  * For those arguments that die, and will be allocated in
3527                  * registers, clear the register set for that arg, to be
3528                  * filled in below.  For args that will be on the stack,
3529                  * reset to any available reg.  Process arguments in reverse
3530                  * order so that if a temp is used more than once, the stack
3531                  * reset to max happens before the register reset to 0.
3532                  */
3533                 for (i = nb_iargs - 1; i >= 0; i--) {
3534                     const TCGCallArgumentLoc *loc = &info->in[i];
3535                     ts = arg_temp(op->args[nb_oargs + i]);
3536 
3537                     if (ts->state & TS_DEAD) {
3538                         switch (loc->kind) {
3539                         case TCG_CALL_ARG_NORMAL:
3540                         case TCG_CALL_ARG_EXTEND_U:
3541                         case TCG_CALL_ARG_EXTEND_S:
3542                             if (arg_slot_reg_p(loc->arg_slot)) {
3543                                 *la_temp_pref(ts) = 0;
3544                                 break;
3545                             }
3546                             /* fall through */
3547                         default:
3548                             *la_temp_pref(ts) =
3549                                 tcg_target_available_regs[ts->type];
3550                             break;
3551                         }
3552                         ts->state &= ~TS_DEAD;
3553                     }
3554                 }
3555 
3556                 /*
3557                  * For each input argument, add its input register to prefs.
3558                  * If a temp is used once, this produces a single set bit;
3559                  * if a temp is used multiple times, this produces a set.
3560                  */
3561                 for (i = 0; i < nb_iargs; i++) {
3562                     const TCGCallArgumentLoc *loc = &info->in[i];
3563                     ts = arg_temp(op->args[nb_oargs + i]);
3564 
3565                     switch (loc->kind) {
3566                     case TCG_CALL_ARG_NORMAL:
3567                     case TCG_CALL_ARG_EXTEND_U:
3568                     case TCG_CALL_ARG_EXTEND_S:
3569                         if (arg_slot_reg_p(loc->arg_slot)) {
3570                             tcg_regset_set_reg(*la_temp_pref(ts),
3571                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3572                         }
3573                         break;
3574                     default:
3575                         break;
3576                     }
3577                 }
3578             }
3579             break;
3580         case INDEX_op_insn_start:
3581             break;
3582         case INDEX_op_discard:
3583             /* mark the temporary as dead */
3584             ts = arg_temp(op->args[0]);
3585             ts->state = TS_DEAD;
3586             la_reset_pref(ts);
3587             break;
3588 
3589         case INDEX_op_add2_i32:
3590             opc_new = INDEX_op_add_i32;
3591             goto do_addsub2;
3592         case INDEX_op_sub2_i32:
3593             opc_new = INDEX_op_sub_i32;
3594             goto do_addsub2;
3595         case INDEX_op_add2_i64:
3596             opc_new = INDEX_op_add_i64;
3597             goto do_addsub2;
3598         case INDEX_op_sub2_i64:
3599             opc_new = INDEX_op_sub_i64;
3600         do_addsub2:
3601             nb_iargs = 4;
3602             nb_oargs = 2;
3603             /* Test if the high part of the operation is dead, but not
3604                the low part.  The result can be optimized to a simple
3605                add or sub.  This happens often for an x86_64 guest when
3606                the CPU mode is set to 32 bit.  */
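            /*
             * E.g. (hypothetical arg names)
             *     add2_i32 rl, rh, al, ah, bl, bh    with rh dead
             * is rewritten in place as
             *     add_i32 rl, al, bl
             * with the trailing three argument slots left unused.
             */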
3607             if (arg_temp(op->args[1])->state == TS_DEAD) {
3608                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3609                     goto do_remove;
3610                 }
3611                 /* Replace the opcode and adjust the args in place,
3612                    leaving 3 unused args at the end.  */
3613                 op->opc = opc = opc_new;
3614                 op->args[1] = op->args[2];
3615                 op->args[2] = op->args[4];
3616                 /* Fall through and mark the single-word operation live.  */
3617                 nb_iargs = 2;
3618                 nb_oargs = 1;
3619             }
3620             goto do_not_remove;
3621 
3622         case INDEX_op_mulu2_i32:
3623             opc_new = INDEX_op_mul_i32;
3624             opc_new2 = INDEX_op_muluh_i32;
3625             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3626             goto do_mul2;
3627         case INDEX_op_muls2_i32:
3628             opc_new = INDEX_op_mul_i32;
3629             opc_new2 = INDEX_op_mulsh_i32;
3630             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3631             goto do_mul2;
3632         case INDEX_op_mulu2_i64:
3633             opc_new = INDEX_op_mul_i64;
3634             opc_new2 = INDEX_op_muluh_i64;
3635             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3636             goto do_mul2;
3637         case INDEX_op_muls2_i64:
3638             opc_new = INDEX_op_mul_i64;
3639             opc_new2 = INDEX_op_mulsh_i64;
3640             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3641             goto do_mul2;
3642         do_mul2:
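            /*
             * Analogous to do_addsub2: e.g. (hypothetical arg names)
             *     mulu2_i32 rl, rh, a, b    with rh dead
             * becomes  mul_i32 rl, a, b;  with rl dead instead, and
             * muluh supported, it becomes  muluh_i32 rh, a, b.
             */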
3643             nb_iargs = 2;
3644             nb_oargs = 2;
3645             if (arg_temp(op->args[1])->state == TS_DEAD) {
3646                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3647                     /* Both parts of the operation are dead.  */
3648                     goto do_remove;
3649                 }
3650                 /* The high part of the operation is dead; generate the low. */
3651                 op->opc = opc = opc_new;
3652                 op->args[1] = op->args[2];
3653                 op->args[2] = op->args[3];
3654             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3655                 /* The low part of the operation is dead; generate the high. */
3656                 op->opc = opc = opc_new2;
3657                 op->args[0] = op->args[1];
3658                 op->args[1] = op->args[2];
3659                 op->args[2] = op->args[3];
3660             } else {
3661                 goto do_not_remove;
3662             }
3663             /* Mark the single-word operation live.  */
3664             nb_oargs = 1;
3665             goto do_not_remove;
3666 
3667         default:
3668             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3669             nb_iargs = def->nb_iargs;
3670             nb_oargs = def->nb_oargs;
3671 
3672             /* Test if the operation can be removed because all
3673                its outputs are dead. We assume that nb_oargs == 0
3674                implies side effects.  */
3675             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3676                 for (i = 0; i < nb_oargs; i++) {
3677                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3678                         goto do_not_remove;
3679                     }
3680                 }
3681                 goto do_remove;
3682             }
3683             goto do_not_remove;
3684 
3685         do_remove:
3686             tcg_op_remove(s, op);
3687             break;
3688 
3689         do_not_remove:
3690             for (i = 0; i < nb_oargs; i++) {
3691                 ts = arg_temp(op->args[i]);
3692 
3693                 /* Remember the preference of the uses that followed.  */
3694                 if (i < ARRAY_SIZE(op->output_pref)) {
3695                     op->output_pref[i] = *la_temp_pref(ts);
3696                 }
3697 
3698                 /* Output args are dead.  */
3699                 if (ts->state & TS_DEAD) {
3700                     arg_life |= DEAD_ARG << i;
3701                 }
3702                 if (ts->state & TS_MEM) {
3703                     arg_life |= SYNC_ARG << i;
3704                 }
3705                 ts->state = TS_DEAD;
3706                 la_reset_pref(ts);
3707             }
3708 
3709             /* If end of basic block, update.  */
3710             if (def->flags & TCG_OPF_BB_EXIT) {
3711                 la_func_end(s, nb_globals, nb_temps);
3712             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3713                 la_bb_sync(s, nb_globals, nb_temps);
3714             } else if (def->flags & TCG_OPF_BB_END) {
3715                 la_bb_end(s, nb_globals, nb_temps);
3716             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3717                 la_global_sync(s, nb_globals);
3718                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3719                     la_cross_call(s, nb_temps);
3720                 }
3721             }
3722 
3723             /* Record arguments that die in this opcode.  */
3724             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3725                 ts = arg_temp(op->args[i]);
3726                 if (ts->state & TS_DEAD) {
3727                     arg_life |= DEAD_ARG << i;
3728                 }
3729             }
3730 
3731             /* Input arguments are live for preceding opcodes.  */
3732             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3733                 ts = arg_temp(op->args[i]);
3734                 if (ts->state & TS_DEAD) {
3735                     /* For operands that were dead, initially allow
3736                        all regs for the type.  */
3737                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3738                     ts->state &= ~TS_DEAD;
3739                 }
3740             }
3741 
3742             /* Incorporate constraints for this operand.  */
3743             switch (opc) {
3744             case INDEX_op_mov_i32:
3745             case INDEX_op_mov_i64:
3746                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3747                    have proper constraints.  That said, special case
3748                    moves to propagate preferences backward.  */
3749                 if (IS_DEAD_ARG(1)) {
3750                     *la_temp_pref(arg_temp(op->args[0]))
3751                         = *la_temp_pref(arg_temp(op->args[1]));
3752                 }
3753                 break;
3754 
3755             default:
3756                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3757                     const TCGArgConstraint *ct = &def->args_ct[i];
3758                     TCGRegSet set, *pset;
3759 
3760                     ts = arg_temp(op->args[i]);
3761                     pset = la_temp_pref(ts);
3762                     set = *pset;
3763 
3764                     set &= ct->regs;
3765                     if (ct->ialias) {
3766                         set &= output_pref(op, ct->alias_index);
3767                     }
3768                     /* If the combination is not possible, restart.  */
3769                     if (set == 0) {
3770                         set = ct->regs;
3771                     }
3772                     *pset = set;
3773                 }
3774                 break;
3775             }
3776             break;
3777         }
3778         op->life = arg_life;
3779     }
3780 }
3781 
3782 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
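/*
 * A sketch of the rewrite, for a hypothetical indirect global G with
 * mem_base B and offset O: uses of G are redirected to a direct temp
 * D, inserting  ld_i32 D, B, O  before a use whose value has not been
 * loaded yet, and  st_i32 D, B, O  after a write that liveness says
 * must be synced back to memory.
 */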
3783 static bool __attribute__((noinline))
3784 liveness_pass_2(TCGContext *s)
3785 {
3786     int nb_globals = s->nb_globals;
3787     int nb_temps, i;
3788     bool changes = false;
3789     TCGOp *op, *op_next;
3790 
3791     /* Create a temporary for each indirect global.  */
3792     for (i = 0; i < nb_globals; ++i) {
3793         TCGTemp *its = &s->temps[i];
3794         if (its->indirect_reg) {
3795             TCGTemp *dts = tcg_temp_alloc(s);
3796             dts->type = its->type;
3797             dts->base_type = its->base_type;
3798             dts->temp_subindex = its->temp_subindex;
3799             dts->kind = TEMP_EBB;
3800             its->state_ptr = dts;
3801         } else {
3802             its->state_ptr = NULL;
3803         }
3804         /* All globals begin dead.  */
3805         its->state = TS_DEAD;
3806     }
3807     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3808         TCGTemp *its = &s->temps[i];
3809         its->state_ptr = NULL;
3810         its->state = TS_DEAD;
3811     }
3812 
3813     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3814         TCGOpcode opc = op->opc;
3815         const TCGOpDef *def = &tcg_op_defs[opc];
3816         TCGLifeData arg_life = op->life;
3817         int nb_iargs, nb_oargs, call_flags;
3818         TCGTemp *arg_ts, *dir_ts;
3819 
3820         if (opc == INDEX_op_call) {
3821             nb_oargs = TCGOP_CALLO(op);
3822             nb_iargs = TCGOP_CALLI(op);
3823             call_flags = tcg_call_flags(op);
3824         } else {
3825             nb_iargs = def->nb_iargs;
3826             nb_oargs = def->nb_oargs;
3827 
3828             /* Set flags similar to how calls require.  */
3829             if (def->flags & TCG_OPF_COND_BRANCH) {
3830                 /* Like reading globals: sync_globals */
3831                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3832             } else if (def->flags & TCG_OPF_BB_END) {
3833                 /* Like writing globals: save_globals */
3834                 call_flags = 0;
3835             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3836                 /* Like reading globals: sync_globals */
3837                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3838             } else {
3839                 /* No effect on globals.  */
3840                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3841                               TCG_CALL_NO_WRITE_GLOBALS);
3842             }
3843         }
3844 
3845         /* Make sure that input arguments are available.  */
3846         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3847             arg_ts = arg_temp(op->args[i]);
3848             dir_ts = arg_ts->state_ptr;
3849             if (dir_ts && arg_ts->state == TS_DEAD) {
3850                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3851                                   ? INDEX_op_ld_i32
3852                                   : INDEX_op_ld_i64);
3853                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3854 
3855                 lop->args[0] = temp_arg(dir_ts);
3856                 lop->args[1] = temp_arg(arg_ts->mem_base);
3857                 lop->args[2] = arg_ts->mem_offset;
3858 
3859                 /* Loaded, but synced with memory.  */
3860                 arg_ts->state = TS_MEM;
3861             }
3862         }
3863 
3864         /* Perform input replacement, and mark inputs that became dead.
3865            No action is required except keeping temp_state up to date
3866            so that we reload when needed.  */
3867         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3868             arg_ts = arg_temp(op->args[i]);
3869             dir_ts = arg_ts->state_ptr;
3870             if (dir_ts) {
3871                 op->args[i] = temp_arg(dir_ts);
3872                 changes = true;
3873                 if (IS_DEAD_ARG(i)) {
3874                     arg_ts->state = TS_DEAD;
3875                 }
3876             }
3877         }
3878 
3879         /* Liveness analysis should ensure that the following are
3880            all correct, for call sites and basic block end points.  */
3881         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3882             /* Nothing to do */
3883         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3884             for (i = 0; i < nb_globals; ++i) {
3885                 /* Liveness should see that globals are synced back,
3886                    that is, either TS_DEAD or TS_MEM.  */
3887                 arg_ts = &s->temps[i];
3888                 tcg_debug_assert(arg_ts->state_ptr == 0
3889                                  || arg_ts->state != 0);
3890             }
3891         } else {
3892             for (i = 0; i < nb_globals; ++i) {
3893                 /* Liveness should see that globals are saved back,
3894                    that is, TS_DEAD, waiting to be reloaded.  */
3895                 arg_ts = &s->temps[i];
3896                 tcg_debug_assert(arg_ts->state_ptr == 0
3897                                  || arg_ts->state == TS_DEAD);
3898             }
3899         }
3900 
3901         /* Outputs become available.  */
3902         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3903             arg_ts = arg_temp(op->args[0]);
3904             dir_ts = arg_ts->state_ptr;
3905             if (dir_ts) {
3906                 op->args[0] = temp_arg(dir_ts);
3907                 changes = true;
3908 
3909                 /* The output is now live and modified.  */
3910                 arg_ts->state = 0;
3911 
3912                 if (NEED_SYNC_ARG(0)) {
3913                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3914                                       ? INDEX_op_st_i32
3915                                       : INDEX_op_st_i64);
3916                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3917                     TCGTemp *out_ts = dir_ts;
3918 
3919                     if (IS_DEAD_ARG(0)) {
3920                         out_ts = arg_temp(op->args[1]);
3921                         arg_ts->state = TS_DEAD;
3922                         tcg_op_remove(s, op);
3923                     } else {
3924                         arg_ts->state = TS_MEM;
3925                     }
3926 
3927                     sop->args[0] = temp_arg(out_ts);
3928                     sop->args[1] = temp_arg(arg_ts->mem_base);
3929                     sop->args[2] = arg_ts->mem_offset;
3930                 } else {
3931                     tcg_debug_assert(!IS_DEAD_ARG(0));
3932                 }
3933             }
3934         } else {
3935             for (i = 0; i < nb_oargs; i++) {
3936                 arg_ts = arg_temp(op->args[i]);
3937                 dir_ts = arg_ts->state_ptr;
3938                 if (!dir_ts) {
3939                     continue;
3940                 }
3941                 op->args[i] = temp_arg(dir_ts);
3942                 changes = true;
3943 
3944                 /* The output is now live and modified.  */
3945                 arg_ts->state = 0;
3946 
3947                 /* Sync outputs upon their last write.  */
3948                 if (NEED_SYNC_ARG(i)) {
3949                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3950                                       ? INDEX_op_st_i32
3951                                       : INDEX_op_st_i64);
3952                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3953 
3954                     sop->args[0] = temp_arg(dir_ts);
3955                     sop->args[1] = temp_arg(arg_ts->mem_base);
3956                     sop->args[2] = arg_ts->mem_offset;
3957 
3958                     arg_ts->state = TS_MEM;
3959                 }
3960                 /* Drop outputs that are dead.  */
3961                 if (IS_DEAD_ARG(i)) {
3962                     arg_ts->state = TS_DEAD;
3963                 }
3964             }
3965         }
3966     }
3967 
3968     return changes;
3969 }
3970 
3971 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3972 {
3973     intptr_t off;
3974     int size, align;
3975 
3976     /* When allocating an object, look at the full type. */
3977     size = tcg_type_size(ts->base_type);
3978     switch (ts->base_type) {
3979     case TCG_TYPE_I32:
3980         align = 4;
3981         break;
3982     case TCG_TYPE_I64:
3983     case TCG_TYPE_V64:
3984         align = 8;
3985         break;
3986     case TCG_TYPE_I128:
3987     case TCG_TYPE_V128:
3988     case TCG_TYPE_V256:
3989         /*
3990          * Note that we do not require aligned storage for V256,
3991          * and that we provide alignment for I128 to match V128,
3992          * even if that's above what the host ABI requires.
3993          */
3994         align = 16;
3995         break;
3996     default:
3997         g_assert_not_reached();
3998     }
3999 
4000     /*
4001      * Assume the stack is sufficiently aligned.
4002      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4003      * and do not require 16 byte vector alignment.  This seems slightly
4004      * easier than fully parameterizing the above switch statement.
4005      */
4006     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4007     off = ROUND_UP(s->current_frame_offset, align);
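    /*
     * E.g., assuming TCG_TARGET_STACK_ALIGN >= 8: an I64 slot requested
     * at current_frame_offset == 12 is placed at off == 16, and the
     * frame offset then advances to 24.
     */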
4008 
4009     /* If we've exhausted the stack frame, restart with a smaller TB. */
4010     if (off + size > s->frame_end) {
4011         tcg_raise_tb_overflow(s);
4012     }
4013     s->current_frame_offset = off + size;
4014 #if defined(__sparc__)
4015     off += TCG_TARGET_STACK_BIAS;
4016 #endif
4017 
4018     /* If the object was subdivided, assign memory to all the parts. */
4019     if (ts->base_type != ts->type) {
4020         int part_size = tcg_type_size(ts->type);
4021         int part_count = size / part_size;
4022 
4023         /*
4024          * Each part is allocated sequentially in tcg_temp_new_internal.
4025          * Jump back to the first part by subtracting the current index.
4026          */
4027         ts -= ts->temp_subindex;
4028         for (int i = 0; i < part_count; ++i) {
4029             ts[i].mem_offset = off + i * part_size;
4030             ts[i].mem_base = s->frame_temp;
4031             ts[i].mem_allocated = 1;
4032         }
4033     } else {
4034         ts->mem_offset = off;
4035         ts->mem_base = s->frame_temp;
4036         ts->mem_allocated = 1;
4037     }
4038 }
4039 
4040 /* Assign @reg to @ts, and update reg_to_temp[]. */
4041 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4042 {
4043     if (ts->val_type == TEMP_VAL_REG) {
4044         TCGReg old = ts->reg;
4045         tcg_debug_assert(s->reg_to_temp[old] == ts);
4046         if (old == reg) {
4047             return;
4048         }
4049         s->reg_to_temp[old] = NULL;
4050     }
4051     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4052     s->reg_to_temp[reg] = ts;
4053     ts->val_type = TEMP_VAL_REG;
4054     ts->reg = reg;
4055 }
4056 
4057 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4058 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4059 {
4060     tcg_debug_assert(type != TEMP_VAL_REG);
4061     if (ts->val_type == TEMP_VAL_REG) {
4062         TCGReg reg = ts->reg;
4063         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4064         s->reg_to_temp[reg] = NULL;
4065     }
4066     ts->val_type = type;
4067 }
4068 
4069 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4070 
4071 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4072    mark it free; otherwise mark it dead.  */
4073 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4074 {
4075     TCGTempVal new_type;
4076 
4077     switch (ts->kind) {
4078     case TEMP_FIXED:
4079         return;
4080     case TEMP_GLOBAL:
4081     case TEMP_TB:
4082         new_type = TEMP_VAL_MEM;
4083         break;
4084     case TEMP_EBB:
4085         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4086         break;
4087     case TEMP_CONST:
4088         new_type = TEMP_VAL_CONST;
4089         break;
4090     default:
4091         g_assert_not_reached();
4092     }
4093     set_temp_val_nonreg(s, ts, new_type);
4094 }
4095 
4096 /* Mark a temporary as dead.  */
4097 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4098 {
4099     temp_free_or_dead(s, ts, 1);
4100 }
4101 
4102 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4103    register needs to be allocated to store a constant.  If 'free_or_dead'
4104    is non-zero, subsequently release the temporary; if it is positive, the
4105    temp is dead; if it is negative, the temp is free.  */
4106 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4107                       TCGRegSet preferred_regs, int free_or_dead)
4108 {
4109     if (!temp_readonly(ts) && !ts->mem_coherent) {
4110         if (!ts->mem_allocated) {
4111             temp_allocate_frame(s, ts);
4112         }
4113         switch (ts->val_type) {
4114         case TEMP_VAL_CONST:
4115             /* If we're going to free the temp immediately, then we won't
4116                require it later in a register, so attempt to store the
4117                constant to memory directly.  */
4118             if (free_or_dead
4119                 && tcg_out_sti(s, ts->type, ts->val,
4120                                ts->mem_base->reg, ts->mem_offset)) {
4121                 break;
4122             }
4123             temp_load(s, ts, tcg_target_available_regs[ts->type],
4124                       allocated_regs, preferred_regs);
4125             /* fallthrough */
4126 
4127         case TEMP_VAL_REG:
4128             tcg_out_st(s, ts->type, ts->reg,
4129                        ts->mem_base->reg, ts->mem_offset);
4130             break;
4131 
4132         case TEMP_VAL_MEM:
4133             break;
4134 
4135         case TEMP_VAL_DEAD:
4136         default:
4137             g_assert_not_reached();
4138         }
4139         ts->mem_coherent = 1;
4140     }
4141     if (free_or_dead) {
4142         temp_free_or_dead(s, ts, free_or_dead);
4143     }
4144 }
4145 
4146 /* free register 'reg' by spilling the corresponding temporary if necessary */
4147 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4148 {
4149     TCGTemp *ts = s->reg_to_temp[reg];
4150     if (ts != NULL) {
4151         temp_sync(s, ts, allocated_regs, 0, -1);
4152     }
4153 }
4154 
4155 /**
4156  * tcg_reg_alloc:
4157  * @required_regs: Set of registers in which we must allocate.
4158  * @allocated_regs: Set of registers which must be avoided.
4159  * @preferred_regs: Set of registers we should prefer.
4160  * @rev: True if we search the registers in "indirect" order.
4161  *
4162  * The allocated register must be in @required_regs & ~@allocated_regs,
4163  * but if we can put it in @preferred_regs we may save a move later.
4164  */
4165 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4166                             TCGRegSet allocated_regs,
4167                             TCGRegSet preferred_regs, bool rev)
4168 {
4169     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4170     TCGRegSet reg_ct[2];
4171     const int *order;
4172 
4173     reg_ct[1] = required_regs & ~allocated_regs;
4174     tcg_debug_assert(reg_ct[1] != 0);
4175     reg_ct[0] = reg_ct[1] & preferred_regs;
4176 
4177     /* Skip the preferred_regs option if it cannot be satisfied,
4178        or if the preference made no difference.  */
4179     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
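    /*
     * E.g. when f == 0, pass j == 0 scans only the preferred subset
     * and pass j == 1 widens to the full allowed set; when f == 1 the
     * preferred subset is skipped as useless.
     */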
4180 
4181     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4182 
4183     /* Try free registers, preferences first.  */
4184     for (j = f; j < 2; j++) {
4185         TCGRegSet set = reg_ct[j];
4186 
4187         if (tcg_regset_single(set)) {
4188             /* One register in the set.  */
4189             TCGReg reg = tcg_regset_first(set);
4190             if (s->reg_to_temp[reg] == NULL) {
4191                 return reg;
4192             }
4193         } else {
4194             for (i = 0; i < n; i++) {
4195                 TCGReg reg = order[i];
4196                 if (s->reg_to_temp[reg] == NULL &&
4197                     tcg_regset_test_reg(set, reg)) {
4198                     return reg;
4199                 }
4200             }
4201         }
4202     }
4203 
4204     /* We must spill something.  */
4205     for (j = f; j < 2; j++) {
4206         TCGRegSet set = reg_ct[j];
4207 
4208         if (tcg_regset_single(set)) {
4209             /* One register in the set.  */
4210             TCGReg reg = tcg_regset_first(set);
4211             tcg_reg_free(s, reg, allocated_regs);
4212             return reg;
4213         } else {
4214             for (i = 0; i < n; i++) {
4215                 TCGReg reg = order[i];
4216                 if (tcg_regset_test_reg(set, reg)) {
4217                     tcg_reg_free(s, reg, allocated_regs);
4218                     return reg;
4219                 }
4220             }
4221         }
4222     }
4223 
4224     g_assert_not_reached();
4225 }
4226 
4227 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4228                                  TCGRegSet allocated_regs,
4229                                  TCGRegSet preferred_regs, bool rev)
4230 {
4231     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4232     TCGRegSet reg_ct[2];
4233     const int *order;
4234 
4235     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4236     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4237     tcg_debug_assert(reg_ct[1] != 0);
4238     reg_ct[0] = reg_ct[1] & preferred_regs;
4239 
4240     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4241 
4242     /*
4243      * Skip the preferred_regs option if it cannot be satisfied,
4244      * or if the preference made no difference.
4245      */
4246     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4247 
4248     /*
4249      * Minimize the number of flushes by looking for 2 free registers first,
4250      * then a single flush, then two flushes.
4251      */
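    /*
     * E.g. fmin == 2 accepts only pairs where both reg and reg+1 are
     * currently unassigned; fmin == 1 accepts pairs needing one spill;
     * fmin == 0 accepts any allowed pair, spilling both if necessary.
     */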
4252     for (fmin = 2; fmin >= 0; fmin--) {
4253         for (j = k; j < 2; j++) {
4254             TCGRegSet set = reg_ct[j];
4255 
4256             for (i = 0; i < n; i++) {
4257                 TCGReg reg = order[i];
4258 
4259                 if (tcg_regset_test_reg(set, reg)) {
4260                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4261                     if (f >= fmin) {
4262                         tcg_reg_free(s, reg, allocated_regs);
4263                         tcg_reg_free(s, reg + 1, allocated_regs);
4264                         return reg;
4265                     }
4266                 }
4267             }
4268         }
4269     }
4270     g_assert_not_reached();
4271 }
4272 
4273 /* Make sure the temporary is in a register.  If needed, allocate the register
4274    from DESIRED while avoiding ALLOCATED.  */
4275 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4276                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4277 {
4278     TCGReg reg;
4279 
4280     switch (ts->val_type) {
4281     case TEMP_VAL_REG:
4282         return;
4283     case TEMP_VAL_CONST:
4284         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4285                             preferred_regs, ts->indirect_base);
4286         if (ts->type <= TCG_TYPE_I64) {
4287             tcg_out_movi(s, ts->type, reg, ts->val);
4288         } else {
4289             uint64_t val = ts->val;
4290             MemOp vece = MO_64;
4291 
4292             /*
4293              * Find the minimal vector element that matches the constant.
4294              * The targets will, in general, have to do this search anyway,
4295              * so do it generically here.
4296              */
4297             if (val == dup_const(MO_8, val)) {
4298                 vece = MO_8;
4299             } else if (val == dup_const(MO_16, val)) {
4300                 vece = MO_16;
4301             } else if (val == dup_const(MO_32, val)) {
4302                 vece = MO_32;
4303             }
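            /*
             * E.g. 0x4242424242424242 replicates its low byte and is
             * emitted as an MO_8 dup, while 0x0001000100010001 first
             * matches at MO_16.
             */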
4304 
4305             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4306         }
4307         ts->mem_coherent = 0;
4308         break;
4309     case TEMP_VAL_MEM:
4310         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4311                             preferred_regs, ts->indirect_base);
4312         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4313         ts->mem_coherent = 1;
4314         break;
4315     case TEMP_VAL_DEAD:
4316     default:
4317         g_assert_not_reached();
4318     }
4319     set_temp_val_reg(s, ts, reg);
4320 }
4321 
4322 /* Save a temporary to memory. 'allocated_regs' is used in case a
4323    temporary register needs to be allocated to store a constant.  */
4324 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4325 {
4326     /* The liveness analysis already ensures that globals are back
4327        in memory. Keep a tcg_debug_assert for safety. */
4328     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4329 }
4330 
4331 /* save globals to their canonical location and assume they can be
4332    modified by the following code. 'allocated_regs' is used in case a
4333    temporary register needs to be allocated to store a constant. */
4334 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4335 {
4336     int i, n;
4337 
4338     for (i = 0, n = s->nb_globals; i < n; i++) {
4339         temp_save(s, &s->temps[i], allocated_regs);
4340     }
4341 }
4342 
4343 /* sync globals to their canonical location and assume they can be
4344    read by the following code. 'allocated_regs' is used in case a
4345    temporary register needs to be allocated to store a constant. */
4346 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4347 {
4348     int i, n;
4349 
4350     for (i = 0, n = s->nb_globals; i < n; i++) {
4351         TCGTemp *ts = &s->temps[i];
4352         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4353                          || ts->kind == TEMP_FIXED
4354                          || ts->mem_coherent);
4355     }
4356 }
4357 
4358 /* at the end of a basic block, we assume all temporaries are dead and
4359    all globals are stored at their canonical location. */
4360 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4361 {
4362     int i;
4363 
4364     for (i = s->nb_globals; i < s->nb_temps; i++) {
4365         TCGTemp *ts = &s->temps[i];
4366 
4367         switch (ts->kind) {
4368         case TEMP_TB:
4369             temp_save(s, ts, allocated_regs);
4370             break;
4371         case TEMP_EBB:
4372             /* The liveness analysis already ensures that temps are dead.
4373                Keep a tcg_debug_assert for safety. */
4374             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4375             break;
4376         case TEMP_CONST:
4377             /* Similarly, we should have freed any allocated register. */
4378             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4379             break;
4380         default:
4381             g_assert_not_reached();
4382         }
4383     }
4384 
4385     save_globals(s, allocated_regs);
4386 }
4387 
4388 /*
4389  * At a conditional branch, we assume all temporaries are dead unless
4390  * explicitly live-across-conditional-branch; all globals and local
4391  * temps are synced to their location.
4392  */
4393 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4394 {
4395     sync_globals(s, allocated_regs);
4396 
4397     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4398         TCGTemp *ts = &s->temps[i];
4399         /*
4400          * The liveness analysis already ensures that temps are dead.
4401          * Keep tcg_debug_asserts for safety.
4402          */
4403         switch (ts->kind) {
4404         case TEMP_TB:
4405             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4406             break;
4407         case TEMP_EBB:
4408         case TEMP_CONST:
4409             break;
4410         default:
4411             g_assert_not_reached();
4412         }
4413     }
4414 }
4415 
4416 /*
4417  * Specialized code generation for INDEX_op_mov_* with a constant.
4418  */
4419 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4420                                   tcg_target_ulong val, TCGLifeData arg_life,
4421                                   TCGRegSet preferred_regs)
4422 {
4423     /* ENV should not be modified.  */
4424     tcg_debug_assert(!temp_readonly(ots));
4425 
4426     /* The movi is not explicitly generated here.  */
4427     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4428     ots->val = val;
4429     ots->mem_coherent = 0;
4430     if (NEED_SYNC_ARG(0)) {
4431         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4432     } else if (IS_DEAD_ARG(0)) {
4433         temp_dead(s, ots);
4434     }
4435 }
4436 
4437 /*
4438  * Specialized code generation for INDEX_op_mov_*.
4439  */
4440 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4441 {
4442     const TCGLifeData arg_life = op->life;
4443     TCGRegSet allocated_regs, preferred_regs;
4444     TCGTemp *ts, *ots;
4445     TCGType otype, itype;
4446     TCGReg oreg, ireg;
4447 
4448     allocated_regs = s->reserved_regs;
4449     preferred_regs = output_pref(op, 0);
4450     ots = arg_temp(op->args[0]);
4451     ts = arg_temp(op->args[1]);
4452 
4453     /* ENV should not be modified.  */
4454     tcg_debug_assert(!temp_readonly(ots));
4455 
4456     /* Note that otype != itype for no-op truncation.  */
4457     otype = ots->type;
4458     itype = ts->type;
4459 
4460     if (ts->val_type == TEMP_VAL_CONST) {
4461         /* propagate constant or generate sti */
4462         tcg_target_ulong val = ts->val;
4463         if (IS_DEAD_ARG(1)) {
4464             temp_dead(s, ts);
4465         }
4466         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4467         return;
4468     }
4469 
4470     /* If the source value is in memory we're going to be forced
4471        to have it in a register in order to perform the copy.  Copy
4472        the SOURCE value into its own register first, that way we
4473        don't have to reload SOURCE the next time it is used. */
4474     if (ts->val_type == TEMP_VAL_MEM) {
4475         temp_load(s, ts, tcg_target_available_regs[itype],
4476                   allocated_regs, preferred_regs);
4477     }
4478     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4479     ireg = ts->reg;
4480 
4481     if (IS_DEAD_ARG(0)) {
4482         /* mov to a non-saved dead register makes no sense (even with
4483            liveness analysis disabled). */
4484         tcg_debug_assert(NEED_SYNC_ARG(0));
4485         if (!ots->mem_allocated) {
4486             temp_allocate_frame(s, ots);
4487         }
4488         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4489         if (IS_DEAD_ARG(1)) {
4490             temp_dead(s, ts);
4491         }
4492         temp_dead(s, ots);
4493         return;
4494     }
4495 
4496     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4497         /*
4498          * The mov can be suppressed.  Kill input first, so that it
4499          * is unlinked from reg_to_temp, then set the output to the
4500          * reg that we saved from the input.
4501          */
4502         temp_dead(s, ts);
4503         oreg = ireg;
4504     } else {
4505         if (ots->val_type == TEMP_VAL_REG) {
4506             oreg = ots->reg;
4507         } else {
4508             /* Make sure to not spill the input register during allocation. */
4509             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4510                                  allocated_regs | ((TCGRegSet)1 << ireg),
4511                                  preferred_regs, ots->indirect_base);
4512         }
4513         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4514             /*
4515              * Cross register class move not supported.
4516              * Store the source register into the destination slot
4517              * and leave the destination temp as TEMP_VAL_MEM.
4518              */
4519             assert(!temp_readonly(ots));
4520             if (!ots->mem_allocated) {
4521                 temp_allocate_frame(s, ots);
4522             }
4523             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4524             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4525             ots->mem_coherent = 1;
4526             return;
4527         }
4528     }
4529     set_temp_val_reg(s, ots, oreg);
4530     ots->mem_coherent = 0;
4531 
4532     if (NEED_SYNC_ARG(0)) {
4533         temp_sync(s, ots, allocated_regs, 0, 0);
4534     }
4535 }
4536 
4537 /*
4538  * Specialized code generation for INDEX_op_dup_vec.
4539  */
4540 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4541 {
4542     const TCGLifeData arg_life = op->life;
4543     TCGRegSet dup_out_regs, dup_in_regs;
4544     TCGTemp *its, *ots;
4545     TCGType itype, vtype;
4546     unsigned vece;
4547     int lowpart_ofs;
4548     bool ok;
4549 
4550     ots = arg_temp(op->args[0]);
4551     its = arg_temp(op->args[1]);
4552 
4553     /* ENV should not be modified.  */
4554     tcg_debug_assert(!temp_readonly(ots));
4555 
4556     itype = its->type;
4557     vece = TCGOP_VECE(op);
4558     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4559 
4560     if (its->val_type == TEMP_VAL_CONST) {
4561         /* Propagate constant via movi -> dupi.  */
4562         tcg_target_ulong val = its->val;
4563         if (IS_DEAD_ARG(1)) {
4564             temp_dead(s, its);
4565         }
4566         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4567         return;
4568     }
4569 
4570     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4571     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4572 
4573     /* Allocate the output register now.  */
4574     if (ots->val_type != TEMP_VAL_REG) {
4575         TCGRegSet allocated_regs = s->reserved_regs;
4576         TCGReg oreg;
4577 
4578         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4579             /* Make sure to not spill the input register. */
4580             tcg_regset_set_reg(allocated_regs, its->reg);
4581         }
4582         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4583                              output_pref(op, 0), ots->indirect_base);
4584         set_temp_val_reg(s, ots, oreg);
4585     }
4586 
4587     switch (its->val_type) {
4588     case TEMP_VAL_REG:
4589         /*
4590          * The dup constraints must be broad, covering all possible VECE.
4591          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4592          * to fail, indicating that extra moves are required for that case.
4593          */
4594         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4595             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4596                 goto done;
4597             }
4598             /* Try again from memory or a vector input register.  */
4599         }
4600         if (!its->mem_coherent) {
4601             /*
4602              * The input register is not synced, and so an extra store
4603              * would be required to use memory.  Attempt an integer-vector
4604              * register move first.  We do not have a TCGRegSet for this.
4605              */
4606             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4607                 break;
4608             }
4609             /* Sync the temp back to its slot and load from there.  */
4610             temp_sync(s, its, s->reserved_regs, 0, 0);
4611         }
4612         /* fall through */
4613 
4614     case TEMP_VAL_MEM:
4615         lowpart_ofs = 0;
4616         if (HOST_BIG_ENDIAN) {
4617             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4618         }
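             /*
              * Illustrative numbers: on a big-endian host with itype ==
              * TCG_TYPE_I64 and vece == MO_8, tcg_type_size() is 8 and
              * (1 << vece) is 1, so lowpart_ofs == 7: the least
              * significant element sits at the highest address.
              */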
4619         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4620                              its->mem_offset + lowpart_ofs)) {
4621             goto done;
4622         }
4623         /* Load the input into the destination vector register. */
4624         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4625         break;
4626 
4627     default:
4628         g_assert_not_reached();
4629     }
4630 
4631     /* We now have a vector input register, so dup must succeed. */
4632     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4633     tcg_debug_assert(ok);
4634 
4635  done:
4636     ots->mem_coherent = 0;
4637     if (IS_DEAD_ARG(1)) {
4638         temp_dead(s, its);
4639     }
4640     if (NEED_SYNC_ARG(0)) {
4641         temp_sync(s, ots, s->reserved_regs, 0, 0);
4642     }
4643     if (IS_DEAD_ARG(0)) {
4644         temp_dead(s, ots);
4645     }
4646 }
4647 
4648 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4649 {
4650     const TCGLifeData arg_life = op->life;
4651     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4652     TCGRegSet i_allocated_regs;
4653     TCGRegSet o_allocated_regs;
4654     int i, k, nb_iargs, nb_oargs;
4655     TCGReg reg;
4656     TCGArg arg;
4657     const TCGArgConstraint *arg_ct;
4658     TCGTemp *ts;
4659     TCGArg new_args[TCG_MAX_OP_ARGS];
4660     int const_args[TCG_MAX_OP_ARGS];
4661 
4662     nb_oargs = def->nb_oargs;
4663     nb_iargs = def->nb_iargs;
4664 
4665     /* copy constants */
4666     memcpy(new_args + nb_oargs + nb_iargs,
4667            op->args + nb_oargs + nb_iargs,
4668            sizeof(TCGArg) * def->nb_cargs);
4669 
4670     i_allocated_regs = s->reserved_regs;
4671     o_allocated_regs = s->reserved_regs;
4672 
4673     /* satisfy input constraints */
4674     for (k = 0; k < nb_iargs; k++) {
4675         TCGRegSet i_preferred_regs, i_required_regs;
4676         bool allocate_new_reg, copyto_new_reg;
4677         TCGTemp *ts2;
4678         int i1, i2;
4679 
4680         i = def->args_ct[nb_oargs + k].sort_index;
4681         arg = op->args[i];
4682         arg_ct = &def->args_ct[i];
4683         ts = arg_temp(arg);
4684 
4685         if (ts->val_type == TEMP_VAL_CONST
4686             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4687             /* constant is OK for instruction */
4688             const_args[i] = 1;
4689             new_args[i] = ts->val;
4690             continue;
4691         }
4692 
4693         reg = ts->reg;
4694         i_preferred_regs = 0;
4695         i_required_regs = arg_ct->regs;
4696         allocate_new_reg = false;
4697         copyto_new_reg = false;
4698 
4699         switch (arg_ct->pair) {
4700         case 0: /* not paired */
4701             if (arg_ct->ialias) {
4702                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4703 
4704                 /*
4705                  * If the input is readonly, then it cannot also be an
4706                  * output and aliased to itself.  If the input is not
4707                  * dead after the instruction, we must allocate a new
4708                  * register and move it.
4709                  */
4710                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4711                     || def->args_ct[arg_ct->alias_index].newreg) {
4712                     allocate_new_reg = true;
4713                 } else if (ts->val_type == TEMP_VAL_REG) {
4714                     /*
4715                      * Check if the current register has already been
4716                      * allocated for another input.
4717                      */
4718                     allocate_new_reg =
4719                         tcg_regset_test_reg(i_allocated_regs, reg);
4720                 }
4721             }
4722             if (!allocate_new_reg) {
4723                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4724                           i_preferred_regs);
4725                 reg = ts->reg;
4726                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4727             }
4728             if (allocate_new_reg) {
4729                 /*
4730                  * Allocate a new register matching the constraint
4731                  * and move the temporary register into it.
4732                  */
4733                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4734                           i_allocated_regs, 0);
4735                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4736                                     i_preferred_regs, ts->indirect_base);
4737                 copyto_new_reg = true;
4738             }
4739             break;
4740 
4741         case 1:
4742             /* First of an input pair; if i1 == i2, the second is an output. */
4743             i1 = i;
4744             i2 = arg_ct->pair_index;
4745             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4746 
4747             /*
4748              * It is easier to default to allocating a new pair
4749              * and to identify a few cases where it's not required.
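                  * (E.g. if both inputs die here and already sit in an
                  * allowed, adjacent, unallocated register pair, the
                  * checks below reuse that pair in place.)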
4750              */
4751             if (arg_ct->ialias) {
4752                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4753                 if (IS_DEAD_ARG(i1) &&
4754                     IS_DEAD_ARG(i2) &&
4755                     !temp_readonly(ts) &&
4756                     ts->val_type == TEMP_VAL_REG &&
4757                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4758                     tcg_regset_test_reg(i_required_regs, reg) &&
4759                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4760                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4761                     (ts2
4762                      ? ts2->val_type == TEMP_VAL_REG &&
4763                        ts2->reg == reg + 1 &&
4764                        !temp_readonly(ts2)
4765                      : s->reg_to_temp[reg + 1] == NULL)) {
4766                     break;
4767                 }
4768             } else {
4769                 /* Without aliasing, the pair must also be an input. */
4770                 tcg_debug_assert(ts2);
4771                 if (ts->val_type == TEMP_VAL_REG &&
4772                     ts2->val_type == TEMP_VAL_REG &&
4773                     ts2->reg == reg + 1 &&
4774                     tcg_regset_test_reg(i_required_regs, reg)) {
4775                     break;
4776                 }
4777             }
4778             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4779                                      0, ts->indirect_base);
4780             goto do_pair;
4781 
4782         case 2: /* pair second */
4783             reg = new_args[arg_ct->pair_index] + 1;
4784             goto do_pair;
4785 
4786         case 3: /* ialias with second output, no first input */
4787             tcg_debug_assert(arg_ct->ialias);
4788             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4789 
4790             if (IS_DEAD_ARG(i) &&
4791                 !temp_readonly(ts) &&
4792                 ts->val_type == TEMP_VAL_REG &&
4793                 reg > 0 &&
4794                 s->reg_to_temp[reg - 1] == NULL &&
4795                 tcg_regset_test_reg(i_required_regs, reg) &&
4796                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4797                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4798                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4799                 break;
4800             }
4801             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4802                                      i_allocated_regs, 0,
4803                                      ts->indirect_base);
4804             tcg_regset_set_reg(i_allocated_regs, reg);
4805             reg += 1;
4806             goto do_pair;
4807 
4808         do_pair:
4809             /*
4810              * If an aliased input is not dead after the instruction,
4811              * we must allocate a new register and move it.
4812              */
4813             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4814                 TCGRegSet t_allocated_regs = i_allocated_regs;
4815 
4816                 /*
4817                  * Because of the alias, and the continued life, make sure
4818                  * that the temp is somewhere *other* than the reg pair,
4819                  * and we get a copy in reg.
4820                  */
4821                 tcg_regset_set_reg(t_allocated_regs, reg);
4822                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4823                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4824                     /* If ts was already in reg, copy it somewhere else. */
4825                     TCGReg nr;
4826                     bool ok;
4827 
4828                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4829                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4830                                        t_allocated_regs, 0, ts->indirect_base);
4831                     ok = tcg_out_mov(s, ts->type, nr, reg);
4832                     tcg_debug_assert(ok);
4833 
4834                     set_temp_val_reg(s, ts, nr);
4835                 } else {
4836                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4837                               t_allocated_regs, 0);
4838                     copyto_new_reg = true;
4839                 }
4840             } else {
4841                 /* Preferably allocate to reg, otherwise copy. */
4842                 i_required_regs = (TCGRegSet)1 << reg;
4843                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4844                           i_preferred_regs);
4845                 copyto_new_reg = ts->reg != reg;
4846             }
4847             break;
4848 
4849         default:
4850             g_assert_not_reached();
4851         }
4852 
4853         if (copyto_new_reg) {
4854             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4855                 /*
4856                  * Cross register class move not supported.  Sync the
4857                  * temp back to its slot and load from there.
4858                  */
4859                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4860                 tcg_out_ld(s, ts->type, reg,
4861                            ts->mem_base->reg, ts->mem_offset);
4862             }
4863         }
4864         new_args[i] = reg;
4865         const_args[i] = 0;
4866         tcg_regset_set_reg(i_allocated_regs, reg);
4867     }
4868 
4869     /* mark dead temporaries and free the associated registers */
4870     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4871         if (IS_DEAD_ARG(i)) {
4872             temp_dead(s, arg_temp(op->args[i]));
4873         }
4874     }
4875 
4876     if (def->flags & TCG_OPF_COND_BRANCH) {
4877         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4878     } else if (def->flags & TCG_OPF_BB_END) {
4879         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4880     } else {
4881         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4882             /* XXX: permit generic clobber register list? */
4883             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4884                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4885                     tcg_reg_free(s, i, i_allocated_regs);
4886                 }
4887             }
4888         }
4889         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4890             /* sync globals if the op has side effects and might trigger
4891                an exception. */
4892             sync_globals(s, i_allocated_regs);
4893         }
4894 
4895         /* satisfy the output constraints */
4896         for (k = 0; k < nb_oargs; k++) {
4897             i = def->args_ct[k].sort_index;
4898             arg = op->args[i];
4899             arg_ct = &def->args_ct[i];
4900             ts = arg_temp(arg);
4901 
4902             /* ENV should not be modified.  */
4903             tcg_debug_assert(!temp_readonly(ts));
4904 
4905             switch (arg_ct->pair) {
4906             case 0: /* not paired */
4907                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4908                     reg = new_args[arg_ct->alias_index];
4909                 } else if (arg_ct->newreg) {
4910                     reg = tcg_reg_alloc(s, arg_ct->regs,
4911                                         i_allocated_regs | o_allocated_regs,
4912                                         output_pref(op, k), ts->indirect_base);
4913                 } else {
4914                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4915                                         output_pref(op, k), ts->indirect_base);
4916                 }
4917                 break;
4918 
4919             case 1: /* first of pair */
4920                 tcg_debug_assert(!arg_ct->newreg);
4921                 if (arg_ct->oalias) {
4922                     reg = new_args[arg_ct->alias_index];
4923                     break;
4924                 }
4925                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4926                                          output_pref(op, k), ts->indirect_base);
4927                 break;
4928 
4929             case 2: /* second of pair */
4930                 tcg_debug_assert(!arg_ct->newreg);
4931                 if (arg_ct->oalias) {
4932                     reg = new_args[arg_ct->alias_index];
4933                 } else {
4934                     reg = new_args[arg_ct->pair_index] + 1;
4935                 }
4936                 break;
4937 
4938             case 3: /* first of pair, aliasing with a second input */
4939                 tcg_debug_assert(!arg_ct->newreg);
4940                 reg = new_args[arg_ct->pair_index] - 1;
4941                 break;
4942 
4943             default:
4944                 g_assert_not_reached();
4945             }
4946             tcg_regset_set_reg(o_allocated_regs, reg);
4947             set_temp_val_reg(s, ts, reg);
4948             ts->mem_coherent = 0;
4949             new_args[i] = reg;
4950         }
4951     }
4952 
4953     /* emit instruction */
4954     switch (op->opc) {
4955     case INDEX_op_ext8s_i32:
4956         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4957         break;
4958     case INDEX_op_ext8s_i64:
4959         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4960         break;
4961     case INDEX_op_ext8u_i32:
4962     case INDEX_op_ext8u_i64:
4963         tcg_out_ext8u(s, new_args[0], new_args[1]);
4964         break;
4965     case INDEX_op_ext16s_i32:
4966         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4967         break;
4968     case INDEX_op_ext16s_i64:
4969         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4970         break;
4971     case INDEX_op_ext16u_i32:
4972     case INDEX_op_ext16u_i64:
4973         tcg_out_ext16u(s, new_args[0], new_args[1]);
4974         break;
4975     case INDEX_op_ext32s_i64:
4976         tcg_out_ext32s(s, new_args[0], new_args[1]);
4977         break;
4978     case INDEX_op_ext32u_i64:
4979         tcg_out_ext32u(s, new_args[0], new_args[1]);
4980         break;
4981     case INDEX_op_ext_i32_i64:
4982         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4983         break;
4984     case INDEX_op_extu_i32_i64:
4985         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4986         break;
4987     case INDEX_op_extrl_i64_i32:
4988         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4989         break;
4990     default:
4991         if (def->flags & TCG_OPF_VECTOR) {
4992             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4993                            new_args, const_args);
4994         } else {
4995             tcg_out_op(s, op->opc, new_args, const_args);
4996         }
4997         break;
4998     }
4999 
5000     /* move the outputs in the correct register if needed */
5001     for (i = 0; i < nb_oargs; i++) {
5002         ts = arg_temp(op->args[i]);
5003 
5004         /* ENV should not be modified.  */
5005         tcg_debug_assert(!temp_readonly(ts));
5006 
5007         if (NEED_SYNC_ARG(i)) {
5008             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5009         } else if (IS_DEAD_ARG(i)) {
5010             temp_dead(s, ts);
5011         }
5012     }
5013 }
5014 
5015 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5016 {
5017     const TCGLifeData arg_life = op->life;
5018     TCGTemp *ots, *itsl, *itsh;
5019     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5020 
5021     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5022     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5023     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5024 
5025     ots = arg_temp(op->args[0]);
5026     itsl = arg_temp(op->args[1]);
5027     itsh = arg_temp(op->args[2]);
5028 
5029     /* ENV should not be modified.  */
5030     tcg_debug_assert(!temp_readonly(ots));
5031 
5032     /* Allocate the output register now.  */
5033     if (ots->val_type != TEMP_VAL_REG) {
5034         TCGRegSet allocated_regs = s->reserved_regs;
5035         TCGRegSet dup_out_regs =
5036             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5037         TCGReg oreg;
5038 
5039         /* Make sure to not spill the input registers. */
5040         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5041             tcg_regset_set_reg(allocated_regs, itsl->reg);
5042         }
5043         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5044             tcg_regset_set_reg(allocated_regs, itsh->reg);
5045         }
5046 
5047         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5048                              output_pref(op, 0), ots->indirect_base);
5049         set_temp_val_reg(s, ots, oreg);
5050     }
5051 
5052     /* Promote dup2 of immediates to dupi_vec. */
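         /*
          * Worked example (illustrative values): itsl->val == 0x00010001
          * and itsh->val == 0x00010001 combine to 0x0001000100010001.
          * dup_const(MO_8, val) would give 0x0101010101010101, which does
          * not match, but dup_const(MO_16, val) reproduces the value, so
          * vece narrows to MO_16 and the backend may emit a cheaper dupi.
          */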
5053     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5054         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5055         MemOp vece = MO_64;
5056 
5057         if (val == dup_const(MO_8, val)) {
5058             vece = MO_8;
5059         } else if (val == dup_const(MO_16, val)) {
5060             vece = MO_16;
5061         } else if (val == dup_const(MO_32, val)) {
5062             vece = MO_32;
5063         }
5064 
5065         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5066         goto done;
5067     }
5068 
5069     /* If the two inputs form one 64-bit value, try dupm_vec. */
5070     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5071         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5072         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5073         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5074 
5075         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5076         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5077 
5078         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5079                              its->mem_base->reg, its->mem_offset)) {
5080             goto done;
5081         }
5082     }
5083 
5084     /* Fall back to generic expansion. */
5085     return false;
5086 
5087  done:
5088     ots->mem_coherent = 0;
5089     if (IS_DEAD_ARG(1)) {
5090         temp_dead(s, itsl);
5091     }
5092     if (IS_DEAD_ARG(2)) {
5093         temp_dead(s, itsh);
5094     }
5095     if (NEED_SYNC_ARG(0)) {
5096         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5097     } else if (IS_DEAD_ARG(0)) {
5098         temp_dead(s, ots);
5099     }
5100     return true;
5101 }
5102 
5103 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5104                          TCGRegSet allocated_regs)
5105 {
5106     if (ts->val_type == TEMP_VAL_REG) {
5107         if (ts->reg != reg) {
5108             tcg_reg_free(s, reg, allocated_regs);
5109             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5110                 /*
5111                  * Cross register class move not supported.  Sync the
5112                  * temp back to its slot and load from there.
5113                  */
5114                 temp_sync(s, ts, allocated_regs, 0, 0);
5115                 tcg_out_ld(s, ts->type, reg,
5116                            ts->mem_base->reg, ts->mem_offset);
5117             }
5118         }
5119     } else {
5120         TCGRegSet arg_set = 0;
5121 
5122         tcg_reg_free(s, reg, allocated_regs);
5123         tcg_regset_set_reg(arg_set, reg);
5124         temp_load(s, ts, arg_set, allocated_regs, 0);
5125     }
5126 }
5127 
5128 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5129                          TCGRegSet allocated_regs)
5130 {
5131     /*
5132      * When the destination is on the stack, load up the temp and store.
5133      * If there are many call-saved registers, the temp might live to
5134      * see another use; otherwise it'll be discarded.
5135      */
5136     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5137     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5138                arg_slot_stk_ofs(arg_slot));
5139 }
5140 
5141 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5142                             TCGTemp *ts, TCGRegSet *allocated_regs)
5143 {
5144     if (arg_slot_reg_p(l->arg_slot)) {
5145         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5146         load_arg_reg(s, reg, ts, *allocated_regs);
5147         tcg_regset_set_reg(*allocated_regs, reg);
5148     } else {
5149         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5150     }
5151 }
5152 
5153 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5154                          intptr_t ref_off, TCGRegSet *allocated_regs)
5155 {
5156     TCGReg reg;
5157 
5158     if (arg_slot_reg_p(arg_slot)) {
5159         reg = tcg_target_call_iarg_regs[arg_slot];
5160         tcg_reg_free(s, reg, *allocated_regs);
5161         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5162         tcg_regset_set_reg(*allocated_regs, reg);
5163     } else {
5164         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5165                             *allocated_regs, 0, false);
5166         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5167         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5168                    arg_slot_stk_ofs(arg_slot));
5169     }
5170 }
5171 
5172 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5173 {
5174     const int nb_oargs = TCGOP_CALLO(op);
5175     const int nb_iargs = TCGOP_CALLI(op);
5176     const TCGLifeData arg_life = op->life;
5177     const TCGHelperInfo *info = tcg_call_info(op);
5178     TCGRegSet allocated_regs = s->reserved_regs;
5179     int i;
5180 
5181     /*
5182      * Move inputs into place in reverse order,
5183      * so that we place stacked arguments first.
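          * (Stacked arguments are loaded through a scratch register
          * before being stored; placing them first, while no argument
          * registers are pinned yet, avoids disturbing registers
          * already assigned to arguments.)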
5184      */
5185     for (i = nb_iargs - 1; i >= 0; --i) {
5186         const TCGCallArgumentLoc *loc = &info->in[i];
5187         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5188 
5189         switch (loc->kind) {
5190         case TCG_CALL_ARG_NORMAL:
5191         case TCG_CALL_ARG_EXTEND_U:
5192         case TCG_CALL_ARG_EXTEND_S:
5193             load_arg_normal(s, loc, ts, &allocated_regs);
5194             break;
5195         case TCG_CALL_ARG_BY_REF:
5196             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5197             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5198                          arg_slot_stk_ofs(loc->ref_slot),
5199                          &allocated_regs);
5200             break;
5201         case TCG_CALL_ARG_BY_REF_N:
5202             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5203             break;
5204         default:
5205             g_assert_not_reached();
5206         }
5207     }
5208 
5209     /* Mark dead temporaries and free the associated registers.  */
5210     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5211         if (IS_DEAD_ARG(i)) {
5212             temp_dead(s, arg_temp(op->args[i]));
5213         }
5214     }
5215 
5216     /* Clobber call registers.  */
5217     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5218         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5219             tcg_reg_free(s, i, allocated_regs);
5220         }
5221     }
5222 
5223     /*
5224      * Save globals if they might be written by the helper,
5225      * sync them if they might be read.
5226      */
5227     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5228         /* Nothing to do */
5229     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5230         sync_globals(s, allocated_regs);
5231     } else {
5232         save_globals(s, allocated_regs);
5233     }
5234 
5235     /*
5236      * If the ABI passes a pointer to the returned struct as the first
5237      * argument, load that now.  Pass a pointer to the output home slot.
5238      */
5239     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5240         TCGTemp *ts = arg_temp(op->args[0]);
5241 
5242         if (!ts->mem_allocated) {
5243             temp_allocate_frame(s, ts);
5244         }
5245         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5246     }
5247 
5248     tcg_out_call(s, tcg_call_func(op), info);
5249 
5250     /* Assign output registers and emit moves if needed.  */
5251     switch (info->out_kind) {
5252     case TCG_CALL_RET_NORMAL:
5253         for (i = 0; i < nb_oargs; i++) {
5254             TCGTemp *ts = arg_temp(op->args[i]);
5255             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5256 
5257             /* ENV should not be modified.  */
5258             tcg_debug_assert(!temp_readonly(ts));
5259 
5260             set_temp_val_reg(s, ts, reg);
5261             ts->mem_coherent = 0;
5262         }
5263         break;
5264 
5265     case TCG_CALL_RET_BY_VEC:
5266         {
5267             TCGTemp *ts = arg_temp(op->args[0]);
5268 
5269             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5270             tcg_debug_assert(ts->temp_subindex == 0);
5271             if (!ts->mem_allocated) {
5272                 temp_allocate_frame(s, ts);
5273             }
5274             tcg_out_st(s, TCG_TYPE_V128,
5275                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5276                        ts->mem_base->reg, ts->mem_offset);
5277         }
5278         /* fall through to mark all parts in memory */
5279 
5280     case TCG_CALL_RET_BY_REF:
5281         /* The callee has performed a write through the reference. */
5282         for (i = 0; i < nb_oargs; i++) {
5283             TCGTemp *ts = arg_temp(op->args[i]);
5284             ts->val_type = TEMP_VAL_MEM;
5285         }
5286         break;
5287 
5288     default:
5289         g_assert_not_reached();
5290     }
5291 
5292     /* Flush or discard output registers as needed. */
5293     for (i = 0; i < nb_oargs; i++) {
5294         TCGTemp *ts = arg_temp(op->args[i]);
5295         if (NEED_SYNC_ARG(i)) {
5296             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5297         } else if (IS_DEAD_ARG(i)) {
5298             temp_dead(s, ts);
5299         }
5300     }
5301 }
5302 
5303 /**
5304  * atom_and_align_for_opc:
5305  * @s: tcg context
5306  * @opc: memory operation code
5307  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5308  * @allow_two_ops: true if we are prepared to issue two operations
5309  *
5310  * Return the alignment and atomicity to use for the inline fast path
5311  * for the given memory operation.  The alignment may be larger than
5312  * that specified in @opc, and the correct alignment will be diagnosed
5313  * by the slow path helper.
5314  *
5315  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5316  * and issue two loads or stores for subalignment.
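      *
      * Worked example (illustrative): a 16-byte access with
      * MO_ATOM_WITHIN16_PAIR on a host without within16 semantics and
      * @allow_two_ops set yields atmax == MO_128 but raises align to
      * the half size (MO_64), so each of the two 8-byte operations is
      * individually atomic.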
5317  */
5318 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5319                                            MemOp host_atom, bool allow_two_ops)
5320 {
5321     MemOp align = get_alignment_bits(opc);
5322     MemOp size = opc & MO_SIZE;
5323     MemOp half = size ? size - 1 : 0;
5324     MemOp atmax;
5325     MemOp atom;
5326 
5327     /* When serialized, no further atomicity required.  */
5328     if (s->gen_tb->cflags & CF_PARALLEL) {
5329         atom = opc & MO_ATOM_MASK;
5330     } else {
5331         atom = MO_ATOM_NONE;
5332     }
5333 
5334     switch (atom) {
5335     case MO_ATOM_NONE:
5336         /* The operation requires no specific atomicity. */
5337         atmax = MO_8;
5338         break;
5339 
5340     case MO_ATOM_IFALIGN:
5341         atmax = size;
5342         break;
5343 
5344     case MO_ATOM_IFALIGN_PAIR:
5345         atmax = half;
5346         break;
5347 
5348     case MO_ATOM_WITHIN16:
5349         atmax = size;
5350         if (size == MO_128) {
5351             /* Misalignment implies !within16, and therefore no atomicity. */
5352         } else if (host_atom != MO_ATOM_WITHIN16) {
5353             /* The host does not implement within16, so require alignment. */
5354             align = MAX(align, size);
5355         }
5356         break;
5357 
5358     case MO_ATOM_WITHIN16_PAIR:
5359         atmax = size;
5360         /*
5361          * Misalignment implies !within16, and therefore half atomicity.
5362          * Any host prepared for two operations can implement this with
5363          * half alignment.
5364          */
5365         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5366             align = MAX(align, half);
5367         }
5368         break;
5369 
5370     case MO_ATOM_SUBALIGN:
5371         atmax = size;
5372         if (host_atom != MO_ATOM_SUBALIGN) {
5373             /* If unaligned but not odd, there are subobjects up to half. */
5374             if (allow_two_ops) {
5375                 align = MAX(align, half);
5376             } else {
5377                 align = MAX(align, size);
5378             }
5379         }
5380         break;
5381 
5382     default:
5383         g_assert_not_reached();
5384     }
5385 
5386     return (TCGAtomAlign){ .atom = atmax, .align = align };
5387 }
5388 
5389 /*
5390  * Similarly for qemu_ld/st slow path helpers.
5391  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5392  * using only the provided backend tcg_out_* functions.
5393  */
5394 
5395 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5396 {
5397     int ofs = arg_slot_stk_ofs(slot);
5398 
5399     /*
5400      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5401      * require extension to uint64_t, adjust the address for uint32_t.
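          * E.g. (illustrative) a big-endian 64-bit host passing a
          * TCG_TYPE_I32 keeps the value in the high-addressed half of
          * the 8-byte slot, hence the +4 below.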
5402      */
5403     if (HOST_BIG_ENDIAN &&
5404         TCG_TARGET_REG_BITS == 64 &&
5405         type == TCG_TYPE_I32) {
5406         ofs += 4;
5407     }
5408     return ofs;
5409 }
5410 
5411 static void tcg_out_helper_load_slots(TCGContext *s,
5412                                       unsigned nmov, TCGMovExtend *mov,
5413                                       const TCGLdstHelperParam *parm)
5414 {
5415     unsigned i;
5416     TCGReg dst3;
5417 
5418     /*
5419      * Start from the end, storing to the stack first.
5420      * This frees those registers, so we need not consider overlap.
5421      */
5422     for (i = nmov; i-- > 0; ) {
5423         unsigned slot = mov[i].dst;
5424 
5425         if (arg_slot_reg_p(slot)) {
5426             goto found_reg;
5427         }
5428 
5429         TCGReg src = mov[i].src;
5430         TCGType dst_type = mov[i].dst_type;
5431         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5432 
5433         /* The argument is going onto the stack; extend into scratch. */
5434         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5435             tcg_debug_assert(parm->ntmp != 0);
5436             mov[i].dst = src = parm->tmp[0];
5437             tcg_out_movext1(s, &mov[i]);
5438         }
5439 
5440         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5441                    tcg_out_helper_stk_ofs(dst_type, slot));
5442     }
5443     return;
5444 
5445  found_reg:
5446     /*
5447      * The remaining arguments are in registers.
5448      * Convert slot numbers to argument registers.
5449      */
5450     nmov = i + 1;
5451     for (i = 0; i < nmov; ++i) {
5452         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5453     }
5454 
5455     switch (nmov) {
5456     case 4:
5457         /* The backend must have provided enough temps for the worst case. */
5458         tcg_debug_assert(parm->ntmp >= 2);
5459 
5460         dst3 = mov[3].dst;
5461         for (unsigned j = 0; j < 3; ++j) {
5462             if (dst3 == mov[j].src) {
5463                 /*
5464                  * Conflict. Copy the source to a temporary, perform the
5465                  * remaining moves, then the extension from our scratch
5466                  * on the way out.
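                      * E.g. if dst3 == mov[0].src, extending into dst3
                      * first (as the no-conflict path below does) would
                      * clobber mov[0]'s source before it was used.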
5467                  */
5468                 TCGReg scratch = parm->tmp[1];
5469 
5470                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5471                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5472                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5473                 return;
5474             }
5475         }
5476 
5477         /* No conflicts: perform this move and continue. */
5478         tcg_out_movext1(s, &mov[3]);
5479         /* fall through */
5480 
5481     case 3:
5482         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5483                         parm->ntmp ? parm->tmp[0] : -1);
5484         break;
5485     case 2:
5486         tcg_out_movext2(s, mov, mov + 1,
5487                         parm->ntmp ? parm->tmp[0] : -1);
5488         break;
5489     case 1:
5490         tcg_out_movext1(s, mov);
5491         break;
5492     default:
5493         g_assert_not_reached();
5494     }
5495 }
5496 
5497 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5498                                     TCGType type, tcg_target_long imm,
5499                                     const TCGLdstHelperParam *parm)
5500 {
5501     if (arg_slot_reg_p(slot)) {
5502         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5503     } else {
5504         int ofs = tcg_out_helper_stk_ofs(type, slot);
5505         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5506             tcg_debug_assert(parm->ntmp != 0);
5507             tcg_out_movi(s, type, parm->tmp[0], imm);
5508             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5509         }
5510     }
5511 }
5512 
5513 static void tcg_out_helper_load_common_args(TCGContext *s,
5514                                             const TCGLabelQemuLdst *ldst,
5515                                             const TCGLdstHelperParam *parm,
5516                                             const TCGHelperInfo *info,
5517                                             unsigned next_arg)
5518 {
5519     TCGMovExtend ptr_mov = {
5520         .dst_type = TCG_TYPE_PTR,
5521         .src_type = TCG_TYPE_PTR,
5522         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5523     };
5524     const TCGCallArgumentLoc *loc = &info->in[0];
5525     TCGType type;
5526     unsigned slot;
5527     tcg_target_ulong imm;
5528 
5529     /*
5530      * Handle env, which is always first.
5531      */
5532     ptr_mov.dst = loc->arg_slot;
5533     ptr_mov.src = TCG_AREG0;
5534     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5535 
5536     /*
5537      * Handle oi.
5538      */
5539     imm = ldst->oi;
5540     loc = &info->in[next_arg];
5541     type = TCG_TYPE_I32;
5542     switch (loc->kind) {
5543     case TCG_CALL_ARG_NORMAL:
5544         break;
5545     case TCG_CALL_ARG_EXTEND_U:
5546     case TCG_CALL_ARG_EXTEND_S:
5547         /* No extension required for MemOpIdx. */
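             /* (As the assert just below checks, oi fits in 31 bits, so
                zero- and sign-extension of it coincide.) */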
5548         tcg_debug_assert(imm <= INT32_MAX);
5549         type = TCG_TYPE_REG;
5550         break;
5551     default:
5552         g_assert_not_reached();
5553     }
5554     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5555     next_arg++;
5556 
5557     /*
5558      * Handle ra.
5559      */
5560     loc = &info->in[next_arg];
5561     slot = loc->arg_slot;
5562     if (parm->ra_gen) {
5563         int arg_reg = -1;
5564         TCGReg ra_reg;
5565 
5566         if (arg_slot_reg_p(slot)) {
5567             arg_reg = tcg_target_call_iarg_regs[slot];
5568         }
5569         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5570 
5571         ptr_mov.dst = slot;
5572         ptr_mov.src = ra_reg;
5573         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5574     } else {
5575         imm = (uintptr_t)ldst->raddr;
5576         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5577     }
5578 }
5579 
5580 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5581                                        const TCGCallArgumentLoc *loc,
5582                                        TCGType dst_type, TCGType src_type,
5583                                        TCGReg lo, TCGReg hi)
5584 {
5585     MemOp reg_mo;
5586 
5587     if (dst_type <= TCG_TYPE_REG) {
5588         MemOp src_ext;
5589 
5590         switch (loc->kind) {
5591         case TCG_CALL_ARG_NORMAL:
5592             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5593             break;
5594         case TCG_CALL_ARG_EXTEND_U:
5595             dst_type = TCG_TYPE_REG;
5596             src_ext = MO_UL;
5597             break;
5598         case TCG_CALL_ARG_EXTEND_S:
5599             dst_type = TCG_TYPE_REG;
5600             src_ext = MO_SL;
5601             break;
5602         default:
5603             g_assert_not_reached();
5604         }
5605 
5606         mov[0].dst = loc->arg_slot;
5607         mov[0].dst_type = dst_type;
5608         mov[0].src = lo;
5609         mov[0].src_type = src_type;
5610         mov[0].src_ext = src_ext;
5611         return 1;
5612     }
5613 
5614     if (TCG_TARGET_REG_BITS == 32) {
5615         assert(dst_type == TCG_TYPE_I64);
5616         reg_mo = MO_32;
5617     } else {
5618         assert(dst_type == TCG_TYPE_I128);
5619         reg_mo = MO_64;
5620     }
5621 
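         /*
          * mov[0] always carries the low part and mov[1] the high part;
          * HOST_BIG_ENDIAN only selects which of the two consecutive
          * argument slots each half lands in (e.g. lo -> loc[0] on a
          * little-endian host).
          */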
5622     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5623     mov[0].src = lo;
5624     mov[0].dst_type = TCG_TYPE_REG;
5625     mov[0].src_type = TCG_TYPE_REG;
5626     mov[0].src_ext = reg_mo;
5627 
5628     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5629     mov[1].src = hi;
5630     mov[1].dst_type = TCG_TYPE_REG;
5631     mov[1].src_type = TCG_TYPE_REG;
5632     mov[1].src_ext = reg_mo;
5633 
5634     return 2;
5635 }
5636 
5637 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5638                                    const TCGLdstHelperParam *parm)
5639 {
5640     const TCGHelperInfo *info;
5641     const TCGCallArgumentLoc *loc;
5642     TCGMovExtend mov[2];
5643     unsigned next_arg, nmov;
5644     MemOp mop = get_memop(ldst->oi);
5645 
5646     switch (mop & MO_SIZE) {
5647     case MO_8:
5648     case MO_16:
5649     case MO_32:
5650         info = &info_helper_ld32_mmu;
5651         break;
5652     case MO_64:
5653         info = &info_helper_ld64_mmu;
5654         break;
5655     case MO_128:
5656         info = &info_helper_ld128_mmu;
5657         break;
5658     default:
5659         g_assert_not_reached();
5660     }
5661 
5662     /* Defer env argument. */
5663     next_arg = 1;
5664 
5665     loc = &info->in[next_arg];
5666     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5667         /*
5668          * 32-bit host with 32-bit guest: zero-extend the guest address
5669          * to 64-bits for the helper by storing the low part, then
5670          * load a zero for the high part.
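              * (The low word lands in loc[HOST_BIG_ENDIAN] and the zero
              * in the other slot, so the combined 64-bit argument reads
              * correctly in host endian order.)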
5671          */
5672         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5673                                TCG_TYPE_I32, TCG_TYPE_I32,
5674                                ldst->addrlo_reg, -1);
5675         tcg_out_helper_load_slots(s, 1, mov, parm);
5676 
5677         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5678                                 TCG_TYPE_I32, 0, parm);
5679         next_arg += 2;
5680     } else {
5681         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5682                                       ldst->addrlo_reg, ldst->addrhi_reg);
5683         tcg_out_helper_load_slots(s, nmov, mov, parm);
5684         next_arg += nmov;
5685     }
5686 
5687     switch (info->out_kind) {
5688     case TCG_CALL_RET_NORMAL:
5689     case TCG_CALL_RET_BY_VEC:
5690         break;
5691     case TCG_CALL_RET_BY_REF:
5692         /*
5693          * The return reference is in the first argument slot.
5694          * We need memory in which to return: re-use the top of stack.
5695          */
5696         {
5697             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5698 
5699             if (arg_slot_reg_p(0)) {
5700                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5701                                  TCG_REG_CALL_STACK, ofs_slot0);
5702             } else {
5703                 tcg_debug_assert(parm->ntmp != 0);
5704                 tcg_out_addi_ptr(s, parm->tmp[0],
5705                                  TCG_REG_CALL_STACK, ofs_slot0);
5706                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5707                            TCG_REG_CALL_STACK, ofs_slot0);
5708             }
5709         }
5710         break;
5711     default:
5712         g_assert_not_reached();
5713     }
5714 
5715     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5716 }
5717 
5718 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5719                                   bool load_sign,
5720                                   const TCGLdstHelperParam *parm)
5721 {
5722     MemOp mop = get_memop(ldst->oi);
5723     TCGMovExtend mov[2];
5724     int ofs_slot0;
5725 
5726     switch (ldst->type) {
5727     case TCG_TYPE_I64:
5728         if (TCG_TARGET_REG_BITS == 32) {
5729             break;
5730         }
5731         /* fall through */
5732 
5733     case TCG_TYPE_I32:
5734         mov[0].dst = ldst->datalo_reg;
5735         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5736         mov[0].dst_type = ldst->type;
5737         mov[0].src_type = TCG_TYPE_REG;
5738 
5739         /*
5740          * If load_sign, then we allowed the helper to perform the
5741          * appropriate sign extension to tcg_target_ulong, and all
5742          * we need now is a plain move.
5743          *
5744          * If the helper does not, then we expect the relevant extension
5745          * instruction to be no more expensive than a move, and
5746          * we thus save the icache etc by only using one of two
5747          * helper functions.
5748          */
5749         if (load_sign || !(mop & MO_SIGN)) {
5750             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5751                 mov[0].src_ext = MO_32;
5752             } else {
5753                 mov[0].src_ext = MO_64;
5754             }
5755         } else {
5756             mov[0].src_ext = mop & MO_SSIZE;
5757         }
5758         tcg_out_movext1(s, mov);
5759         return;
5760 
5761     case TCG_TYPE_I128:
5762         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5763         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5764         switch (TCG_TARGET_CALL_RET_I128) {
5765         case TCG_CALL_RET_NORMAL:
5766             break;
5767         case TCG_CALL_RET_BY_VEC:
5768             tcg_out_st(s, TCG_TYPE_V128,
5769                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5770                        TCG_REG_CALL_STACK, ofs_slot0);
5771             /* fall through */
5772         case TCG_CALL_RET_BY_REF:
5773             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5774                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5775             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5776                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5777             return;
5778         default:
5779             g_assert_not_reached();
5780         }
5781         break;
5782 
5783     default:
5784         g_assert_not_reached();
5785     }
5786 
5787     mov[0].dst = ldst->datalo_reg;
5788     mov[0].src =
5789         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5790     mov[0].dst_type = TCG_TYPE_REG;
5791     mov[0].src_type = TCG_TYPE_REG;
5792     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5793 
5794     mov[1].dst = ldst->datahi_reg;
5795     mov[1].src =
5796         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5797     mov[1].dst_type = TCG_TYPE_REG;
5798     mov[1].src_type = TCG_TYPE_REG;
5799     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5800 
5801     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5802 }
5803 
5804 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5805                                    const TCGLdstHelperParam *parm)
5806 {
5807     const TCGHelperInfo *info;
5808     const TCGCallArgumentLoc *loc;
5809     TCGMovExtend mov[4];
5810     TCGType data_type;
5811     unsigned next_arg, nmov, n;
5812     MemOp mop = get_memop(ldst->oi);
5813 
5814     switch (mop & MO_SIZE) {
5815     case MO_8:
5816     case MO_16:
5817     case MO_32:
5818         info = &info_helper_st32_mmu;
5819         data_type = TCG_TYPE_I32;
5820         break;
5821     case MO_64:
5822         info = &info_helper_st64_mmu;
5823         data_type = TCG_TYPE_I64;
5824         break;
5825     case MO_128:
5826         info = &info_helper_st128_mmu;
5827         data_type = TCG_TYPE_I128;
5828         break;
5829     default:
5830         g_assert_not_reached();
5831     }
5832 
5833     /* Defer env argument. */
5834     next_arg = 1;
5835     nmov = 0;
5836 
5837     /* Handle addr argument. */
5838     loc = &info->in[next_arg];
5839     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5840         /*
5841          * 32-bit host with 32-bit guest: zero-extend the guest address
5842          * to 64-bits for the helper by storing the low part.  Later,
5843          * after we have processed the register inputs, we will load a
5844          * zero for the high part.
5845          */
5846         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5847                                TCG_TYPE_I32, TCG_TYPE_I32,
5848                                ldst->addrlo_reg, -1);
5849         next_arg += 2;
5850         nmov += 1;
5851     } else {
5852         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5853                                    ldst->addrlo_reg, ldst->addrhi_reg);
5854         next_arg += n;
5855         nmov += n;
5856     }
5857 
5858     /* Handle data argument. */
5859     loc = &info->in[next_arg];
5860     switch (loc->kind) {
5861     case TCG_CALL_ARG_NORMAL:
5862     case TCG_CALL_ARG_EXTEND_U:
5863     case TCG_CALL_ARG_EXTEND_S:
5864         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5865                                    ldst->datalo_reg, ldst->datahi_reg);
5866         next_arg += n;
5867         nmov += n;
5868         tcg_out_helper_load_slots(s, nmov, mov, parm);
5869         break;
5870 
5871     case TCG_CALL_ARG_BY_REF:
5872         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5873         tcg_debug_assert(data_type == TCG_TYPE_I128);
5874         tcg_out_st(s, TCG_TYPE_I64,
5875                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5876                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5877         tcg_out_st(s, TCG_TYPE_I64,
5878                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5879                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5880 
5881         tcg_out_helper_load_slots(s, nmov, mov, parm);
5882 
5883         if (arg_slot_reg_p(loc->arg_slot)) {
5884             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5885                              TCG_REG_CALL_STACK,
5886                              arg_slot_stk_ofs(loc->ref_slot));
5887         } else {
5888             tcg_debug_assert(parm->ntmp != 0);
5889             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5890                              arg_slot_stk_ofs(loc->ref_slot));
5891             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5892                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5893         }
5894         next_arg += 2;
5895         break;
5896 
5897     default:
5898         g_assert_not_reached();
5899     }
5900 
5901     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5902         /* Zero extend the address by loading a zero for the high part. */
5903         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5904         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5905     }
5906 
5907     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5908 }
5909 
5910 void tcg_dump_op_count(GString *buf)
5911 {
5912     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5913 }
5914 
5915 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5916 {
5917     int i, start_words, num_insns;
5918     TCGOp *op;
5919 
5920     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5921                  && qemu_log_in_addr_range(pc_start))) {
5922         FILE *logfile = qemu_log_trylock();
5923         if (logfile) {
5924             fprintf(logfile, "OP:\n");
5925             tcg_dump_ops(s, logfile, false);
5926             fprintf(logfile, "\n");
5927             qemu_log_unlock(logfile);
5928         }
5929     }
5930 
5931 #ifdef CONFIG_DEBUG_TCG
5932     /* Ensure all labels referenced have been emitted.  */
5933     {
5934         TCGLabel *l;
5935         bool error = false;
5936 
5937         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5938             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5939                 qemu_log_mask(CPU_LOG_TB_OP,
5940                               "$L%d referenced but not present.\n", l->id);
5941                 error = true;
5942             }
5943         }
5944         assert(!error);
5945     }
5946 #endif
5947 
5948     tcg_optimize(s);
5949 
5950     reachable_code_pass(s);
5951     liveness_pass_0(s);
5952     liveness_pass_1(s);
5953 
5954     if (s->nb_indirects > 0) {
5955         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5956                      && qemu_log_in_addr_range(pc_start))) {
5957             FILE *logfile = qemu_log_trylock();
5958             if (logfile) {
5959                 fprintf(logfile, "OP before indirect lowering:\n");
5960                 tcg_dump_ops(s, logfile, false);
5961                 fprintf(logfile, "\n");
5962                 qemu_log_unlock(logfile);
5963             }
5964         }
5965 
5966         /* Replace indirect temps with direct temps.  */
5967         if (liveness_pass_2(s)) {
5968             /* If changes were made, re-run liveness.  */
5969             liveness_pass_1(s);
5970         }
5971     }
5972 
5973     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5974                  && qemu_log_in_addr_range(pc_start))) {
5975         FILE *logfile = qemu_log_trylock();
5976         if (logfile) {
5977             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5978             tcg_dump_ops(s, logfile, true);
5979             fprintf(logfile, "\n");
5980             qemu_log_unlock(logfile);
5981         }
5982     }
5983 
5984     /* Initialize goto_tb jump offsets. */
5985     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5986     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5987     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5988     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5989 
5990     tcg_reg_alloc_start(s);
5991 
5992     /*
5993      * Reset the buffer pointers when restarting after overflow.
5994      * TODO: Move this into translate-all.c with the rest of the
5995      * buffer management.  Having only this done here is confusing.
5996      */
5997     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5998     s->code_ptr = s->code_buf;
5999 
6000 #ifdef TCG_TARGET_NEED_LDST_LABELS
6001     QSIMPLEQ_INIT(&s->ldst_labels);
6002 #endif
6003 #ifdef TCG_TARGET_NEED_POOL_LABELS
6004     s->pool_labels = NULL;
6005 #endif
6006 
6007     start_words = s->insn_start_words;
6008     s->gen_insn_data =
6009         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6010 
6011     num_insns = -1;
6012     QTAILQ_FOREACH(op, &s->ops, link) {
6013         TCGOpcode opc = op->opc;
6014 
6015         switch (opc) {
6016         case INDEX_op_mov_i32:
6017         case INDEX_op_mov_i64:
6018         case INDEX_op_mov_vec:
6019             tcg_reg_alloc_mov(s, op);
6020             break;
6021         case INDEX_op_dup_vec:
6022             tcg_reg_alloc_dup(s, op);
6023             break;
6024         case INDEX_op_insn_start:
6025             if (num_insns >= 0) {
6026                 size_t off = tcg_current_code_size(s);
6027                 s->gen_insn_end_off[num_insns] = off;
6028                 /* Assert that we do not overflow our stored offset.  */
6029                 assert(s->gen_insn_end_off[num_insns] == off);
6030             }
6031             num_insns++;
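                 /*
                  * Each guest insn records start_words values, stored
                  * consecutively at gen_insn_data[num_insns * start_words]
                  * (e.g., illustratively, pc/cs_base/flags for a target
                  * with three insn-start words).
                  */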
6032             for (i = 0; i < start_words; ++i) {
6033                 s->gen_insn_data[num_insns * start_words + i] =
6034                     tcg_get_insn_start_param(op, i);
6035             }
6036             break;
6037         case INDEX_op_discard:
6038             temp_dead(s, arg_temp(op->args[0]));
6039             break;
6040         case INDEX_op_set_label:
6041             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6042             tcg_out_label(s, arg_label(op->args[0]));
6043             break;
6044         case INDEX_op_call:
6045             tcg_reg_alloc_call(s, op);
6046             break;
6047         case INDEX_op_exit_tb:
6048             tcg_out_exit_tb(s, op->args[0]);
6049             break;
6050         case INDEX_op_goto_tb:
6051             tcg_out_goto_tb(s, op->args[0]);
6052             break;
6053         case INDEX_op_dup2_vec:
6054             if (tcg_reg_alloc_dup2(s, op)) {
6055                 break;
6056             }
6057             /* fall through */
6058         default:
6059             /* Sanity check that we've not introduced any unhandled opcodes. */
6060             tcg_debug_assert(tcg_op_supported(opc));
6061             /* Note: it would be much faster to have specialized
6062                register allocator functions for some common argument
6063                patterns. */
6064             tcg_reg_alloc_op(s, op);
6065             break;
6066         }
6067         /* Test for (pending) buffer overflow.  The assumption is that any
6068            one operation beginning below the high water mark cannot overrun
6069            the buffer completely.  Thus we can test for overflow after
6070            generating code without having to check during generation.  */
6071         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6072             return -1;
6073         }
6074         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6075         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6076             return -2;
6077         }
6078     }
6079     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6080     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6081 
6082     /* Generate TB finalization at the end of block */
6083 #ifdef TCG_TARGET_NEED_LDST_LABELS
6084     i = tcg_out_ldst_finalize(s);
6085     if (i < 0) {
6086         return i;
6087     }
6088 #endif
6089 #ifdef TCG_TARGET_NEED_POOL_LABELS
6090     i = tcg_out_pool_finalize(s);
6091     if (i < 0) {
6092         return i;
6093     }
6094 #endif
6095     if (!tcg_resolve_relocs(s)) {
6096         return -2;
6097     }
6098 
6099 #ifndef CONFIG_TCG_INTERPRETER
6100     /* flush instruction cache */
6101     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6102                         (uintptr_t)s->code_buf,
6103                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6104 #endif
6105 
6106     return tcg_current_code_size(s);
6107 }
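
/*
 * Illustrative only: roughly how a caller can consume the negative
 * return codes above.  This is a sketch, not the exact logic of
 * tb_gen_code(); the variables and labels are hypothetical.
 */
#if 0
    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /* code_gen_buffer (or its current region) overflowed:
               obtain fresh space and retranslate the TB.  */
            goto buffer_overflow;
        case -2:
            /* The TB outgrew the 16-bit gen_insn_end_off offsets:
               retry with fewer guest instructions.  */
            max_insns = MAX(max_insns / 2, 1);
            goto restart_translate;
        }
    }
#endif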
6108 
6109 void tcg_dump_info(GString *buf)
6110 {
6111     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6112 }
6113 
6114 #ifdef ELF_HOST_MACHINE
6115 /* In order to use this feature, the backend needs to do three things:
6116 
6117    (1) Define ELF_HOST_MACHINE to indicate both what value to
6118        put into the ELF image and to indicate support for the feature.
6119 
6120    (2) Define tcg_register_jit.  This should create a buffer containing
6121        the contents of a .debug_frame section that describes the post-
6122        prologue unwind info for the code TCG generates (sketch below).
6123 
6124    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6125 */
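
/*
 * Illustrative only (kept out of the build): the rough shape of the
 * backend half of this contract.  Real backends append CFA opcodes to
 * this header in a larger structure; the values below are hypothetical.
 */
#if 0
static const DebugFrameHeader debug_frame = {
    .cie = {
        .len = sizeof(DebugFrameCIE) - 4,  /* length excludes .len itself */
        .id = -1,                          /* marks this entry as the CIE */
        .version = 1,
        .code_align = 1,
        .data_align = 0x78,                /* sleb128 -8 */
        .return_column = 16,               /* e.g. %rip on x86-64 */
    },
    .fde = {
        .len = sizeof(DebugFrameFDEHeader) - 4,
        .cie_offset = 0,                   /* the CIE above */
        /* func_start/func_len are patched by tcg_register_jit_int.  */
    },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif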
6126 
6127 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6128 typedef enum {
6129     JIT_NOACTION = 0,
6130     JIT_REGISTER_FN,
6131     JIT_UNREGISTER_FN
6132 } jit_actions_t;
6133 
6134 struct jit_code_entry {
6135     struct jit_code_entry *next_entry;
6136     struct jit_code_entry *prev_entry;
6137     const void *symfile_addr;
6138     uint64_t symfile_size;
6139 };
6140 
6141 struct jit_descriptor {
6142     uint32_t version;
6143     uint32_t action_flag;
6144     struct jit_code_entry *relevant_entry;
6145     struct jit_code_entry *first_entry;
6146 };
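
/* GDB sets a breakpoint on the function below; each call re-reads
   __jit_debug_descriptor and handles relevant_entry according to
   action_flag.  The noinline attribute and empty asm keep the call
   from being optimized away.  */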
6147 
6148 void __jit_debug_register_code(void) __attribute__((noinline));
6149 void __jit_debug_register_code(void)
6150 {
6151     asm("");
6152 }
6153 
6154 /* Must statically initialize the version, because GDB may check
6155    the version before we can set it.  */
6156 struct jit_descriptor __jit_debug_descriptor = { .version = 1 };
6157 
6158 /* End GDB interface.  */
6159 
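/* Return the byte offset of STR within the string table STRTAB.  The
   callers only look up strings known to be present, so there is no
   end-of-table check.  */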
6160 static int find_string(const char *strtab, const char *str)
6161 {
6162     const char *p = strtab + 1;
6163 
6164     while (1) {
6165         if (strcmp(p, str) == 0) {
6166             return p - strtab;
6167         }
6168         p += strlen(p) + 1;
6169     }
6170 }
6171 
6172 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6173                                  const void *debug_frame,
6174                                  size_t debug_frame_size)
6175 {
6176     struct __attribute__((packed)) DebugInfo {
6177         uint32_t  len;
6178         uint16_t  version;
6179         uint32_t  abbrev;
6180         uint8_t   ptr_size;
6181         uint8_t   cu_die;
6182         uint16_t  cu_lang;
6183         uintptr_t cu_low_pc;
6184         uintptr_t cu_high_pc;
6185         uint8_t   fn_die;
6186         char      fn_name[16];
6187         uintptr_t fn_low_pc;
6188         uintptr_t fn_high_pc;
6189         uint8_t   cu_eoc;
6190     };
6191 
6192     struct ElfImage {
6193         ElfW(Ehdr) ehdr;
6194         ElfW(Phdr) phdr;
6195         ElfW(Shdr) shdr[7];
6196         ElfW(Sym)  sym[2];
6197         struct DebugInfo di;
6198         uint8_t    da[24];
6199         char       str[80];
6200     };
6201 
6202     struct ElfImage *img;
6203 
6204     static const struct ElfImage img_template = {
6205         .ehdr = {
6206             .e_ident[EI_MAG0] = ELFMAG0,
6207             .e_ident[EI_MAG1] = ELFMAG1,
6208             .e_ident[EI_MAG2] = ELFMAG2,
6209             .e_ident[EI_MAG3] = ELFMAG3,
6210             .e_ident[EI_CLASS] = ELF_CLASS,
6211             .e_ident[EI_DATA] = ELF_DATA,
6212             .e_ident[EI_VERSION] = EV_CURRENT,
6213             .e_type = ET_EXEC,
6214             .e_machine = ELF_HOST_MACHINE,
6215             .e_version = EV_CURRENT,
6216             .e_phoff = offsetof(struct ElfImage, phdr),
6217             .e_shoff = offsetof(struct ElfImage, shdr),
6218             .e_ehsize = sizeof(ElfW(Ehdr)),
6219             .e_phentsize = sizeof(ElfW(Phdr)),
6220             .e_phnum = 1,
6221             .e_shentsize = sizeof(ElfW(Shdr)),
6222             .e_shnum = ARRAY_SIZE(img->shdr),
6223             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6224 #ifdef ELF_HOST_FLAGS
6225             .e_flags = ELF_HOST_FLAGS,
6226 #endif
6227 #ifdef ELF_OSABI
6228             .e_ident[EI_OSABI] = ELF_OSABI,
6229 #endif
6230         },
6231         .phdr = {
6232             .p_type = PT_LOAD,
6233             .p_flags = PF_X,
6234         },
6235         .shdr = {
6236             [0] = { .sh_type = SHT_NULL },
6237             /* Trick: The contents of code_gen_buffer are not present in
6238                this fake ELF file; that got allocated elsewhere.  Therefore
6239                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6240                will not look for contents.  We can record any address.  */
6241             [1] = { /* .text */
6242                 .sh_type = SHT_NOBITS,
6243                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6244             },
6245             [2] = { /* .debug_info */
6246                 .sh_type = SHT_PROGBITS,
6247                 .sh_offset = offsetof(struct ElfImage, di),
6248                 .sh_size = sizeof(struct DebugInfo),
6249             },
6250             [3] = { /* .debug_abbrev */
6251                 .sh_type = SHT_PROGBITS,
6252                 .sh_offset = offsetof(struct ElfImage, da),
6253                 .sh_size = sizeof(img->da),
6254             },
6255             [4] = { /* .debug_frame */
6256                 .sh_type = SHT_PROGBITS,
6257                 .sh_offset = sizeof(struct ElfImage),
6258             },
6259             [5] = { /* .symtab */
6260                 .sh_type = SHT_SYMTAB,
6261                 .sh_offset = offsetof(struct ElfImage, sym),
6262                 .sh_size = sizeof(img->sym),
6263                 .sh_info = 1,
6264                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6265                 .sh_entsize = sizeof(ElfW(Sym)),
6266             },
6267             [6] = { /* .strtab */
6268                 .sh_type = SHT_STRTAB,
6269                 .sh_offset = offsetof(struct ElfImage, str),
6270                 .sh_size = sizeof(img->str),
6271             }
6272         },
6273         .sym = {
6274             [1] = { /* code_gen_buffer */
6275                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6276                 .st_shndx = 1,
6277             }
6278         },
6279         .di = {
6280             .len = sizeof(struct DebugInfo) - 4, /* excludes .len itself */
6281             .version = 2,
6282             .ptr_size = sizeof(void *),
6283             .cu_die = 1,
6284             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6285             .fn_die = 2,
6286             .fn_name = "code_gen_buffer"
6287         },
6288         .da = {
6289             1,          /* abbrev number (the cu) */
6290             0x11, 1,    /* DW_TAG_compile_unit, has children */
6291             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6292             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6293             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6294             0, 0,       /* end of abbrev */
6295             2,          /* abbrev number (the fn) */
6296             0x2e, 0,    /* DW_TAG_subprogram, no children */
6297             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6298             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6299             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6300             0, 0,       /* end of abbrev */
6301             0           /* no more abbrev */
6302         },
6303         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6304                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6305     };
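
    /* The template holds everything position-independent; the addresses
       and sizes specific to this buffer are filled in below.  */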
6306 
6307     /* We only need a single jit entry; statically allocate it.  */
6308     static struct jit_code_entry one_entry;
6309 
6310     uintptr_t buf = (uintptr_t)buf_ptr;
6311     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6312     DebugFrameHeader *dfh;
6313 
6314     img = g_malloc(img_size);
6315     *img = img_template;
6316 
6317     img->phdr.p_vaddr = buf;
6318     img->phdr.p_paddr = buf;
6319     img->phdr.p_memsz = buf_size;
6320 
6321     img->shdr[1].sh_name = find_string(img->str, ".text");
6322     img->shdr[1].sh_addr = buf;
6323     img->shdr[1].sh_size = buf_size;
6324 
6325     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6326     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6327 
6328     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6329     img->shdr[4].sh_size = debug_frame_size;
6330 
6331     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6332     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6333 
6334     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6335     img->sym[1].st_value = buf;
6336     img->sym[1].st_size = buf_size;
6337 
6338     img->di.cu_low_pc = buf;
6339     img->di.cu_high_pc = buf + buf_size;
6340     img->di.fn_low_pc = buf;
6341     img->di.fn_high_pc = buf + buf_size;
6342 
6343     dfh = (DebugFrameHeader *)(img + 1);
6344     memcpy(dfh, debug_frame, debug_frame_size);
6345     dfh->fde.func_start = buf;
6346     dfh->fde.func_len = buf_size;
6347 
6348 #ifdef DEBUG_JIT
6349     /* Define DEBUG_JIT to dump the ELF image to qemu.jit in the temp
6350        directory, for inspection with readelf, objdump, etc.  */
6351     {
6352         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6353         FILE *f = fopen(jit, "w+b");
6354         if (f) {
6355             if (fwrite(img, img_size, 1, f) != 1) {
6356                 /* Ignore failure; only silences warn_unused_result.  */
6357             }
6358             fclose(f);
6359         }
6360     }
6361 #endif
6362 
6363     one_entry.symfile_addr = img;
6364     one_entry.symfile_size = img_size;
6365 
6366     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6367     __jit_debug_descriptor.relevant_entry = &one_entry;
6368     __jit_debug_descriptor.first_entry = &one_entry;
6369     __jit_debug_register_code();
6370 }
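
/* With a JIT-aware GDB attached, frames inside code_gen_buffer should
   now resolve to the symbol registered above rather than "??".  */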
6371 #else
6372 /* No support for the feature.  Provide the entry point expected by exec.c,
6373    and implement the internal function we declared earlier.  */
6374 
6375 static void tcg_register_jit_int(const void *buf, size_t size,
6376                                  const void *debug_frame,
6377                                  size_t debug_frame_size)
6378 {
6379 }
6380 
6381 void tcg_register_jit(const void *buf, size_t buf_size)
6382 {
6383 }
6384 #endif /* ELF_HOST_MACHINE */
6385 
6386 #if !TCG_TARGET_MAYBE_vec
6387 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6388 {
6389     g_assert_not_reached();
6390 }
6391 #endif
6392