xref: /openbmc/qemu/tcg/tcg.c (revision 70f168f8)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 
43 #include "exec/exec-all.h"
44 #include "exec/tlb-common.h"
45 #include "tcg/tcg-op.h"
46 
47 #if UINTPTR_MAX == UINT32_MAX
48 # define ELF_CLASS  ELFCLASS32
49 #else
50 # define ELF_CLASS  ELFCLASS64
51 #endif
52 #if HOST_BIG_ENDIAN
53 # define ELF_DATA   ELFDATA2MSB
54 #else
55 # define ELF_DATA   ELFDATA2LSB
56 #endif
57 
58 #include "elf.h"
59 #include "exec/log.h"
60 #include "tcg/tcg-ldst.h"
61 #include "tcg/tcg-temp-internal.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
64 #ifdef CONFIG_USER_ONLY
65 #include "exec/user/guest-base.h"
66 #endif
67 
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static void tcg_target_qemu_prologue(TCGContext *s);
72 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
73                         intptr_t value, intptr_t addend);
74 
75 /* The CIE and FDE header definitions will be common to all hosts.  */
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Length is pointer-aligned so the record can be emitted in place. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Frame Description Entry header; packed to match the serialized layout. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;   /* start address of the described code */
    uintptr_t func_len;     /* length in bytes of the described code */
} DebugFrameFDEHeader;

/* A CIE followed immediately by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
97 
/*
 * Bookkeeping for one out-of-line qemu_ld/qemu_st slow path:
 * which registers carry the address and data words, where execution
 * resumes afterwards, and the branch site(s) to patch.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
118                        intptr_t arg2);
119 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_movi(TCGContext *s, TCGType type,
121                          TCGReg ret, tcg_target_long arg);
122 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
132 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
133 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
134 static void tcg_out_goto_tb(TCGContext *s, int which);
135 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
136                        const TCGArg args[TCG_MAX_OP_ARGS],
137                        const int const_args[TCG_MAX_OP_ARGS]);
138 #if TCG_TARGET_MAYBE_vec
139 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
140                             TCGReg dst, TCGReg src);
141 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
142                              TCGReg dst, TCGReg base, intptr_t offset);
143 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
144                              TCGReg dst, int64_t arg);
145 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
146                            unsigned vecl, unsigned vece,
147                            const TCGArg args[TCG_MAX_OP_ARGS],
148                            const int const_args[TCG_MAX_OP_ARGS]);
149 #else
150 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
151                                    TCGReg dst, TCGReg src)
152 {
153     g_assert_not_reached();
154 }
155 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
156                                     TCGReg dst, TCGReg base, intptr_t offset)
157 {
158     g_assert_not_reached();
159 }
160 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
161                                     TCGReg dst, int64_t arg)
162 {
163     g_assert_not_reached();
164 }
165 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
166                                   unsigned vecl, unsigned vece,
167                                   const TCGArg args[TCG_MAX_OP_ARGS],
168                                   const int const_args[TCG_MAX_OP_ARGS])
169 {
170     g_assert_not_reached();
171 }
172 #endif
173 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
174                        intptr_t arg2);
175 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
176                         TCGReg base, intptr_t ofs);
177 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
178                          const TCGHelperInfo *info);
179 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
180 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
181 #ifdef TCG_TARGET_NEED_LDST_LABELS
182 static int tcg_out_ldst_finalize(TCGContext *s);
183 #endif
184 
/*
 * Parameters for building a qemu_ld/st helper call: an optional hook
 * that materializes the return-address argument, plus the temporary
 * registers available while marshalling arguments.
 */
typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;      /* number of valid entries in tmp[] */
    int tmp[3];         /* temporary registers usable by the backend */
} TCGLdstHelperParam;
190 
191 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                    const TCGLdstHelperParam *p)
193     __attribute__((unused));
194 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
195                                   bool load_sign, const TCGLdstHelperParam *p)
196     __attribute__((unused));
197 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
198                                    const TCGLdstHelperParam *p)
199     __attribute__((unused));
200 
/* Slow-path load helpers, indexed by MemOp size+sign (MO_SSIZE). */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* Sign-extended 32-bit and 128-bit loads need 64-bit host regs. */
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
213 
/* Slow-path store helpers, indexed by MemOp size (MO_SIZE). */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* 128-bit stores are only provided for 64-bit hosts. */
    [MO_128] = helper_st16_mmu,
#endif
};
223 
/* Pair describing the atomicity/alignment required for one memory op. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

/* Defined later; computes the TCGAtomAlign for a given memory op. */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));
232 
TCGContext tcg_init_ctx;        /* context used for one-time global init */
__thread TCGContext *tcg_ctx;   /* the active per-thread context */

TCGContext **tcg_ctxs;          /* all registered contexts */
unsigned int tcg_cur_ctxs;      /* number of entries in tcg_ctxs */
unsigned int tcg_max_ctxs;      /* capacity of tcg_ctxs */
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;     /* displacement used by tcg_splitwx_to_rx() */

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Registers usable per type, and those clobbered across calls. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
249 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the generated-code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
262 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value, honoring the host's insn unit size. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: byte-wise copy, then advance by unit count. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
285 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value, honoring the host's insn unit size. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: byte-wise copy, then advance by unit count. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
308 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value, honoring the host's insn unit size. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: byte-wise copy, then advance by unit count. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
331 
332 /* label relocation processing */
333 
334 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
335                           TCGLabel *l, intptr_t addend)
336 {
337     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
338 
339     r->type = type;
340     r->ptr = code_ptr;
341     r->addend = addend;
342     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
343 }
344 
/* Bind label @l to the current output position (as an RX pointer). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    /* A label may be resolved only once. */
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
351 
352 TCGLabel *gen_new_label(void)
353 {
354     TCGContext *s = tcg_ctx;
355     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
356 
357     memset(l, 0, sizeof(TCGLabel));
358     l->id = s->nb_labels++;
359     QSIMPLEQ_INIT(&l->branches);
360     QSIMPLEQ_INIT(&l->relocs);
361 
362     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
363 
364     return l;
365 }
366 
367 static bool tcg_resolve_relocs(TCGContext *s)
368 {
369     TCGLabel *l;
370 
371     QSIMPLEQ_FOREACH(l, &s->labels, next) {
372         TCGRelocation *r;
373         uintptr_t value = l->u.value;
374 
375         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
376             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
377                 return false;
378             }
379         }
380     }
381     return true;
382 }
383 
/* Record the current code offset as TB-exit @which's reset point. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
392 
/* Record the current code offset as TB-exit @which's jump instruction. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
401 
/* Address of the slot holding TB-exit @which's indirect jump target. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
410 
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
/* Offset of mmu_idx @which's CPUTLBDescFast within the fast-TLB area. */
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif
417 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind back to the setjmp in the translator; -2 flags overflow. */
    siglongjmp(s->jmp_trans, -2);
}
424 
425 /*
426  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
427  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
428  *
429  * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
431  * argument stack slot), converting to TCGReg once all arguments that
432  * are destined for the stack are processed.
433  */
434 typedef struct TCGMovExtend {
435     unsigned dst;
436     TCGReg src;
437     TCGType dst_type;
438     TCGType src_type;
439     MemOp src_ext;
440 } TCGMovExtend;
441 
442 /**
443  * tcg_out_movext -- move and extend
444  * @s: tcg context
445  * @dst_type: integral type for destination
446  * @dst: destination register
447  * @src_type: integral type for source
448  * @src_ext: extension to apply to source
449  * @src: source register
450  *
451  * Move or extend @src into @dst, depending on @src_ext and the types.
452  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                /* 32 -> 32: a plain move suffices. */
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                /* 64 -> 32: keep only the low half. */
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* 32 -> 64: extend according to the sign of src_ext. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64 -> 64: extend in place from bit 31. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Moving full 64-bit data requires 64-bit host registers. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
503 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, taking the move description from @i but
   overriding the source register with @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all parameters taken from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
515 
516 /**
517  * tcg_out_movext2 -- move and extend two pair
518  * @s: tcg context
519  * @i1: first move description
520  * @i2: second move description
521  * @scratch: temporary register, or -1 for none
522  *
523  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
524  * between the sources and destinations.
525  */
526 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: i1 cannot clobber i2's source; emit in order. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full cycle: i1 writes src2 while i2 writes src1. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: break the cycle through the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1->dst == src2, so i2 must be emitted before i1. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
555 
556 /**
557  * tcg_out_movext3 -- move and extend three pair
558  * @s: tcg context
559  * @i1: first move description
560  * @i2: second move description
561  * @i3: third move description
562  * @scratch: temporary register, or -1 for none
563  *
564  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
565  * between the sources and destinations.
566  */
567 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If any move's destination is free of the other sources, emit it
       first and reduce to the two-move case. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: route src1 through scratch, emit i3 then i2. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: route src1 through scratch, emit i2 then i3. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
631 
632 #define C_PFX1(P, A)                    P##A
633 #define C_PFX2(P, A, B)                 P##A##_##B
634 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
635 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
636 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
637 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
638 
639 /* Define an enumeration for the various combinations. */
640 
641 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
642 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
643 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
644 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
645 
646 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
647 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
648 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
649 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
650 
651 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
652 
653 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
654 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
655 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
656 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
657 
658 typedef enum {
659 #include "tcg-target-con-set.h"
660 } TCGConstraintSetIndex;
661 
662 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
663 
664 #undef C_O0_I1
665 #undef C_O0_I2
666 #undef C_O0_I3
667 #undef C_O0_I4
668 #undef C_O1_I1
669 #undef C_O1_I2
670 #undef C_O1_I3
671 #undef C_O1_I4
672 #undef C_N1_I2
673 #undef C_O2_I1
674 #undef C_O2_I2
675 #undef C_O2_I3
676 #undef C_O2_I4
677 
678 /* Put all of the constraint sets into an array, indexed by the enum. */
679 
680 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
681 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
682 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
683 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
684 
685 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
686 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
687 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
688 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
689 
690 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
691 
692 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
693 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
694 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
695 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
696 
697 static const TCGTargetOpDef constraint_sets[] = {
698 #include "tcg-target-con-set.h"
699 };
700 
701 
702 #undef C_O0_I1
703 #undef C_O0_I2
704 #undef C_O0_I3
705 #undef C_O0_I4
706 #undef C_O1_I1
707 #undef C_O1_I2
708 #undef C_O1_I3
709 #undef C_O1_I4
710 #undef C_N1_I2
711 #undef C_O2_I1
712 #undef C_O2_I2
713 #undef C_O2_I3
714 #undef C_O2_I4
715 
716 /* Expand the enumerator to be returned from tcg_target_op_def(). */
717 
718 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
719 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
720 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
721 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
722 
723 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
724 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
725 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
726 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
727 
728 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
729 
730 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
731 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
732 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
733 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
734 
735 #include "tcg-target.c.inc"
736 
/* Allocate per-context plugin state; a no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
745 
746 /*
747  * All TCG threads except the parent (i.e. the one that called tcg_context_init
748  * and registered the target's TCG globals) must register with this function
749  * before initiating translation.
750  *
751  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
752  * of tcg_region_init() for the reasoning behind this.
753  *
754  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
756  * is not used anymore for translation once this function is called.
757  *
758  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
760  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: all threads share the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Recompute the base-temp pointer relative to our copy of
               the temps array, not the parent's. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary contexts get their own plugin state and region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
796 
797 /* pool based memory allocation */
798 void *tcg_malloc_internal(TCGContext *s, int size)
799 {
800     TCGPool *p;
801     int pool_size;
802 
803     if (size > TCG_POOL_CHUNK_SIZE) {
804         /* big malloc: insert a new pool (XXX: could optimize) */
805         p = g_malloc(sizeof(TCGPool) + size);
806         p->size = size;
807         p->next = s->pool_first_large;
808         s->pool_first_large = p;
809         return p->data;
810     } else {
811         p = s->pool_current;
812         if (!p) {
813             p = s->pool_first;
814             if (!p)
815                 goto new_pool;
816         } else {
817             if (!p->next) {
818             new_pool:
819                 pool_size = TCG_POOL_CHUNK_SIZE;
820                 p = g_malloc(sizeof(TCGPool) + pool_size);
821                 p->size = pool_size;
822                 p->next = NULL;
823                 if (s->pool_current) {
824                     s->pool_current->next = p;
825                 } else {
826                     s->pool_first = p;
827                 }
828             } else {
829                 p = p->next;
830             }
831         }
832     }
833     s->pool_current = p;
834     s->pool_cur = p->data + size;
835     s->pool_end = p->data + p->size;
836     return p->data;
837 }
838 
839 void tcg_pool_reset(TCGContext *s)
840 {
841     TCGPool *p, *t;
842     for (p = s->pool_first_large; p; p = t) {
843         t = p->next;
844         g_free(p);
845     }
846     s->pool_first_large = NULL;
847     s->pool_cur = s->pool_end = NULL;
848     s->pool_current = NULL;
849 }
850 
851 #include "exec/helper-proto.h"
852 
853 static TCGHelperInfo all_helpers[] = {
854 #include "exec/helper-tcg.h"
855 };
856 static GHashTable *helper_table;
857 
858 /*
859  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
860  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
861  * We only use these for layout in tcg_out_ld_helper_ret and
862  * tcg_out_st_helper_args, and share them between several of
863  * the helpers, with the end result that it's easier to build manually.
864  */
865 
/* "ttl" = tcg_target_ulong: i32 or i64 depending on host register width. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* Load helpers return a value; all take (env, addr, oi, retaddr). */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Store helpers return void; the data value is an extra argument. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
928 
929 #ifdef CONFIG_TCG_INTERPRETER
930 static ffi_type *typecode_to_ffi(int argmask)
931 {
932     /*
933      * libffi does not support __int128_t, so we have forced Int128
934      * to use the structure definition instead of the builtin type.
935      */
936     static ffi_type *ffi_type_i128_elements[3] = {
937         &ffi_type_uint64,
938         &ffi_type_uint64,
939         NULL
940     };
941     static ffi_type ffi_type_i128 = {
942         .size = 16,
943         .alignment = __alignof__(Int128),
944         .type = FFI_TYPE_STRUCT,
945         .elements = ffi_type_i128_elements,
946     };
947 
948     switch (argmask) {
949     case dh_typecode_void:
950         return &ffi_type_void;
951     case dh_typecode_i32:
952         return &ffi_type_uint32;
953     case dh_typecode_s32:
954         return &ffi_type_sint32;
955     case dh_typecode_i64:
956         return &ffi_type_uint64;
957     case dh_typecode_s64:
958         return &ffi_type_sint64;
959     case dh_typecode_ptr:
960         return &ffi_type_pointer;
961     case dh_typecode_i128:
962         return &ffi_type_i128;
963     }
964     g_assert_not_reached();
965 }
966 
/*
 * Build a libffi call descriptor (ffi_cif) for each helper, sharing one
 * cif between helpers with identical type signatures.  Only used by the
 * TCI interpreter, which marshals helper calls through libffi at runtime.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        /* The typemask itself is the key; no allocation needed. */
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];  /* flexible array: one slot per argument */
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a previously built cif for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        /* Each argument occupies 3 bits, after the 3 return-type bits. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        /*
         * cif and its argument array are allocated as one block and
         * intentionally never freed: the cif must outlive init for
         * every later call through info->cif.
         */
        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* The table was only needed for deduplication during init. */
    g_hash_table_destroy(ffi_table);
}
1018 #endif /* CONFIG_TCG_INTERPRETER */
1019 
1020 static inline bool arg_slot_reg_p(unsigned arg_slot)
1021 {
1022     /*
1023      * Split the sizeof away from the comparison to avoid Werror from
1024      * "unsigned < 0 is always false", when iarg_regs is empty.
1025      */
1026     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1027     return arg_slot < nreg;
1028 }
1029 
1030 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1031 {
1032     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1033     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1034 
1035     tcg_debug_assert(stk_slot < max);
1036     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1037 }
1038 
/* Running state while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1045 
1046 static void layout_arg_even(TCGCumulativeArgs *cum)
1047 {
1048     cum->arg_slot += cum->arg_slot & 1;
1049 }
1050 
1051 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1052                          TCGCallArgumentKind kind)
1053 {
1054     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1055 
1056     *loc = (TCGCallArgumentLoc){
1057         .kind = kind,
1058         .arg_idx = cum->arg_idx,
1059         .arg_slot = cum->arg_slot,
1060     };
1061     cum->info_in_idx++;
1062     cum->arg_slot++;
1063 }
1064 
1065 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1066                                 TCGHelperInfo *info, int n)
1067 {
1068     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1069 
1070     for (int i = 0; i < n; ++i) {
1071         /* Layout all using the same arg_idx, adjusting the subindex. */
1072         loc[i] = (TCGCallArgumentLoc){
1073             .kind = TCG_CALL_ARG_NORMAL,
1074             .arg_idx = cum->arg_idx,
1075             .tmp_subindex = i,
1076             .arg_slot = cum->arg_slot + i,
1077         };
1078     }
1079     cum->info_in_idx += n;
1080     cum->arg_slot += n;
1081 }
1082 
/* Record an Int128 argument passed by reference: one pointer slot plus
   N stack words reserved (in "ref_slot" space) for the copied value. */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    /* Capture the location array position before layout_arg_1 advances it. */
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
1114 
/*
 * Compute the register/stack call layout for one helper: fill in
 * info->nr_out/out_kind for the return value and info->in[]/nr_in for
 * the arguments, following the target's calling-convention selections
 * (TCG_TARGET_CALL_RET_I128, TCG_TARGET_CALL_ARG_*).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;   /* low 3 bits encode the return type */
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Map the 3-bit typecode onto a TCGType for layout purposes. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Signedness bit of the typecode selects _S vs _U extend. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1295 
1296 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1297 static void process_op_defs(TCGContext *s);
1298 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1299                                             TCGReg reg, const char *name);
1300 
/*
 * One-time initialization of the primary TCG context: op-def constraint
 * storage, the helper lookup table and call layouts, the target backend,
 * the register allocation order, and the "env" fixed global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Hand each op-def its slice of the shared constraint array. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    /* The out-of-line qemu_ld/st helpers get their layouts too. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1391 
/* Public TCG entry point: initialize the context, then the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1397 
1398 /*
1399  * Allocate TBs right before their corresponding translated code, making
1400  * sure that TBs and code are on different cache lines.
1401  */
1402 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1403 {
1404     uintptr_t align = qemu_icache_linesize;
1405     TranslationBlock *tb;
1406     void *next;
1407 
1408  retry:
1409     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1410     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1411 
1412     if (unlikely(next > s->code_gen_highwater)) {
1413         if (tcg_region_alloc(s)) {
1414             return NULL;
1415         }
1416         goto retry;
1417     }
1418     qatomic_set(&s->code_gen_ptr, next);
1419     s->data_gen_ptr = NULL;
1420     return tb;
1421 }
1422 
/*
 * Generate the host prologue/epilogue at the start of the code buffer
 * and publish it as tcg_qemu_tb_exec.  Must run before any TB is
 * translated into this region.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point is the RX alias of the buffer we write (RW). */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    /* Optionally disassemble the prologue (and any constant-pool data). */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1502 
/* Reset the per-TB translation state of the context before starting
   to translate a new TB; globals persist, everything else is cleared. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    /* The frontend must have configured the guest address width. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
    tcg_debug_assert(s->tlb_fast_offset < 0);
    tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
#endif
}
1538 
1539 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1540 {
1541     int n = s->nb_temps++;
1542 
1543     if (n >= TCG_MAX_TEMPS) {
1544         tcg_raise_tb_overflow(s);
1545     }
1546     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1547 }
1548 
1549 static TCGTemp *tcg_global_alloc(TCGContext *s)
1550 {
1551     TCGTemp *ts;
1552 
1553     tcg_debug_assert(s->nb_globals == s->nb_temps);
1554     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1555     s->nb_globals++;
1556     ts = tcg_temp_alloc(s);
1557     ts->kind = TEMP_GLOBAL;
1558 
1559     return ts;
1560 }
1561 
1562 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1563                                             TCGReg reg, const char *name)
1564 {
1565     TCGTemp *ts;
1566 
1567     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1568 
1569     ts = tcg_global_alloc(s);
1570     ts->base_type = type;
1571     ts->type = type;
1572     ts->kind = TEMP_FIXED;
1573     ts->reg = reg;
1574     ts->name = name;
1575     tcg_regset_set_reg(s->reserved_regs, reg);
1576 
1577     return ts;
1578 }
1579 
/* Record the spill-frame bounds and create the fixed-register global
   "_frame" that generated code uses as the frame base pointer. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1587 
/*
 * Create a global temp backed by memory at BASE+OFFSET.  On a 32-bit
 * host a 64-bit global is split into two consecutive 32-bit temps
 * ("name_0"/"name_1").  If BASE is itself a global (not a fixed
 * register), the new temp is marked indirect.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit global counts as two indirect accesses. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Low/high halves become two adjacent I32 temps. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);   /* lives for the process lifetime */

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1647 
/*
 * Allocate a temporary of the given TYPE and lifetime KIND.
 * TEMP_EBB temps are recycled through the per-type free list;
 * TEMP_TB temps are always freshly allocated.  Types wider than a
 * host register are materialized as N adjacent TCG_TYPE_REG temps.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces contiguously after ts. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1711 
/* Allocate an EBB-lifetime vector temporary of the given vector TYPE. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    /* Only vector types the backend advertises may be requested. */
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1735 
1736 /* Create a new temp of the same type as an existing temp.  */
1737 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1738 {
1739     TCGTemp *t = tcgv_vec_temp(match);
1740 
1741     tcg_debug_assert(t->temp_allocated != 0);
1742 
1743     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1744     return temp_tcgv_vec(t);
1745 }
1746 
1747 void tcg_temp_free_internal(TCGTemp *ts)
1748 {
1749     TCGContext *s = tcg_ctx;
1750 
1751     switch (ts->kind) {
1752     case TEMP_CONST:
1753     case TEMP_TB:
1754         /* Silently ignore free. */
1755         break;
1756     case TEMP_EBB:
1757         tcg_debug_assert(ts->temp_allocated != 0);
1758         ts->temp_allocated = 0;
1759         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1760         break;
1761     default:
1762         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1763         g_assert_not_reached();
1764     }
1765 }
1766 
/*
 * Return the interned constant temp for (TYPE, VAL), creating it on
 * first use.  Constants are deduplicated per type via a hash table
 * keyed on the 64-bit value.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    /* Lazily create the per-type table on first constant of this type. */
    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* Split into two I32 halves occupying adjacent temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp itself, which lives as long as h. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1821 
1822 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1823 {
1824     val = dup_const(vece, val);
1825     return temp_tcgv_vec(tcg_constant_internal(type, val));
1826 }
1827 
1828 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1829 {
1830     TCGTemp *t = tcgv_vec_temp(match);
1831 
1832     tcg_debug_assert(t->temp_allocated != 0);
1833     return tcg_constant_vec(t->base_type, vece, val);
1834 }
1835 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend supports any vector register width at all. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and guest memory access opcodes: always available. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Mandatory 32-bit integer opcodes every backend must implement. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit opcodes, gated by per-backend feature macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares only exist when words are 32 bits. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit opcodes, present only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes, gated by per-backend feature macros. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes: require vector support, plus per-op features. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Backend-specific opcodes are always supported by that backend. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2141 
2142 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2143 
/*
 * Emit an INDEX_op_call to the registered helper FUNC, returning into
 * RET (NULL for a void helper) and reading arguments from ARGS.
 * The argument and return lowering (how many host words each occupies,
 * extension requirements, by-reference passing) comes from the
 * TCGHelperInfo registered for FUNC in helper_table; NARGS is not
 * consulted here -- the counts come from that info structure.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    /* Temporaries created to widen 32-bit args; freed after emission. */
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + two trailing slots for func and info below. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Emit the output arguments first. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Wide returns use 2 or 4 consecutive temps starting at RET. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Then the input arguments, one entry per host-level input slot. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            /* ABI requires the 32-bit value widened to 64 bits. */
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing slots: callee pointer and its lowering info. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The extension temps die with this call; release them now. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2230 
2231 static void tcg_reg_alloc_start(TCGContext *s)
2232 {
2233     int i, n;
2234 
2235     for (i = 0, n = s->nb_temps; i < n; i++) {
2236         TCGTemp *ts = &s->temps[i];
2237         TCGTempVal val = TEMP_VAL_MEM;
2238 
2239         switch (ts->kind) {
2240         case TEMP_CONST:
2241             val = TEMP_VAL_CONST;
2242             break;
2243         case TEMP_FIXED:
2244             val = TEMP_VAL_REG;
2245             break;
2246         case TEMP_GLOBAL:
2247             break;
2248         case TEMP_EBB:
2249             val = TEMP_VAL_DEAD;
2250             /* fall through */
2251         case TEMP_TB:
2252             ts->mem_allocated = 0;
2253             break;
2254         default:
2255             g_assert_not_reached();
2256         }
2257         ts->val_type = val;
2258     }
2259 
2260     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2261 }
2262 
/*
 * Format a human-readable name for TS into BUF (BUF_SIZE bytes) and
 * return BUF.  Fixed and global temps print their registered name;
 * TB and EBB temps print an index relative to the globals; constants
 * print their value.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants print their width and replicated value. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2302 
2303 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2304                              int buf_size, TCGArg arg)
2305 {
2306     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2307 }
2308 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2324 
/*
 * Printable names for the size/sign/byte-order portion of a MemOp,
 * indexed by (mop & (MO_BSWAP | MO_SSIZE)).  Unlisted combinations
 * are NULL, which forces numeric printing in tcg_dump_ops.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2342 
/* Printable prefixes for the alignment portion of a MemOp. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2353 
/* Printable prefixes for the atomicity portion of a MemOp. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2362 
/*
 * Printable names for valid TCG_BSWAP_* flag combinations; entries for
 * invalid combinations are empty strings (NUL-filled by the initializer).
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2370 
2371 static inline bool tcg_regset_single(TCGRegSet d)
2372 {
2373     return (d & (d - 1)) == 0;
2374 }
2375 
2376 static inline TCGReg tcg_regset_first(TCGRegSet d)
2377 {
2378     if (TCG_TARGET_NB_REGS <= 32) {
2379         return ctz32(d);
2380     } else {
2381         return ctz64(d);
2382     }
2383 }
2384 
/*
 * Return only the number of characters output -- no error return.
 * An fprintf failure (negative result) is reported as 0 so that the
 * column counter in tcg_dump_ops never moves backwards.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2388 
2389 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2390 {
2391     char buf[128];
2392     TCGOp *op;
2393 
2394     QTAILQ_FOREACH(op, &s->ops, link) {
2395         int i, k, nb_oargs, nb_iargs, nb_cargs;
2396         const TCGOpDef *def;
2397         TCGOpcode c;
2398         int col = 0;
2399 
2400         c = op->opc;
2401         def = &tcg_op_defs[c];
2402 
2403         if (c == INDEX_op_insn_start) {
2404             nb_oargs = 0;
2405             col += ne_fprintf(f, "\n ----");
2406 
2407             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2408                 col += ne_fprintf(f, " %016" PRIx64,
2409                                   tcg_get_insn_start_param(op, i));
2410             }
2411         } else if (c == INDEX_op_call) {
2412             const TCGHelperInfo *info = tcg_call_info(op);
2413             void *func = tcg_call_func(op);
2414 
2415             /* variable number of arguments */
2416             nb_oargs = TCGOP_CALLO(op);
2417             nb_iargs = TCGOP_CALLI(op);
2418             nb_cargs = def->nb_cargs;
2419 
2420             col += ne_fprintf(f, " %s ", def->name);
2421 
2422             /*
2423              * Print the function name from TCGHelperInfo, if available.
2424              * Note that plugins have a template function for the info,
2425              * but the actual function pointer comes from the plugin.
2426              */
2427             if (func == info->func) {
2428                 col += ne_fprintf(f, "%s", info->name);
2429             } else {
2430                 col += ne_fprintf(f, "plugin(%p)", func);
2431             }
2432 
2433             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2434             for (i = 0; i < nb_oargs; i++) {
2435                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2436                                                             op->args[i]));
2437             }
2438             for (i = 0; i < nb_iargs; i++) {
2439                 TCGArg arg = op->args[nb_oargs + i];
2440                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2441                 col += ne_fprintf(f, ",%s", t);
2442             }
2443         } else {
2444             col += ne_fprintf(f, " %s ", def->name);
2445 
2446             nb_oargs = def->nb_oargs;
2447             nb_iargs = def->nb_iargs;
2448             nb_cargs = def->nb_cargs;
2449 
2450             if (def->flags & TCG_OPF_VECTOR) {
2451                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2452                                   8 << TCGOP_VECE(op));
2453             }
2454 
2455             k = 0;
2456             for (i = 0; i < nb_oargs; i++) {
2457                 const char *sep =  k ? "," : "";
2458                 col += ne_fprintf(f, "%s%s", sep,
2459                                   tcg_get_arg_str(s, buf, sizeof(buf),
2460                                                   op->args[k++]));
2461             }
2462             for (i = 0; i < nb_iargs; i++) {
2463                 const char *sep =  k ? "," : "";
2464                 col += ne_fprintf(f, "%s%s", sep,
2465                                   tcg_get_arg_str(s, buf, sizeof(buf),
2466                                                   op->args[k++]));
2467             }
2468             switch (c) {
2469             case INDEX_op_brcond_i32:
2470             case INDEX_op_setcond_i32:
2471             case INDEX_op_movcond_i32:
2472             case INDEX_op_brcond2_i32:
2473             case INDEX_op_setcond2_i32:
2474             case INDEX_op_brcond_i64:
2475             case INDEX_op_setcond_i64:
2476             case INDEX_op_movcond_i64:
2477             case INDEX_op_cmp_vec:
2478             case INDEX_op_cmpsel_vec:
2479                 if (op->args[k] < ARRAY_SIZE(cond_name)
2480                     && cond_name[op->args[k]]) {
2481                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2482                 } else {
2483                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2484                 }
2485                 i = 1;
2486                 break;
2487             case INDEX_op_qemu_ld_a32_i32:
2488             case INDEX_op_qemu_ld_a64_i32:
2489             case INDEX_op_qemu_st_a32_i32:
2490             case INDEX_op_qemu_st_a64_i32:
2491             case INDEX_op_qemu_st8_a32_i32:
2492             case INDEX_op_qemu_st8_a64_i32:
2493             case INDEX_op_qemu_ld_a32_i64:
2494             case INDEX_op_qemu_ld_a64_i64:
2495             case INDEX_op_qemu_st_a32_i64:
2496             case INDEX_op_qemu_st_a64_i64:
2497             case INDEX_op_qemu_ld_a32_i128:
2498             case INDEX_op_qemu_ld_a64_i128:
2499             case INDEX_op_qemu_st_a32_i128:
2500             case INDEX_op_qemu_st_a64_i128:
2501                 {
2502                     const char *s_al, *s_op, *s_at;
2503                     MemOpIdx oi = op->args[k++];
2504                     MemOp op = get_memop(oi);
2505                     unsigned ix = get_mmuidx(oi);
2506 
2507                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2508                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2509                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2510                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2511 
2512                     /* If all fields are accounted for, print symbolically. */
2513                     if (!op && s_al && s_op && s_at) {
2514                         col += ne_fprintf(f, ",%s%s%s,%u",
2515                                           s_at, s_al, s_op, ix);
2516                     } else {
2517                         op = get_memop(oi);
2518                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2519                     }
2520                     i = 1;
2521                 }
2522                 break;
2523             case INDEX_op_bswap16_i32:
2524             case INDEX_op_bswap16_i64:
2525             case INDEX_op_bswap32_i32:
2526             case INDEX_op_bswap32_i64:
2527             case INDEX_op_bswap64_i64:
2528                 {
2529                     TCGArg flags = op->args[k];
2530                     const char *name = NULL;
2531 
2532                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2533                         name = bswap_flag_name[flags];
2534                     }
2535                     if (name) {
2536                         col += ne_fprintf(f, ",%s", name);
2537                     } else {
2538                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2539                     }
2540                     i = k = 1;
2541                 }
2542                 break;
2543             default:
2544                 i = 0;
2545                 break;
2546             }
2547             switch (c) {
2548             case INDEX_op_set_label:
2549             case INDEX_op_br:
2550             case INDEX_op_brcond_i32:
2551             case INDEX_op_brcond_i64:
2552             case INDEX_op_brcond2_i32:
2553                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2554                                   arg_label(op->args[k])->id);
2555                 i++, k++;
2556                 break;
2557             case INDEX_op_mb:
2558                 {
2559                     TCGBar membar = op->args[k];
2560                     const char *b_op, *m_op;
2561 
2562                     switch (membar & TCG_BAR_SC) {
2563                     case 0:
2564                         b_op = "none";
2565                         break;
2566                     case TCG_BAR_LDAQ:
2567                         b_op = "acq";
2568                         break;
2569                     case TCG_BAR_STRL:
2570                         b_op = "rel";
2571                         break;
2572                     case TCG_BAR_SC:
2573                         b_op = "seq";
2574                         break;
2575                     default:
2576                         g_assert_not_reached();
2577                     }
2578 
2579                     switch (membar & TCG_MO_ALL) {
2580                     case 0:
2581                         m_op = "none";
2582                         break;
2583                     case TCG_MO_LD_LD:
2584                         m_op = "rr";
2585                         break;
2586                     case TCG_MO_LD_ST:
2587                         m_op = "rw";
2588                         break;
2589                     case TCG_MO_ST_LD:
2590                         m_op = "wr";
2591                         break;
2592                     case TCG_MO_ST_ST:
2593                         m_op = "ww";
2594                         break;
2595                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2596                         m_op = "rr+rw";
2597                         break;
2598                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2599                         m_op = "rr+wr";
2600                         break;
2601                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2602                         m_op = "rr+ww";
2603                         break;
2604                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2605                         m_op = "rw+wr";
2606                         break;
2607                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2608                         m_op = "rw+ww";
2609                         break;
2610                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2611                         m_op = "wr+ww";
2612                         break;
2613                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2614                         m_op = "rr+rw+wr";
2615                         break;
2616                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2617                         m_op = "rr+rw+ww";
2618                         break;
2619                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2620                         m_op = "rr+wr+ww";
2621                         break;
2622                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2623                         m_op = "rw+wr+ww";
2624                         break;
2625                     case TCG_MO_ALL:
2626                         m_op = "all";
2627                         break;
2628                     default:
2629                         g_assert_not_reached();
2630                     }
2631 
2632                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2633                     i++, k++;
2634                 }
2635                 break;
2636             default:
2637                 break;
2638             }
2639             for (; i < nb_cargs; i++, k++) {
2640                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2641                                   op->args[k]);
2642             }
2643         }
2644 
2645         if (have_prefs || op->life) {
2646             for (; col < 40; ++col) {
2647                 putc(' ', f);
2648             }
2649         }
2650 
2651         if (op->life) {
2652             unsigned life = op->life;
2653 
2654             if (life & (SYNC_ARG * 3)) {
2655                 ne_fprintf(f, "  sync:");
2656                 for (i = 0; i < 2; ++i) {
2657                     if (life & (SYNC_ARG << i)) {
2658                         ne_fprintf(f, " %d", i);
2659                     }
2660                 }
2661             }
2662             life /= DEAD_ARG;
2663             if (life) {
2664                 ne_fprintf(f, "  dead:");
2665                 for (i = 0; life; ++i, life >>= 1) {
2666                     if (life & 1) {
2667                         ne_fprintf(f, " %d", i);
2668                     }
2669                 }
2670             }
2671         }
2672 
2673         if (have_prefs) {
2674             for (i = 0; i < nb_oargs; ++i) {
2675                 TCGRegSet set = output_pref(op, i);
2676 
2677                 if (i == 0) {
2678                     ne_fprintf(f, "  pref=");
2679                 } else {
2680                     ne_fprintf(f, ",");
2681                 }
2682                 if (set == 0) {
2683                     ne_fprintf(f, "none");
2684                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2685                     ne_fprintf(f, "all");
2686 #ifdef CONFIG_DEBUG_TCG
2687                 } else if (tcg_regset_single(set)) {
2688                     TCGReg reg = tcg_regset_first(set);
2689                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2690 #endif
2691                 } else if (TCG_TARGET_NB_REGS <= 32) {
2692                     ne_fprintf(f, "0x%x", (uint32_t)set);
2693                 } else {
2694                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2695                 }
2696             }
2697         }
2698 
2699         putc('\n', f);
2700     }
2701 }
2702 
2703 /* we give more priority to constraints with less registers */
2704 static int get_constraint_priority(const TCGOpDef *def, int k)
2705 {
2706     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2707     int n = ctpop64(arg_ct->regs);
2708 
2709     /*
2710      * Sort constraints of a single register first, which includes output
2711      * aliases (which must exactly match the input already allocated).
2712      */
2713     if (n == 1 || arg_ct->oalias) {
2714         return INT_MAX;
2715     }
2716 
2717     /*
2718      * Sort register pairs next, first then second immediately after.
2719      * Arbitrarily sort multiple pairs by the index of the first reg;
2720      * there shouldn't be many pairs.
2721      */
2722     switch (arg_ct->pair) {
2723     case 1:
2724     case 3:
2725         return (k + 1) * 2;
2726     case 2:
2727         return (arg_ct->pair_index + 1) * 2 - 1;
2728     }
2729 
2730     /* Finally, sort by decreasing register count. */
2731     assert(n > 1);
2732     return -n;
2733 }
2734 
2735 /* sort from highest priority to lowest */
2736 static void sort_constraints(TCGOpDef *def, int start, int n)
2737 {
2738     int i, j;
2739     TCGArgConstraint *a = def->args_ct;
2740 
2741     for (i = 0; i < n; i++) {
2742         a[start + i].sort_index = start + i;
2743     }
2744     if (n <= 1) {
2745         return;
2746     }
2747     for (i = 0; i < n - 1; i++) {
2748         for (j = i + 1; j < n; j++) {
2749             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2750             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2751             if (p1 < p2) {
2752                 int tmp = a[start + i].sort_index;
2753                 a[start + i].sort_index = a[start + j].sort_index;
2754                 a[start + j].sort_index = tmp;
2755             }
2756         }
2757     }
2758 }
2759 
/*
 * Parse the target's constraint strings into tcg_op_defs[].args_ct
 * for every opcode, then sort the constraints by allocation priority.
 * The constraint letters themselves are expanded from
 * "tcg-target-con-str.h" via the CONST/REGS macros below.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs occupy the first nb_oargs slots of args_ct_str. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* An input constrained to alias output argument 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* The output must be allocated a fresh register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Parse the remaining single-letter constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    /* The argument is allowed to be a constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                /* [0-9&pm] are only valid as the first character. */
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2964 
2965 static void remove_label_use(TCGOp *op, int idx)
2966 {
2967     TCGLabel *label = arg_label(op->args[idx]);
2968     TCGLabelUse *use;
2969 
2970     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2971         if (use->op == op) {
2972             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2973             return;
2974         }
2975     }
2976     g_assert_not_reached();
2977 }
2978 
2979 void tcg_op_remove(TCGContext *s, TCGOp *op)
2980 {
2981     switch (op->opc) {
2982     case INDEX_op_br:
2983         remove_label_use(op, 0);
2984         break;
2985     case INDEX_op_brcond_i32:
2986     case INDEX_op_brcond_i64:
2987         remove_label_use(op, 3);
2988         break;
2989     case INDEX_op_brcond2_i32:
2990         remove_label_use(op, 5);
2991         break;
2992     default:
2993         break;
2994     }
2995 
2996     QTAILQ_REMOVE(&s->ops, op, link);
2997     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2998     s->nb_ops--;
2999 
3000 #ifdef CONFIG_PROFILER
3001     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
3002 #endif
3003 }
3004 
3005 void tcg_remove_ops_after(TCGOp *op)
3006 {
3007     TCGContext *s = tcg_ctx;
3008 
3009     while (true) {
3010         TCGOp *last = tcg_last_op();
3011         if (last == op) {
3012             return;
3013         }
3014         tcg_op_remove(s, last);
3015     }
3016 }
3017 
3018 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3019 {
3020     TCGContext *s = tcg_ctx;
3021     TCGOp *op = NULL;
3022 
3023     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3024         QTAILQ_FOREACH(op, &s->free_ops, link) {
3025             if (nargs <= op->nargs) {
3026                 QTAILQ_REMOVE(&s->free_ops, op, link);
3027                 nargs = op->nargs;
3028                 goto found;
3029             }
3030         }
3031     }
3032 
3033     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3034     nargs = MAX(4, nargs);
3035     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3036 
3037  found:
3038     memset(op, 0, offsetof(TCGOp, link));
3039     op->opc = opc;
3040     op->nargs = nargs;
3041 
3042     /* Check for bitfield overflow. */
3043     tcg_debug_assert(op->nargs == nargs);
3044 
3045     s->nb_ops++;
3046     return op;
3047 }
3048 
3049 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3050 {
3051     TCGOp *op = tcg_op_alloc(opc, nargs);
3052     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3053     return op;
3054 }
3055 
3056 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3057                             TCGOpcode opc, unsigned nargs)
3058 {
3059     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3060     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3061     return new_op;
3062 }
3063 
3064 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3065                            TCGOpcode opc, unsigned nargs)
3066 {
3067     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3068     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3069     return new_op;
3070 }
3071 
3072 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3073 {
3074     TCGLabelUse *u;
3075 
3076     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3077         TCGOp *op = u->op;
3078         switch (op->opc) {
3079         case INDEX_op_br:
3080             op->args[0] = label_arg(to);
3081             break;
3082         case INDEX_op_brcond_i32:
3083         case INDEX_op_brcond_i64:
3084             op->args[3] = label_arg(to);
3085             break;
3086         case INDEX_op_brcond2_i32:
3087             op->args[5] = label_arg(to);
3088             break;
3089         default:
3090             g_assert_not_reached();
3091         }
3092     }
3093 
3094     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3095 }
3096 
/* Reachable analysis : remove unreachable code.  */
/*
 * Forward walk over the op list: after an unconditional control
 * transfer (br, exit_tb, goto_ptr, noreturn call), ops are dead
 * until the next label that is still referenced.  Adjacent labels
 * are merged and branch-to-next is removed along the way.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default, remove this op iff we are inside dead code. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3188 
3189 #define TS_DEAD  1
3190 #define TS_MEM   2
3191 
3192 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3193 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3194 
3195 /* For liveness_pass_1, the register preferences for a given temp.  */
3196 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3197 {
3198     return ts->state_ptr;
3199 }
3200 
3201 /* For liveness_pass_1, reset the preferences for a given temp to the
3202  * maximal regset for its type.
3203  */
3204 static inline void la_reset_pref(TCGTemp *ts)
3205 {
3206     *la_temp_pref(ts)
3207         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3208 }
3209 
3210 /* liveness analysis: end of function: all temps are dead, and globals
3211    should be in memory. */
3212 static void la_func_end(TCGContext *s, int ng, int nt)
3213 {
3214     int i;
3215 
3216     for (i = 0; i < ng; ++i) {
3217         s->temps[i].state = TS_DEAD | TS_MEM;
3218         la_reset_pref(&s->temps[i]);
3219     }
3220     for (i = ng; i < nt; ++i) {
3221         s->temps[i].state = TS_DEAD;
3222         la_reset_pref(&s->temps[i]);
3223     }
3224 }
3225 
3226 /* liveness analysis: end of basic block: all temps are dead, globals
3227    and local temps should be in memory. */
3228 static void la_bb_end(TCGContext *s, int ng, int nt)
3229 {
3230     int i;
3231 
3232     for (i = 0; i < nt; ++i) {
3233         TCGTemp *ts = &s->temps[i];
3234         int state;
3235 
3236         switch (ts->kind) {
3237         case TEMP_FIXED:
3238         case TEMP_GLOBAL:
3239         case TEMP_TB:
3240             state = TS_DEAD | TS_MEM;
3241             break;
3242         case TEMP_EBB:
3243         case TEMP_CONST:
3244             state = TS_DEAD;
3245             break;
3246         default:
3247             g_assert_not_reached();
3248         }
3249         ts->state = state;
3250         la_reset_pref(ts);
3251     }
3252 }
3253 
3254 /* liveness analysis: sync globals back to memory.  */
3255 static void la_global_sync(TCGContext *s, int ng)
3256 {
3257     int i;
3258 
3259     for (i = 0; i < ng; ++i) {
3260         int state = s->temps[i].state;
3261         s->temps[i].state = state | TS_MEM;
3262         if (state == TS_DEAD) {
3263             /* If the global was previously dead, reset prefs.  */
3264             la_reset_pref(&s->temps[i]);
3265         }
3266     }
3267 }
3268 
3269 /*
3270  * liveness analysis: conditional branch: all temps are dead unless
3271  * explicitly live-across-conditional-branch, globals and local temps
3272  * should be synced.
3273  */
3274 static void la_bb_sync(TCGContext *s, int ng, int nt)
3275 {
3276     la_global_sync(s, ng);
3277 
3278     for (int i = ng; i < nt; ++i) {
3279         TCGTemp *ts = &s->temps[i];
3280         int state;
3281 
3282         switch (ts->kind) {
3283         case TEMP_TB:
3284             state = ts->state;
3285             ts->state = state | TS_MEM;
3286             if (state != TS_DEAD) {
3287                 continue;
3288             }
3289             break;
3290         case TEMP_EBB:
3291         case TEMP_CONST:
3292             continue;
3293         default:
3294             g_assert_not_reached();
3295         }
3296         la_reset_pref(&s->temps[i]);
3297     }
3298 }
3299 
3300 /* liveness analysis: sync globals back to memory and kill.  */
3301 static void la_global_kill(TCGContext *s, int ng)
3302 {
3303     int i;
3304 
3305     for (i = 0; i < ng; i++) {
3306         s->temps[i].state = TS_DEAD | TS_MEM;
3307         la_reset_pref(&s->temps[i]);
3308     }
3309 }
3310 
3311 /* liveness analysis: note live globals crossing calls.  */
3312 static void la_cross_call(TCGContext *s, int nt)
3313 {
3314     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3315     int i;
3316 
3317     for (i = 0; i < nt; i++) {
3318         TCGTemp *ts = &s->temps[i];
3319         if (!(ts->state & TS_DEAD)) {
3320             TCGRegSet *pset = la_temp_pref(ts);
3321             TCGRegSet set = *pset;
3322 
3323             set &= mask;
3324             /* If the combination is not possible, restart.  */
3325             if (set == 0) {
3326                 set = tcg_target_available_regs[ts->type] & mask;
3327             }
3328             *pset = set;
3329         }
3330     }
3331 }
3332 
3333 /*
3334  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3335  * to TEMP_EBB, if possible.
3336  */
3337 static void __attribute__((noinline))
3338 liveness_pass_0(TCGContext *s)
3339 {
3340     void * const multiple_ebb = (void *)(uintptr_t)-1;
3341     int nb_temps = s->nb_temps;
3342     TCGOp *op, *ebb;
3343 
3344     for (int i = s->nb_globals; i < nb_temps; ++i) {
3345         s->temps[i].state_ptr = NULL;
3346     }
3347 
3348     /*
3349      * Represent each EBB by the op at which it begins.  In the case of
3350      * the first EBB, this is the first op, otherwise it is a label.
3351      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3352      * within a single EBB, else MULTIPLE_EBB.
3353      */
3354     ebb = QTAILQ_FIRST(&s->ops);
3355     QTAILQ_FOREACH(op, &s->ops, link) {
3356         const TCGOpDef *def;
3357         int nb_oargs, nb_iargs;
3358 
3359         switch (op->opc) {
3360         case INDEX_op_set_label:
3361             ebb = op;
3362             continue;
3363         case INDEX_op_discard:
3364             continue;
3365         case INDEX_op_call:
3366             nb_oargs = TCGOP_CALLO(op);
3367             nb_iargs = TCGOP_CALLI(op);
3368             break;
3369         default:
3370             def = &tcg_op_defs[op->opc];
3371             nb_oargs = def->nb_oargs;
3372             nb_iargs = def->nb_iargs;
3373             break;
3374         }
3375 
3376         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3377             TCGTemp *ts = arg_temp(op->args[i]);
3378 
3379             if (ts->kind != TEMP_TB) {
3380                 continue;
3381             }
3382             if (ts->state_ptr == NULL) {
3383                 ts->state_ptr = ebb;
3384             } else if (ts->state_ptr != ebb) {
3385                 ts->state_ptr = multiple_ebb;
3386             }
3387         }
3388     }
3389 
3390     /*
3391      * For TEMP_TB that turned out not to be used beyond one EBB,
3392      * reduce the liveness to TEMP_EBB.
3393      */
3394     for (int i = s->nb_globals; i < nb_temps; ++i) {
3395         TCGTemp *ts = &s->temps[i];
3396         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3397             ts->kind = TEMP_EBB;
3398         }
3399     }
3400 }
3401 
3402 /* Liveness analysis : update the opc_arg_life array to tell if a
3403    given input arguments is dead. Instructions updating dead
3404    temporaries are removed. */
3405 static void __attribute__((noinline))
3406 liveness_pass_1(TCGContext *s)
3407 {
3408     int nb_globals = s->nb_globals;
3409     int nb_temps = s->nb_temps;
3410     TCGOp *op, *op_prev;
3411     TCGRegSet *prefs;
3412     int i;
3413 
3414     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3415     for (i = 0; i < nb_temps; ++i) {
3416         s->temps[i].state_ptr = prefs + i;
3417     }
3418 
3419     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3420     la_func_end(s, nb_globals, nb_temps);
3421 
3422     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3423         int nb_iargs, nb_oargs;
3424         TCGOpcode opc_new, opc_new2;
3425         bool have_opc_new2;
3426         TCGLifeData arg_life = 0;
3427         TCGTemp *ts;
3428         TCGOpcode opc = op->opc;
3429         const TCGOpDef *def = &tcg_op_defs[opc];
3430 
3431         switch (opc) {
3432         case INDEX_op_call:
3433             {
3434                 const TCGHelperInfo *info = tcg_call_info(op);
3435                 int call_flags = tcg_call_flags(op);
3436 
3437                 nb_oargs = TCGOP_CALLO(op);
3438                 nb_iargs = TCGOP_CALLI(op);
3439 
3440                 /* pure functions can be removed if their result is unused */
3441                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3442                     for (i = 0; i < nb_oargs; i++) {
3443                         ts = arg_temp(op->args[i]);
3444                         if (ts->state != TS_DEAD) {
3445                             goto do_not_remove_call;
3446                         }
3447                     }
3448                     goto do_remove;
3449                 }
3450             do_not_remove_call:
3451 
3452                 /* Output args are dead.  */
3453                 for (i = 0; i < nb_oargs; i++) {
3454                     ts = arg_temp(op->args[i]);
3455                     if (ts->state & TS_DEAD) {
3456                         arg_life |= DEAD_ARG << i;
3457                     }
3458                     if (ts->state & TS_MEM) {
3459                         arg_life |= SYNC_ARG << i;
3460                     }
3461                     ts->state = TS_DEAD;
3462                     la_reset_pref(ts);
3463                 }
3464 
3465                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3466                 memset(op->output_pref, 0, sizeof(op->output_pref));
3467 
3468                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3469                                     TCG_CALL_NO_READ_GLOBALS))) {
3470                     la_global_kill(s, nb_globals);
3471                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3472                     la_global_sync(s, nb_globals);
3473                 }
3474 
3475                 /* Record arguments that die in this helper.  */
3476                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3477                     ts = arg_temp(op->args[i]);
3478                     if (ts->state & TS_DEAD) {
3479                         arg_life |= DEAD_ARG << i;
3480                     }
3481                 }
3482 
3483                 /* For all live registers, remove call-clobbered prefs.  */
3484                 la_cross_call(s, nb_temps);
3485 
3486                 /*
3487                  * Input arguments are live for preceding opcodes.
3488                  *
3489                  * For those arguments that die, and will be allocated in
3490                  * registers, clear the register set for that arg, to be
3491                  * filled in below.  For args that will be on the stack,
3492                  * reset to any available reg.  Process arguments in reverse
3493                  * order so that if a temp is used more than once, the stack
3494                  * reset to max happens before the register reset to 0.
3495                  */
3496                 for (i = nb_iargs - 1; i >= 0; i--) {
3497                     const TCGCallArgumentLoc *loc = &info->in[i];
3498                     ts = arg_temp(op->args[nb_oargs + i]);
3499 
3500                     if (ts->state & TS_DEAD) {
3501                         switch (loc->kind) {
3502                         case TCG_CALL_ARG_NORMAL:
3503                         case TCG_CALL_ARG_EXTEND_U:
3504                         case TCG_CALL_ARG_EXTEND_S:
3505                             if (arg_slot_reg_p(loc->arg_slot)) {
3506                                 *la_temp_pref(ts) = 0;
3507                                 break;
3508                             }
3509                             /* fall through */
3510                         default:
3511                             *la_temp_pref(ts) =
3512                                 tcg_target_available_regs[ts->type];
3513                             break;
3514                         }
3515                         ts->state &= ~TS_DEAD;
3516                     }
3517                 }
3518 
3519                 /*
3520                  * For each input argument, add its input register to prefs.
3521                  * If a temp is used once, this produces a single set bit;
3522                  * if a temp is used multiple times, this produces a set.
3523                  */
3524                 for (i = 0; i < nb_iargs; i++) {
3525                     const TCGCallArgumentLoc *loc = &info->in[i];
3526                     ts = arg_temp(op->args[nb_oargs + i]);
3527 
3528                     switch (loc->kind) {
3529                     case TCG_CALL_ARG_NORMAL:
3530                     case TCG_CALL_ARG_EXTEND_U:
3531                     case TCG_CALL_ARG_EXTEND_S:
3532                         if (arg_slot_reg_p(loc->arg_slot)) {
3533                             tcg_regset_set_reg(*la_temp_pref(ts),
3534                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3535                         }
3536                         break;
3537                     default:
3538                         break;
3539                     }
3540                 }
3541             }
3542             break;
3543         case INDEX_op_insn_start:
3544             break;
3545         case INDEX_op_discard:
3546             /* mark the temporary as dead */
3547             ts = arg_temp(op->args[0]);
3548             ts->state = TS_DEAD;
3549             la_reset_pref(ts);
3550             break;
3551 
3552         case INDEX_op_add2_i32:
3553             opc_new = INDEX_op_add_i32;
3554             goto do_addsub2;
3555         case INDEX_op_sub2_i32:
3556             opc_new = INDEX_op_sub_i32;
3557             goto do_addsub2;
3558         case INDEX_op_add2_i64:
3559             opc_new = INDEX_op_add_i64;
3560             goto do_addsub2;
3561         case INDEX_op_sub2_i64:
3562             opc_new = INDEX_op_sub_i64;
3563         do_addsub2:
3564             nb_iargs = 4;
3565             nb_oargs = 2;
3566             /* Test if the high part of the operation is dead, but not
3567                the low part.  The result can be optimized to a simple
3568                add or sub.  This happens often for x86_64 guest when the
3569                cpu mode is set to 32 bit.  */
3570             if (arg_temp(op->args[1])->state == TS_DEAD) {
3571                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3572                     goto do_remove;
3573                 }
3574                 /* Replace the opcode and adjust the args in place,
3575                    leaving 3 unused args at the end.  */
3576                 op->opc = opc = opc_new;
3577                 op->args[1] = op->args[2];
3578                 op->args[2] = op->args[4];
3579                 /* Fall through and mark the single-word operation live.  */
3580                 nb_iargs = 2;
3581                 nb_oargs = 1;
3582             }
3583             goto do_not_remove;
3584 
3585         case INDEX_op_mulu2_i32:
3586             opc_new = INDEX_op_mul_i32;
3587             opc_new2 = INDEX_op_muluh_i32;
3588             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3589             goto do_mul2;
3590         case INDEX_op_muls2_i32:
3591             opc_new = INDEX_op_mul_i32;
3592             opc_new2 = INDEX_op_mulsh_i32;
3593             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3594             goto do_mul2;
3595         case INDEX_op_mulu2_i64:
3596             opc_new = INDEX_op_mul_i64;
3597             opc_new2 = INDEX_op_muluh_i64;
3598             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3599             goto do_mul2;
3600         case INDEX_op_muls2_i64:
3601             opc_new = INDEX_op_mul_i64;
3602             opc_new2 = INDEX_op_mulsh_i64;
3603             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3604             goto do_mul2;
3605         do_mul2:
3606             nb_iargs = 2;
3607             nb_oargs = 2;
3608             if (arg_temp(op->args[1])->state == TS_DEAD) {
3609                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3610                     /* Both parts of the operation are dead.  */
3611                     goto do_remove;
3612                 }
3613                 /* The high part of the operation is dead; generate the low. */
3614                 op->opc = opc = opc_new;
3615                 op->args[1] = op->args[2];
3616                 op->args[2] = op->args[3];
3617             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3618                 /* The low part of the operation is dead; generate the high. */
3619                 op->opc = opc = opc_new2;
3620                 op->args[0] = op->args[1];
3621                 op->args[1] = op->args[2];
3622                 op->args[2] = op->args[3];
3623             } else {
3624                 goto do_not_remove;
3625             }
3626             /* Mark the single-word operation live.  */
3627             nb_oargs = 1;
3628             goto do_not_remove;
3629 
3630         default:
3631             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3632             nb_iargs = def->nb_iargs;
3633             nb_oargs = def->nb_oargs;
3634 
3635             /* Test if the operation can be removed because all
3636                its outputs are dead. We assume that nb_oargs == 0
3637                implies side effects */
3638             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3639                 for (i = 0; i < nb_oargs; i++) {
3640                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3641                         goto do_not_remove;
3642                     }
3643                 }
3644                 goto do_remove;
3645             }
3646             goto do_not_remove;
3647 
3648         do_remove:
3649             tcg_op_remove(s, op);
3650             break;
3651 
3652         do_not_remove:
3653             for (i = 0; i < nb_oargs; i++) {
3654                 ts = arg_temp(op->args[i]);
3655 
3656                 /* Remember the preference of the uses that followed.  */
3657                 if (i < ARRAY_SIZE(op->output_pref)) {
3658                     op->output_pref[i] = *la_temp_pref(ts);
3659                 }
3660 
3661                 /* Output args are dead.  */
3662                 if (ts->state & TS_DEAD) {
3663                     arg_life |= DEAD_ARG << i;
3664                 }
3665                 if (ts->state & TS_MEM) {
3666                     arg_life |= SYNC_ARG << i;
3667                 }
3668                 ts->state = TS_DEAD;
3669                 la_reset_pref(ts);
3670             }
3671 
3672             /* If end of basic block, update.  */
3673             if (def->flags & TCG_OPF_BB_EXIT) {
3674                 la_func_end(s, nb_globals, nb_temps);
3675             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3676                 la_bb_sync(s, nb_globals, nb_temps);
3677             } else if (def->flags & TCG_OPF_BB_END) {
3678                 la_bb_end(s, nb_globals, nb_temps);
3679             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3680                 la_global_sync(s, nb_globals);
3681                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3682                     la_cross_call(s, nb_temps);
3683                 }
3684             }
3685 
3686             /* Record arguments that die in this opcode.  */
3687             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3688                 ts = arg_temp(op->args[i]);
3689                 if (ts->state & TS_DEAD) {
3690                     arg_life |= DEAD_ARG << i;
3691                 }
3692             }
3693 
3694             /* Input arguments are live for preceding opcodes.  */
3695             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3696                 ts = arg_temp(op->args[i]);
3697                 if (ts->state & TS_DEAD) {
3698                     /* For operands that were dead, initially allow
3699                        all regs for the type.  */
3700                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3701                     ts->state &= ~TS_DEAD;
3702                 }
3703             }
3704 
3705             /* Incorporate constraints for this operand.  */
3706             switch (opc) {
3707             case INDEX_op_mov_i32:
3708             case INDEX_op_mov_i64:
3709                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3710                    have proper constraints.  That said, special case
3711                    moves to propagate preferences backward.  */
3712                 if (IS_DEAD_ARG(1)) {
3713                     *la_temp_pref(arg_temp(op->args[0]))
3714                         = *la_temp_pref(arg_temp(op->args[1]));
3715                 }
3716                 break;
3717 
3718             default:
3719                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3720                     const TCGArgConstraint *ct = &def->args_ct[i];
3721                     TCGRegSet set, *pset;
3722 
3723                     ts = arg_temp(op->args[i]);
3724                     pset = la_temp_pref(ts);
3725                     set = *pset;
3726 
3727                     set &= ct->regs;
3728                     if (ct->ialias) {
3729                         set &= output_pref(op, ct->alias_index);
3730                     }
3731                     /* If the combination is not possible, restart.  */
3732                     if (set == 0) {
3733                         set = ct->regs;
3734                     }
3735                     *pset = set;
3736                 }
3737                 break;
3738             }
3739             break;
3740         }
3741         op->life = arg_life;
3742     }
3743 }
3744 
/*
 * Liveness analysis pass 2: Convert indirect regs to direct temporaries.
 *
 * For each global marked indirect_reg, allocate a shadow TEMP_EBB
 * temporary and rewrite ops to reference it instead, inserting an
 * explicit load before the first use and a store at the last write.
 * The per-temp 'state' field tracks whether the shadow currently holds
 * the value (0), matches memory (TS_MEM), or must be reloaded (TS_DEAD).
 *
 * Returns true if any op was changed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow, and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* The shadow temp is stale: insert a reload before OP.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Output dies immediately: store the mov's input
                           directly and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3933 
/*
 * Assign a slot within the TCG stack frame to temporary TS.
 * The slot is sized and aligned according to ts->base_type; if the
 * base type was subdivided into several parts (base_type != type),
 * all sibling parts receive consecutive offsets in one allocation.
 * If the frame is exhausted, restarts translation with a smaller TB
 * via tcg_raise_tb_overflow().
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC addresses the frame through a biased frame pointer. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4002 
4003 /* Assign @reg to @ts, and update reg_to_temp[]. */
4004 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4005 {
4006     if (ts->val_type == TEMP_VAL_REG) {
4007         TCGReg old = ts->reg;
4008         tcg_debug_assert(s->reg_to_temp[old] == ts);
4009         if (old == reg) {
4010             return;
4011         }
4012         s->reg_to_temp[old] = NULL;
4013     }
4014     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4015     s->reg_to_temp[reg] = ts;
4016     ts->val_type = TEMP_VAL_REG;
4017     ts->reg = reg;
4018 }
4019 
4020 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4021 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4022 {
4023     tcg_debug_assert(type != TEMP_VAL_REG);
4024     if (ts->val_type == TEMP_VAL_REG) {
4025         TCGReg reg = ts->reg;
4026         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4027         s->reg_to_temp[reg] = NULL;
4028     }
4029     ts->val_type = type;
4030 }
4031 
4032 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4033 
4034 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4035    mark it free; otherwise mark it dead.  */
4036 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4037 {
4038     TCGTempVal new_type;
4039 
4040     switch (ts->kind) {
4041     case TEMP_FIXED:
4042         return;
4043     case TEMP_GLOBAL:
4044     case TEMP_TB:
4045         new_type = TEMP_VAL_MEM;
4046         break;
4047     case TEMP_EBB:
4048         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4049         break;
4050     case TEMP_CONST:
4051         new_type = TEMP_VAL_CONST;
4052         break;
4053     default:
4054         g_assert_not_reached();
4055     }
4056     set_temp_val_nonreg(s, ts, new_type);
4057 }
4058 
4059 /* Mark a temporary as dead.  */
4060 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4061 {
4062     temp_free_or_dead(s, ts, 1);
4063 }
4064 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent ones already match
       memory.  Otherwise store the current value to the backing slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize into a register
               and fall through to the register-store path.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4108 
4109 /* free register 'reg' by spilling the corresponding temporary if necessary */
4110 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4111 {
4112     TCGTemp *ts = s->reg_to_temp[reg];
4113     if (ts != NULL) {
4114         temp_sync(s, ts, allocated_regs, 0, -1);
4115     }
4116 }
4117 
4118 /**
4119  * tcg_reg_alloc:
4120  * @required_regs: Set of registers in which we must allocate.
4121  * @allocated_regs: Set of registers which must be avoided.
4122  * @preferred_regs: Set of registers we should prefer.
4123  * @rev: True if we search the registers in "indirect" order.
4124  *
4125  * The allocated register must be in @required_regs & ~@allocated_regs,
4126  * but if we can put it in @preferred_regs we may save a move later.
4127  */
4128 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4129                             TCGRegSet allocated_regs,
4130                             TCGRegSet preferred_regs, bool rev)
4131 {
4132     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4133     TCGRegSet reg_ct[2];
4134     const int *order;
4135 
4136     reg_ct[1] = required_regs & ~allocated_regs;
4137     tcg_debug_assert(reg_ct[1] != 0);
4138     reg_ct[0] = reg_ct[1] & preferred_regs;
4139 
4140     /* Skip the preferred_regs option if it cannot be satisfied,
4141        or if the preference made no difference.  */
4142     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4143 
4144     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4145 
4146     /* Try free registers, preferences first.  */
4147     for (j = f; j < 2; j++) {
4148         TCGRegSet set = reg_ct[j];
4149 
4150         if (tcg_regset_single(set)) {
4151             /* One register in the set.  */
4152             TCGReg reg = tcg_regset_first(set);
4153             if (s->reg_to_temp[reg] == NULL) {
4154                 return reg;
4155             }
4156         } else {
4157             for (i = 0; i < n; i++) {
4158                 TCGReg reg = order[i];
4159                 if (s->reg_to_temp[reg] == NULL &&
4160                     tcg_regset_test_reg(set, reg)) {
4161                     return reg;
4162                 }
4163             }
4164         }
4165     }
4166 
4167     /* We must spill something.  */
4168     for (j = f; j < 2; j++) {
4169         TCGRegSet set = reg_ct[j];
4170 
4171         if (tcg_regset_single(set)) {
4172             /* One register in the set.  */
4173             TCGReg reg = tcg_regset_first(set);
4174             tcg_reg_free(s, reg, allocated_regs);
4175             return reg;
4176         } else {
4177             for (i = 0; i < n; i++) {
4178                 TCGReg reg = order[i];
4179                 if (tcg_regset_test_reg(set, reg)) {
4180                     tcg_reg_free(s, reg, allocated_regs);
4181                     return reg;
4182                 }
4183             }
4184         }
4185     }
4186 
4187     g_assert_not_reached();
4188 }
4189 
/*
 * tcg_reg_alloc_pair:
 * Like tcg_reg_alloc, but allocate two adjacent registers (reg, reg + 1)
 * and return the lower one.  Occupants of either register are spilled as
 * needed, preferring the candidate pair requiring the fewest spills.
 * Parameters have the same meaning as for tcg_reg_alloc().
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free.  */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* The fmin == 0 iteration accepts any candidate, so we cannot get
       here while reg_ct[1] is non-empty (asserted above).  */
    g_assert_not_reached();
}
4235 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  Constants are materialized with
   a movi (or a vector dup); memory-resident values are reloaded.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I32) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else if (ts->type == TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* Freshly materialized value is not yet in memory.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* The register copy now matches the memory slot.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4284 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   NOTE(review): today this reduces to a consistency check — liveness
   analysis is expected to have synced the value already; the parameter
   is retained for interface symmetry with temp_sync().  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4293 
4294 /* save globals to their canonical location and assume they can be
4295    modified be the following code. 'allocated_regs' is used in case a
4296    temporary registers needs to be allocated to store a constant. */
4297 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4298 {
4299     int i, n;
4300 
4301     for (i = 0, n = s->nb_globals; i < n; i++) {
4302         temp_save(s, &s->temps[i], allocated_regs);
4303     }
4304 }
4305 
4306 /* sync globals to their canonical location and assume they can be
4307    read by the following code. 'allocated_regs' is used in case a
4308    temporary registers needs to be allocated to store a constant. */
4309 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4310 {
4311     int i, n;
4312 
4313     for (i = 0, n = s->nb_globals; i < n; i++) {
4314         TCGTemp *ts = &s->temps[i];
4315         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4316                          || ts->kind == TEMP_FIXED
4317                          || ts->mem_coherent);
4318     }
4319 }
4320 
4321 /* at the end of a basic block, we assume all temporaries are dead and
4322    all globals are stored at their canonical location. */
4323 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4324 {
4325     int i;
4326 
4327     for (i = s->nb_globals; i < s->nb_temps; i++) {
4328         TCGTemp *ts = &s->temps[i];
4329 
4330         switch (ts->kind) {
4331         case TEMP_TB:
4332             temp_save(s, ts, allocated_regs);
4333             break;
4334         case TEMP_EBB:
4335             /* The liveness analysis already ensures that temps are dead.
4336                Keep an tcg_debug_assert for safety. */
4337             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4338             break;
4339         case TEMP_CONST:
4340             /* Similarly, we should have freed any allocated register. */
4341             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4342             break;
4343         default:
4344             g_assert_not_reached();
4345         }
4346     }
4347 
4348     save_globals(s, allocated_regs);
4349 }
4350 
4351 /*
4352  * At a conditional branch, we assume all temporaries are dead unless
4353  * explicitly live-across-conditional-branch; all globals and local
4354  * temps are synced to their location.
4355  */
4356 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4357 {
4358     sync_globals(s, allocated_regs);
4359 
4360     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4361         TCGTemp *ts = &s->temps[i];
4362         /*
4363          * The liveness analysis already ensures that temps are dead.
4364          * Keep tcg_debug_asserts for safety.
4365          */
4366         switch (ts->kind) {
4367         case TEMP_TB:
4368             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4369             break;
4370         case TEMP_EBB:
4371         case TEMP_CONST:
4372             break;
4373         default:
4374             g_assert_not_reached();
4375         }
4376     }
4377 }
4378 
4379 /*
4380  * Specialized code generation for INDEX_op_mov_* with a constant.
4381  */
4382 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4383                                   tcg_target_ulong val, TCGLifeData arg_life,
4384                                   TCGRegSet preferred_regs)
4385 {
4386     /* ENV should not be modified.  */
4387     tcg_debug_assert(!temp_readonly(ots));
4388 
4389     /* The movi is not explicitly generated here.  */
4390     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4391     ots->val = val;
4392     ots->mem_coherent = 0;
4393     if (NEED_SYNC_ARG(0)) {
4394         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4395     } else if (IS_DEAD_ARG(0)) {
4396         temp_dead(s, ots);
4397     }
4398 }
4399 
4400 /*
4401  * Specialized code generation for INDEX_op_mov_*.
4402  */
4403 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4404 {
4405     const TCGLifeData arg_life = op->life;
4406     TCGRegSet allocated_regs, preferred_regs;
4407     TCGTemp *ts, *ots;
4408     TCGType otype, itype;
4409     TCGReg oreg, ireg;
4410 
4411     allocated_regs = s->reserved_regs;
4412     preferred_regs = output_pref(op, 0);
4413     ots = arg_temp(op->args[0]);
4414     ts = arg_temp(op->args[1]);
4415 
4416     /* ENV should not be modified.  */
4417     tcg_debug_assert(!temp_readonly(ots));
4418 
4419     /* Note that otype != itype for no-op truncation.  */
4420     otype = ots->type;
4421     itype = ts->type;
4422 
4423     if (ts->val_type == TEMP_VAL_CONST) {
4424         /* propagate constant or generate sti */
4425         tcg_target_ulong val = ts->val;
4426         if (IS_DEAD_ARG(1)) {
4427             temp_dead(s, ts);
4428         }
4429         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4430         return;
4431     }
4432 
4433     /* If the source value is in memory we're going to be forced
4434        to have it in a register in order to perform the copy.  Copy
4435        the SOURCE value into its own register first, that way we
4436        don't have to reload SOURCE the next time it is used. */
4437     if (ts->val_type == TEMP_VAL_MEM) {
4438         temp_load(s, ts, tcg_target_available_regs[itype],
4439                   allocated_regs, preferred_regs);
4440     }
4441     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4442     ireg = ts->reg;
4443 
4444     if (IS_DEAD_ARG(0)) {
4445         /* mov to a non-saved dead register makes no sense (even with
4446            liveness analysis disabled). */
4447         tcg_debug_assert(NEED_SYNC_ARG(0));
4448         if (!ots->mem_allocated) {
4449             temp_allocate_frame(s, ots);
4450         }
4451         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4452         if (IS_DEAD_ARG(1)) {
4453             temp_dead(s, ts);
4454         }
4455         temp_dead(s, ots);
4456         return;
4457     }
4458 
4459     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4460         /*
4461          * The mov can be suppressed.  Kill input first, so that it
4462          * is unlinked from reg_to_temp, then set the output to the
4463          * reg that we saved from the input.
4464          */
4465         temp_dead(s, ts);
4466         oreg = ireg;
4467     } else {
4468         if (ots->val_type == TEMP_VAL_REG) {
4469             oreg = ots->reg;
4470         } else {
4471             /* Make sure to not spill the input register during allocation. */
4472             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4473                                  allocated_regs | ((TCGRegSet)1 << ireg),
4474                                  preferred_regs, ots->indirect_base);
4475         }
4476         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4477             /*
4478              * Cross register class move not supported.
4479              * Store the source register into the destination slot
4480              * and leave the destination temp as TEMP_VAL_MEM.
4481              */
4482             assert(!temp_readonly(ots));
4483             if (!ts->mem_allocated) {
4484                 temp_allocate_frame(s, ots);
4485             }
4486             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4487             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4488             ots->mem_coherent = 1;
4489             return;
4490         }
4491     }
4492     set_temp_val_reg(s, ots, oreg);
4493     ots->mem_coherent = 0;
4494 
4495     if (NEED_SYNC_ARG(0)) {
4496         temp_sync(s, ots, allocated_regs, 0, 0);
4497     }
4498 }
4499 
4500 /*
4501  * Specialized code generation for INDEX_op_dup_vec.
4502  */
4503 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4504 {
4505     const TCGLifeData arg_life = op->life;
4506     TCGRegSet dup_out_regs, dup_in_regs;
4507     TCGTemp *its, *ots;
4508     TCGType itype, vtype;
4509     unsigned vece;
4510     int lowpart_ofs;
4511     bool ok;
4512 
4513     ots = arg_temp(op->args[0]);
4514     its = arg_temp(op->args[1]);
4515 
4516     /* ENV should not be modified.  */
4517     tcg_debug_assert(!temp_readonly(ots));
4518 
4519     itype = its->type;
4520     vece = TCGOP_VECE(op);
4521     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4522 
4523     if (its->val_type == TEMP_VAL_CONST) {
4524         /* Propagate constant via movi -> dupi.  */
4525         tcg_target_ulong val = its->val;
4526         if (IS_DEAD_ARG(1)) {
4527             temp_dead(s, its);
4528         }
4529         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4530         return;
4531     }
4532 
4533     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4534     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4535 
4536     /* Allocate the output register now.  */
4537     if (ots->val_type != TEMP_VAL_REG) {
4538         TCGRegSet allocated_regs = s->reserved_regs;
4539         TCGReg oreg;
4540 
4541         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4542             /* Make sure to not spill the input register. */
4543             tcg_regset_set_reg(allocated_regs, its->reg);
4544         }
4545         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4546                              output_pref(op, 0), ots->indirect_base);
4547         set_temp_val_reg(s, ots, oreg);
4548     }
4549 
4550     switch (its->val_type) {
4551     case TEMP_VAL_REG:
4552         /*
4553          * The dup constriaints must be broad, covering all possible VECE.
4554          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4555          * to fail, indicating that extra moves are required for that case.
4556          */
4557         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4558             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4559                 goto done;
4560             }
4561             /* Try again from memory or a vector input register.  */
4562         }
4563         if (!its->mem_coherent) {
4564             /*
4565              * The input register is not synced, and so an extra store
4566              * would be required to use memory.  Attempt an integer-vector
4567              * register move first.  We do not have a TCGRegSet for this.
4568              */
4569             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4570                 break;
4571             }
4572             /* Sync the temp back to its slot and load from there.  */
4573             temp_sync(s, its, s->reserved_regs, 0, 0);
4574         }
4575         /* fall through */
4576 
4577     case TEMP_VAL_MEM:
4578         lowpart_ofs = 0;
4579         if (HOST_BIG_ENDIAN) {
4580             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4581         }
4582         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4583                              its->mem_offset + lowpart_ofs)) {
4584             goto done;
4585         }
4586         /* Load the input into the destination vector register. */
4587         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4588         break;
4589 
4590     default:
4591         g_assert_not_reached();
4592     }
4593 
4594     /* We now have a vector input register, so dup must succeed. */
4595     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4596     tcg_debug_assert(ok);
4597 
4598  done:
4599     ots->mem_coherent = 0;
4600     if (IS_DEAD_ARG(1)) {
4601         temp_dead(s, its);
4602     }
4603     if (NEED_SYNC_ARG(0)) {
4604         temp_sync(s, ots, s->reserved_regs, 0, 0);
4605     }
4606     if (IS_DEAD_ARG(0)) {
4607         temp_dead(s, ots);
4608     }
4609 }
4610 
/*
 * Generic register allocation and code emission for one TCG opcode.
 *
 * In order: satisfy each input constraint (constants, single registers,
 * or register pairs, with moves inserted as needed), release inputs that
 * die here, handle branch/basic-block boundaries and call-clobbered
 * registers, allocate outputs per their constraints, emit the host
 * instruction, and finally sync or kill the outputs per liveness.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in the constraint-sorted order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        /* NB: reg is only meaningful while ts->val_type == TEMP_VAL_REG;
           every use below is guarded by that check. */
        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half fixed our register: we are its successor. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /* If the temp is already positioned as the high half of a
               free, unconstrained pair, reserve the low half and keep it. */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of its aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Output must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4976 
/*
 * Specialized allocation for INDEX_op_dup2_vec: replicate a 64-bit
 * value given as two 32-bit halves (low in args[1], high in args[2])
 * across the vector output.  Only valid on 32-bit hosts.  Returns
 * false if no shortcut applied and generic expansion must be used;
 * true on success.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* 'its' is the half at the lower address of the 64-bit pair. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5064 
5065 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5066                          TCGRegSet allocated_regs)
5067 {
5068     if (ts->val_type == TEMP_VAL_REG) {
5069         if (ts->reg != reg) {
5070             tcg_reg_free(s, reg, allocated_regs);
5071             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5072                 /*
5073                  * Cross register class move not supported.  Sync the
5074                  * temp back to its slot and load from there.
5075                  */
5076                 temp_sync(s, ts, allocated_regs, 0, 0);
5077                 tcg_out_ld(s, ts->type, reg,
5078                            ts->mem_base->reg, ts->mem_offset);
5079             }
5080         }
5081     } else {
5082         TCGRegSet arg_set = 0;
5083 
5084         tcg_reg_free(s, reg, allocated_regs);
5085         tcg_regset_set_reg(arg_set, reg);
5086         temp_load(s, ts, arg_set, allocated_regs, 0);
5087     }
5088 }
5089 
5090 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5091                          TCGRegSet allocated_regs)
5092 {
5093     /*
5094      * When the destination is on the stack, load up the temp and store.
5095      * If there are many call-saved registers, the temp might live to
5096      * see another use; otherwise it'll be discarded.
5097      */
5098     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5099     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5100                arg_slot_stk_ofs(arg_slot));
5101 }
5102 
5103 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5104                             TCGTemp *ts, TCGRegSet *allocated_regs)
5105 {
5106     if (arg_slot_reg_p(l->arg_slot)) {
5107         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5108         load_arg_reg(s, reg, ts, *allocated_regs);
5109         tcg_regset_set_reg(*allocated_regs, reg);
5110     } else {
5111         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5112     }
5113 }
5114 
5115 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5116                          intptr_t ref_off, TCGRegSet *allocated_regs)
5117 {
5118     TCGReg reg;
5119 
5120     if (arg_slot_reg_p(arg_slot)) {
5121         reg = tcg_target_call_iarg_regs[arg_slot];
5122         tcg_reg_free(s, reg, *allocated_regs);
5123         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5124         tcg_regset_set_reg(*allocated_regs, reg);
5125     } else {
5126         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5127                             *allocated_regs, 0, false);
5128         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5129         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5130                    arg_slot_stk_ofs(arg_slot));
5131     }
5132 }
5133 
/*
 * Allocate registers and emit code for a call op: place the inputs in
 * their ABI argument locations, free caller-clobbered registers, save
 * or sync globals per the helper's flags, emit the call itself, then
 * bind the outputs to the ABI return location(s).
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* First word: spill to the reference slot, then pass a
               pointer to that slot in the actual argument location. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference value: spill only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The 128-bit value arrives in a vector register; store it to
           the output's home slot so it can be read as two words. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5264 
5265 /**
5266  * atom_and_align_for_opc:
5267  * @s: tcg context
5268  * @opc: memory operation code
5269  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5270  * @allow_two_ops: true if we are prepared to issue two operations
5271  *
5272  * Return the alignment and atomicity to use for the inline fast path
5273  * for the given memory operation.  The alignment may be larger than
5274  * that specified in @opc, and the correct alignment will be diagnosed
5275  * by the slow path helper.
5276  *
5277  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5278  * and issue two loads or stores for subalignment.
5279  */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
{
    MemOp align = get_alignment_bits(opc);
    MemOp size = opc & MO_SIZE;
    /* Size of one half of the access, used for the *_PAIR modes. */
    MemOp half = size ? size - 1 : 0;
    MemOp atmax;
    MemOp atom;

    /* When serialized, no further atomicity required.  */
    if (s->gen_tb->cflags & CF_PARALLEL) {
        atom = opc & MO_ATOM_MASK;
    } else {
        atom = MO_ATOM_NONE;
    }

    switch (atom) {
    case MO_ATOM_NONE:
        /* The operation requires no specific atomicity. */
        atmax = MO_8;
        break;

    case MO_ATOM_IFALIGN:
        atmax = size;
        break;

    case MO_ATOM_IFALIGN_PAIR:
        /* Only the two halves need be atomic, when aligned. */
        atmax = half;
        break;

    case MO_ATOM_WITHIN16:
        atmax = size;
        if (size == MO_128) {
            /* Misalignment implies !within16, and therefore no atomicity. */
        } else if (host_atom != MO_ATOM_WITHIN16) {
            /* The host does not implement within16, so require alignment. */
            align = MAX(align, size);
        }
        break;

    case MO_ATOM_WITHIN16_PAIR:
        atmax = size;
        /*
         * Misalignment implies !within16, and therefore half atomicity.
         * Any host prepared for two operations can implement this with
         * half alignment.
         */
        if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
            align = MAX(align, half);
        }
        break;

    case MO_ATOM_SUBALIGN:
        atmax = size;
        if (host_atom != MO_ATOM_SUBALIGN) {
            /* If unaligned but not odd, there are subobjects up to half. */
            if (allow_two_ops) {
                align = MAX(align, half);
            } else {
                align = MAX(align, size);
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    return (TCGAtomAlign){ .atom = atmax, .align = align };
}
5350 
5351 /*
5352  * Similarly for qemu_ld/st slow path helpers.
5353  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5354  * using only the provided backend tcg_out_* functions.
5355  */
5356 
5357 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5358 {
5359     int ofs = arg_slot_stk_ofs(slot);
5360 
5361     /*
5362      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5363      * require extension to uint64_t, adjust the address for uint32_t.
5364      */
5365     if (HOST_BIG_ENDIAN &&
5366         TCG_TARGET_REG_BITS == 64 &&
5367         type == TCG_TYPE_I32) {
5368         ofs += 4;
5369     }
5370     return ofs;
5371 }
5372 
5373 static void tcg_out_helper_load_slots(TCGContext *s,
5374                                       unsigned nmov, TCGMovExtend *mov,
5375                                       const TCGLdstHelperParam *parm)
5376 {
5377     unsigned i;
5378     TCGReg dst3;
5379 
5380     /*
5381      * Start from the end, storing to the stack first.
5382      * This frees those registers, so we need not consider overlap.
5383      */
5384     for (i = nmov; i-- > 0; ) {
5385         unsigned slot = mov[i].dst;
5386 
5387         if (arg_slot_reg_p(slot)) {
5388             goto found_reg;
5389         }
5390 
5391         TCGReg src = mov[i].src;
5392         TCGType dst_type = mov[i].dst_type;
5393         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5394 
5395         /* The argument is going onto the stack; extend into scratch. */
5396         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5397             tcg_debug_assert(parm->ntmp != 0);
5398             mov[i].dst = src = parm->tmp[0];
5399             tcg_out_movext1(s, &mov[i]);
5400         }
5401 
5402         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5403                    tcg_out_helper_stk_ofs(dst_type, slot));
5404     }
5405     return;
5406 
5407  found_reg:
5408     /*
5409      * The remaining arguments are in registers.
5410      * Convert slot numbers to argument registers.
5411      */
5412     nmov = i + 1;
5413     for (i = 0; i < nmov; ++i) {
5414         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5415     }
5416 
5417     switch (nmov) {
5418     case 4:
5419         /* The backend must have provided enough temps for the worst case. */
5420         tcg_debug_assert(parm->ntmp >= 2);
5421 
5422         dst3 = mov[3].dst;
5423         for (unsigned j = 0; j < 3; ++j) {
5424             if (dst3 == mov[j].src) {
5425                 /*
5426                  * Conflict. Copy the source to a temporary, perform the
5427                  * remaining moves, then the extension from our scratch
5428                  * on the way out.
5429                  */
5430                 TCGReg scratch = parm->tmp[1];
5431 
5432                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5433                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5434                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5435                 break;
5436             }
5437         }
5438 
5439         /* No conflicts: perform this move and continue. */
5440         tcg_out_movext1(s, &mov[3]);
5441         /* fall through */
5442 
5443     case 3:
5444         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5445                         parm->ntmp ? parm->tmp[0] : -1);
5446         break;
5447     case 2:
5448         tcg_out_movext2(s, mov, mov + 1,
5449                         parm->ntmp ? parm->tmp[0] : -1);
5450         break;
5451     case 1:
5452         tcg_out_movext1(s, mov);
5453         break;
5454     default:
5455         g_assert_not_reached();
5456     }
5457 }
5458 
5459 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5460                                     TCGType type, tcg_target_long imm,
5461                                     const TCGLdstHelperParam *parm)
5462 {
5463     if (arg_slot_reg_p(slot)) {
5464         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5465     } else {
5466         int ofs = tcg_out_helper_stk_ofs(type, slot);
5467         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5468             tcg_debug_assert(parm->ntmp != 0);
5469             tcg_out_movi(s, type, parm->tmp[0], imm);
5470             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5471         }
5472     }
5473 }
5474 
/*
 * Load the arguments common to every qemu_ld/st slow-path helper:
 * the env pointer (always first), the MemOpIdx @oi, and the return
 * address used for unwinding.  @next_arg is the index in @info->in
 * of the first argument after those the caller has already placed.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.  If the backend supplies a hook to generate the
     * return address in a register, use it (hinting the ABI argument
     * register when the slot is one); otherwise load the raddr of the
     * slow-path label as an immediate.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5541 
5542 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5543                                        const TCGCallArgumentLoc *loc,
5544                                        TCGType dst_type, TCGType src_type,
5545                                        TCGReg lo, TCGReg hi)
5546 {
5547     MemOp reg_mo;
5548 
5549     if (dst_type <= TCG_TYPE_REG) {
5550         MemOp src_ext;
5551 
5552         switch (loc->kind) {
5553         case TCG_CALL_ARG_NORMAL:
5554             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5555             break;
5556         case TCG_CALL_ARG_EXTEND_U:
5557             dst_type = TCG_TYPE_REG;
5558             src_ext = MO_UL;
5559             break;
5560         case TCG_CALL_ARG_EXTEND_S:
5561             dst_type = TCG_TYPE_REG;
5562             src_ext = MO_SL;
5563             break;
5564         default:
5565             g_assert_not_reached();
5566         }
5567 
5568         mov[0].dst = loc->arg_slot;
5569         mov[0].dst_type = dst_type;
5570         mov[0].src = lo;
5571         mov[0].src_type = src_type;
5572         mov[0].src_ext = src_ext;
5573         return 1;
5574     }
5575 
5576     if (TCG_TARGET_REG_BITS == 32) {
5577         assert(dst_type == TCG_TYPE_I64);
5578         reg_mo = MO_32;
5579     } else {
5580         assert(dst_type == TCG_TYPE_I128);
5581         reg_mo = MO_64;
5582     }
5583 
5584     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5585     mov[0].src = lo;
5586     mov[0].dst_type = TCG_TYPE_REG;
5587     mov[0].src_type = TCG_TYPE_REG;
5588     mov[0].src_ext = reg_mo;
5589 
5590     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5591     mov[1].src = hi;
5592     mov[1].dst_type = TCG_TYPE_REG;
5593     mov[1].src_type = TCG_TYPE_REG;
5594     mov[1].src_ext = reg_mo;
5595 
5596     return 2;
5597 }
5598 
/*
 * Emit the argument loads for a qemu_ld slow-path helper call: select
 * the helper by access size, place the guest address, set up return
 * memory for TCG_CALL_RET_BY_REF, then load the common env/oi/ra
 * arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* First slot is on the stack: pass the address there. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5679 
/*
 * Move the return value of a qemu_ld slow-path helper from the ABI
 * return location(s) into ldst->datalo_reg/datahi_reg, applying any
 * required extension.  With @load_sign, the helper was allowed to
 * perform the sign extension itself, so only a plain move is needed.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* 32-bit host: a 64-bit value arrives in two registers;
           handled by the two-move tail below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Value in two registers: two-move tail below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload
               both 64-bit halves from there (shared with BY_REF). */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register return: move both halves, resolving overlap. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
5765 
/*
 * Emit the argument loads for a qemu_st slow-path helper call: select
 * the helper by access size, place the guest address and data value
 * (by value, or by reference for 128-bit stores), then load the common
 * env/oi/ra arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* 128-bit data: store both halves to the reference slots in
           host-endian order, then pass a pointer to that memory. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5871 
5872 #ifdef CONFIG_PROFILER
5873 
5874 /* avoid copy/paste errors */
/* Atomically read @from->field and accumulate it into @to->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read @from->field and keep the maximum in @to->field. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
5887 
5888 /* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    /* Accumulate profiling data from every live TCG context into @prof:
       scalar counters when @counters, per-opcode counts when @table. */
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            /* NOTE(review): this inner i intentionally shadows the
               outer context-loop variable. */
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}
5927 
5928 #undef PROF_ADD
5929 #undef PROF_MAX
5930 
/* Snapshot only the scalar counters into a zero'ed @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
5935 
/* Snapshot only the per-opcode table into a zero'ed @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
5940 
5941 void tcg_dump_op_count(GString *buf)
5942 {
5943     TCGProfile prof = {};
5944     int i;
5945 
5946     tcg_profile_snapshot_table(&prof);
5947     for (i = 0; i < NB_OPS; i++) {
5948         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5949                                prof.table_op_count[i]);
5950     }
5951 }
5952 
5953 int64_t tcg_cpu_exec_time(void)
5954 {
5955     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5956     unsigned int i;
5957     int64_t ret = 0;
5958 
5959     for (i = 0; i < n_ctxs; i++) {
5960         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5961         const TCGProfile *prof = &s->prof;
5962 
5963         ret += qatomic_read(&prof->cpu_exec_time);
5964     }
5965     return ret;
5966 }
5967 #else
/* Stub used when the TCG profiler is not compiled in. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5972 
/* Stub used when the TCG profiler is not compiled in: fatal if called. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
5978 #endif
5979 
5980 
/*
 * Translate the TCG opcode stream accumulated in @s into host machine
 * code for @tb.  @pc_start is used only to filter the "-d op" family
 * of debug logging via qemu_log_in_addr_range().
 *
 * Returns the number of bytes of host code emitted, or a negative
 * value when generation must be abandoned (and retried by the caller):
 *   -1  the code buffer high-water mark was exceeded;
 *   -2  the TB outgrew the 16-bit insn-end offsets, or relocations
 *       could not be resolved;
 *   <0  propagated from the backend's ldst/pool finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Account op and temp counts: running totals plus per-TB maxima. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

    /* Dump the raw (pre-optimization) opcode stream for "-d op". */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    /* Time the optimizer: subtract the clock now, add it back after,
       so opt_time accumulates (end - start). */
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

    tcg_optimize(s);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    /* Same bracketing technique for the liveness passes below. */
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        /* Dump the ops before indirect-temp lowering for "-d op_ind". */
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

    /* Dump the final opcode stream for "-d op_opt". */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Start at -1 so the first insn_start op leaves num_insns at 0. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous guest insn's host-code extent. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                s->gen_insn_data[num_insns][i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* A label ends the basic block: sync/free temps first. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Every TB contains at least one insn_start op. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6210 
6211 #ifdef CONFIG_PROFILER
/*
 * Append human-readable TCG profiling statistics to @buf, computed
 * from a snapshot of the accumulated per-context counters.  Compiled
 * only when CONFIG_PROFILER is enabled.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;   /* tb_count clamped to >= 1 for safe division */
    int64_t tot;            /* total JIT cycles: interm + codegen time */

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    /* NOTE(review): 2.4 GHz is an assumed clock rate used only to
       convert profiler cycle counts to seconds for display. */
    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    /* tb_count1 counts all translation attempts; tb_count only the
       completed ones, so the difference is the aborted TBs. */
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    /* NOTE(review): this label's padding is wider than the other
       "cycles/..." rows, so the column does not line up. */
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Clamp to avoid division by zero in the percentage rows below. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
6275 #else
6276 void tcg_dump_info(GString *buf)
6277 {
6278     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6279 }
6280 #endif
6281 
6282 #ifdef ELF_HOST_MACHINE
6283 /* In order to use this feature, the backend needs to do three things:
6284 
6285    (1) Define ELF_HOST_MACHINE to indicate both what value to
6286        put into the ELF image and to indicate support for the feature.
6287 
6288    (2) Define tcg_register_jit.  This should create a buffer containing
6289        the contents of a .debug_frame section that describes the post-
6290        prologue unwind info for the tcg machine.
6291 
6292    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6293 */
6294 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action GDB should take for the entry named in relevant_entry. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file, on a doubly linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;     /* start of the in-memory ELF image */
    uint64_t symfile_size;        /* size of that image in bytes */
};

/* Root descriptor that the debugger reads; instantiated below as
   __jit_debug_descriptor. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;         /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
6315 
/*
 * Per the GDB JIT interface, the debugger places a breakpoint on this
 * well-known symbol and re-reads __jit_debug_descriptor each time it
 * is called.  The noinline attribute and the empty asm statement keep
 * the compiler from inlining or eliding the call.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6325 
6326 /* End GDB interface.  */
6327 
/*
 * Return the byte offset of @str within the ELF string table @strtab.
 * The table is a sequence of NUL-terminated strings beginning with an
 * empty string at offset 0.  @str must be present in the table; the
 * scan does not terminate otherwise (callers only look up names they
 * placed in the template's .str themselves).
 */
static int find_string(const char *strtab, const char *str)
{
    const char *pos = strtab + 1;           /* skip the leading empty entry */

    while (strcmp(pos, str) != 0) {
        pos += strlen(pos) + 1;             /* step over this entry's NUL */
    }
    return pos - strtab;
}
6339 
6340 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6341                                  const void *debug_frame,
6342                                  size_t debug_frame_size)
6343 {
6344     struct __attribute__((packed)) DebugInfo {
6345         uint32_t  len;
6346         uint16_t  version;
6347         uint32_t  abbrev;
6348         uint8_t   ptr_size;
6349         uint8_t   cu_die;
6350         uint16_t  cu_lang;
6351         uintptr_t cu_low_pc;
6352         uintptr_t cu_high_pc;
6353         uint8_t   fn_die;
6354         char      fn_name[16];
6355         uintptr_t fn_low_pc;
6356         uintptr_t fn_high_pc;
6357         uint8_t   cu_eoc;
6358     };
6359 
6360     struct ElfImage {
6361         ElfW(Ehdr) ehdr;
6362         ElfW(Phdr) phdr;
6363         ElfW(Shdr) shdr[7];
6364         ElfW(Sym)  sym[2];
6365         struct DebugInfo di;
6366         uint8_t    da[24];
6367         char       str[80];
6368     };
6369 
6370     struct ElfImage *img;
6371 
6372     static const struct ElfImage img_template = {
6373         .ehdr = {
6374             .e_ident[EI_MAG0] = ELFMAG0,
6375             .e_ident[EI_MAG1] = ELFMAG1,
6376             .e_ident[EI_MAG2] = ELFMAG2,
6377             .e_ident[EI_MAG3] = ELFMAG3,
6378             .e_ident[EI_CLASS] = ELF_CLASS,
6379             .e_ident[EI_DATA] = ELF_DATA,
6380             .e_ident[EI_VERSION] = EV_CURRENT,
6381             .e_type = ET_EXEC,
6382             .e_machine = ELF_HOST_MACHINE,
6383             .e_version = EV_CURRENT,
6384             .e_phoff = offsetof(struct ElfImage, phdr),
6385             .e_shoff = offsetof(struct ElfImage, shdr),
6386             .e_ehsize = sizeof(ElfW(Shdr)),
6387             .e_phentsize = sizeof(ElfW(Phdr)),
6388             .e_phnum = 1,
6389             .e_shentsize = sizeof(ElfW(Shdr)),
6390             .e_shnum = ARRAY_SIZE(img->shdr),
6391             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6392 #ifdef ELF_HOST_FLAGS
6393             .e_flags = ELF_HOST_FLAGS,
6394 #endif
6395 #ifdef ELF_OSABI
6396             .e_ident[EI_OSABI] = ELF_OSABI,
6397 #endif
6398         },
6399         .phdr = {
6400             .p_type = PT_LOAD,
6401             .p_flags = PF_X,
6402         },
6403         .shdr = {
6404             [0] = { .sh_type = SHT_NULL },
6405             /* Trick: The contents of code_gen_buffer are not present in
6406                this fake ELF file; that got allocated elsewhere.  Therefore
6407                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6408                will not look for contents.  We can record any address.  */
6409             [1] = { /* .text */
6410                 .sh_type = SHT_NOBITS,
6411                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6412             },
6413             [2] = { /* .debug_info */
6414                 .sh_type = SHT_PROGBITS,
6415                 .sh_offset = offsetof(struct ElfImage, di),
6416                 .sh_size = sizeof(struct DebugInfo),
6417             },
6418             [3] = { /* .debug_abbrev */
6419                 .sh_type = SHT_PROGBITS,
6420                 .sh_offset = offsetof(struct ElfImage, da),
6421                 .sh_size = sizeof(img->da),
6422             },
6423             [4] = { /* .debug_frame */
6424                 .sh_type = SHT_PROGBITS,
6425                 .sh_offset = sizeof(struct ElfImage),
6426             },
6427             [5] = { /* .symtab */
6428                 .sh_type = SHT_SYMTAB,
6429                 .sh_offset = offsetof(struct ElfImage, sym),
6430                 .sh_size = sizeof(img->sym),
6431                 .sh_info = 1,
6432                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6433                 .sh_entsize = sizeof(ElfW(Sym)),
6434             },
6435             [6] = { /* .strtab */
6436                 .sh_type = SHT_STRTAB,
6437                 .sh_offset = offsetof(struct ElfImage, str),
6438                 .sh_size = sizeof(img->str),
6439             }
6440         },
6441         .sym = {
6442             [1] = { /* code_gen_buffer */
6443                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6444                 .st_shndx = 1,
6445             }
6446         },
6447         .di = {
6448             .len = sizeof(struct DebugInfo) - 4,
6449             .version = 2,
6450             .ptr_size = sizeof(void *),
6451             .cu_die = 1,
6452             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6453             .fn_die = 2,
6454             .fn_name = "code_gen_buffer"
6455         },
6456         .da = {
6457             1,          /* abbrev number (the cu) */
6458             0x11, 1,    /* DW_TAG_compile_unit, has children */
6459             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6460             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6461             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6462             0, 0,       /* end of abbrev */
6463             2,          /* abbrev number (the fn) */
6464             0x2e, 0,    /* DW_TAG_subprogram, no children */
6465             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6466             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6467             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6468             0, 0,       /* end of abbrev */
6469             0           /* no more abbrev */
6470         },
6471         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6472                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6473     };
6474 
6475     /* We only need a single jit entry; statically allocate it.  */
6476     static struct jit_code_entry one_entry;
6477 
6478     uintptr_t buf = (uintptr_t)buf_ptr;
6479     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6480     DebugFrameHeader *dfh;
6481 
6482     img = g_malloc(img_size);
6483     *img = img_template;
6484 
6485     img->phdr.p_vaddr = buf;
6486     img->phdr.p_paddr = buf;
6487     img->phdr.p_memsz = buf_size;
6488 
6489     img->shdr[1].sh_name = find_string(img->str, ".text");
6490     img->shdr[1].sh_addr = buf;
6491     img->shdr[1].sh_size = buf_size;
6492 
6493     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6494     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6495 
6496     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6497     img->shdr[4].sh_size = debug_frame_size;
6498 
6499     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6500     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6501 
6502     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6503     img->sym[1].st_value = buf;
6504     img->sym[1].st_size = buf_size;
6505 
6506     img->di.cu_low_pc = buf;
6507     img->di.cu_high_pc = buf + buf_size;
6508     img->di.fn_low_pc = buf;
6509     img->di.fn_high_pc = buf + buf_size;
6510 
6511     dfh = (DebugFrameHeader *)(img + 1);
6512     memcpy(dfh, debug_frame, debug_frame_size);
6513     dfh->fde.func_start = buf;
6514     dfh->fde.func_len = buf_size;
6515 
6516 #ifdef DEBUG_JIT
6517     /* Enable this block to be able to debug the ELF image file creation.
6518        One can use readelf, objdump, or other inspection utilities.  */
6519     {
6520         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6521         FILE *f = fopen(jit, "w+b");
6522         if (f) {
6523             if (fwrite(img, img_size, 1, f) != img_size) {
6524                 /* Avoid stupid unused return value warning for fwrite.  */
6525             }
6526             fclose(f);
6527         }
6528     }
6529 #endif
6530 
6531     one_entry.symfile_addr = img;
6532     one_entry.symfile_size = img_size;
6533 
6534     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6535     __jit_debug_descriptor.relevant_entry = &one_entry;
6536     __jit_debug_descriptor.first_entry = &one_entry;
6537     __jit_debug_register_code();
6538 }
6539 #else
6540 /* No support for the feature.  Provide the entry point expected by exec.c,
6541    and implement the internal function we declared earlier.  */
6542 
/* ELF_HOST_MACHINE is not defined for this host, so debugger JIT
   registration is unsupported; this stub satisfies the internal
   declaration with a no-op. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6548 
/* Public no-op entry point expected by exec.c when the host has no
   ELF_HOST_MACHINE definition. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6552 #endif /* ELF_HOST_MACHINE */
6553 
6554 #if !TCG_TARGET_MAYBE_vec
/* With TCG_TARGET_MAYBE_vec == 0 the backend emits no vector ops, so
   no vector op should ever require expansion; reaching this stub
   indicates a bug. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6559 #endif
6560