/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
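
/*
 * These mirror the DWARF .debug_frame CIE and FDE record layouts that
 * tcg_register_jit_int() hands to GDB's JIT-reading interface; each
 * host backend appends its own register-unwind data to them.
 */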

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr of the insn following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
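
/*
 * Illustrative note: with TCG_TARGET_INSN_UNIT_SIZE == 1 (a byte-stream
 * host such as x86), tcg_out32() above takes the memcpy branch and
 * advances code_ptr by 4 units; with 4-byte units (a fixed-width RISC
 * host) it stores a single unit directly.  The unit size is a
 * compile-time constant, so the untaken branch folds away entirely.
 */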

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
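
/*
 * Illustrative flow (not functional code): a backend emitting a forward
 * branch to a not-yet-bound label records the site, and the patch is
 * applied once all code has been generated:
 *
 *     tcg_out_reloc(s, s->code_ptr, R_TYPE, l, 0);   // remember the site
 *     ...                                            // emit more code
 *     tcg_out_label(s, l);                           // bind l to code_ptr
 *     ...
 *     tcg_resolve_relocs(s);                         // patch_reloc() each site
 *
 * where R_TYPE stands for a host-specific relocation type.
 */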

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
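
/*
 * Example (illustrative): on a 64-bit host,
 *     tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, a)
 * sign-extends the low 16 bits of register a into register d,
 * via the tcg_out_ext16s() case above.
 */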

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers; now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
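
/*
 * Overlap example (illustrative): with i1 = {dst: R0, src: R1} and
 * i2 = {dst: R1, src: R0} the two moves form a cycle.  Either a native
 * xchg swaps R0/R1 so that each value can be extended in place, or
 * i1's source is first parked in @scratch so neither value is
 * clobbered before it is consumed.
 */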

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
598      * either "clockwise" or "anti-clockwise", and can be solved with
599      * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers; now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
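
/*
 * Worked example (illustrative): a line "C_O1_I2(r, r, ri)" in
 * tcg-target-con-set.h expands below to the enumerator c_o1_i2_r_r_ri,
 * and in the second inclusion further down to the constraint-string
 * set { .args_ct_str = { "r", "r", "ri" } } at the same array index.
 */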

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
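
/*
 * With the final definitions above, a backend's tcg_target_op_def()
 * can simply write "return C_O1_I2(r, r, ri);" and yield the matching
 * TCGConstraintSetIndex enumerator (illustrative; the actual sets are
 * per-target).
 */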

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
777  * is not used anymore for translation once this function is called.
778  *
779  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
780  * iterates over the array (e.g. tcg_code_size() the same for both system/user
781  * modes.
782  */
783 #ifdef CONFIG_USER_ONLY
784 void tcg_register_thread(void)
785 {
786     tcg_ctx = &tcg_init_ctx;
787 }
788 #else
789 void tcg_register_thread(void)
790 {
791     TCGContext *s = g_malloc(sizeof(*s));
792     unsigned int i, n;
793 
794     *s = tcg_init_ctx;
795 
796     /* Relink mem_base.  */
797     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
798         if (tcg_init_ctx.temps[i].mem_base) {
799             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
800             tcg_debug_assert(b >= 0 && b < n);
801             s->temps[i].mem_base = &s->temps[b];
802         }
803     }
804 
805     /* Claim an entry in tcg_ctxs */
806     n = qatomic_fetch_inc(&tcg_cur_ctxs);
807     g_assert(n < tcg_max_ctxs);
808     qatomic_set(&tcg_ctxs[n], s);
809 
810     if (n > 0) {
811         alloc_tcg_plugin_context(s);
812         tcg_region_initial_alloc(s);
813     }
814 
815     tcg_ctx = s;
816 }
817 #endif /* !CONFIG_USER_ONLY */
818 
819 /* pool based memory allocation */
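/*
 * Note: this is the slow path behind the inline tcg_malloc() fast path
 * in tcg.h; it is reached when the current chunk cannot satisfy the
 * request.  Small requests advance to (or allocate) the next
 * TCG_POOL_CHUNK_SIZE chunk, while oversized requests get a dedicated
 * "large" pool that tcg_pool_reset() below frees wholesale.
 */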
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
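
/*
 * Illustrative decoding: typemask packs one 3-bit typecode per slot,
 * slot 0 being the return type and slot N argument N.  For
 * info_helper_ld32_mmu above, "typemask >> 3" leaves the four argument
 * fields (env, i64 addr, i32 oi, ptr ra), so nargs resolves to 4.
 */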

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
1027      * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
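
/*
 * Example (illustrative): on a host with 6 integer argument registers
 * and an 8-byte tcg_target_long, arg_slot 7 is not a register slot and
 * maps to stack offset TCG_TARGET_CALL_STACK_OFFSET + 1 * 8.
 */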

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
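
/*
 * E.g. an odd arg_slot of 3 becomes 4, for ABIs that pass 64-bit
 * values in aligned (even/odd) register pairs or stack slots.
 */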

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
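
/*
 * Example (illustrative): on a 64-bit host n == 2, so an I128 argument
 * consumes one TCG_CALL_ARG_BY_REF slot (the pointer, plus ref_slot
 * space for word 0) and one TCG_CALL_ARG_BY_REF_N entry for word 1.
 */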

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
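
/*
 * Worked example (illustrative): for info_helper_ld64_mmu on a 64-bit
 * host with TCG_CALL_ARG_NORMAL, the return value occupies a single
 * register and the four inputs (env, addr, oi, ra) land in arg_slots
 * 0-3, so nr_out == 1 and nr_in == 4, with no ref_slot relocation.
 */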

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
1364      * reasoning behind this.
1365      * In system-mode we will have at most max_cpus TCG threads.
1366      */
1367 #ifdef CONFIG_USER_ONLY
1368     tcg_ctxs = &tcg_ctx;
1369     tcg_cur_ctxs = 1;
1370     tcg_max_ctxs = 1;
1371 #else
1372     tcg_max_ctxs = max_cpus;
1373     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1374 #endif
1375 
1376     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1377     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1378     tcg_env = temp_tcgv_ptr(ts);
1379 }
1380 
1381 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
1382 {
1383     tcg_context_init(max_cpus);
1384     tcg_region_init(tb_size, splitwx, max_cpus);
1385 }
1386 
1387 /*
1388  * Allocate TBs right before their corresponding translated code, making
1389  * sure that TBs and code are on different cache lines.
1390  */
1391 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1392 {
1393     uintptr_t align = qemu_icache_linesize;
1394     TranslationBlock *tb;
1395     void *next;
1396 
1397  retry:
1398     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1399     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1400 
1401     if (unlikely(next > s->code_gen_highwater)) {
1402         if (tcg_region_alloc(s)) {
1403             return NULL;
1404         }
1405         goto retry;
1406     }
1407     qatomic_set(&s->code_gen_ptr, next);
1408     s->data_gen_ptr = NULL;
1409     return tb;
1410 }
1411 
1412 void tcg_prologue_init(void)
1413 {
1414     TCGContext *s = tcg_ctx;
1415     size_t prologue_size;
1416 
1417     s->code_ptr = s->code_gen_ptr;
1418     s->code_buf = s->code_gen_ptr;
1419     s->data_gen_ptr = NULL;
1420 
1421 #ifndef CONFIG_TCG_INTERPRETER
1422     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1423 #endif
1424 
1425 #ifdef TCG_TARGET_NEED_POOL_LABELS
1426     s->pool_labels = NULL;
1427 #endif
1428 
1429     qemu_thread_jit_write();
1430     /* Generate the prologue.  */
1431     tcg_target_qemu_prologue(s);
1432 
1433 #ifdef TCG_TARGET_NEED_POOL_LABELS
1434     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1435     {
1436         int result = tcg_out_pool_finalize(s);
1437         tcg_debug_assert(result == 0);
1438     }
1439 #endif
1440 
1441     prologue_size = tcg_current_code_size(s);
1442     perf_report_prologue(s->code_gen_ptr, prologue_size);
1443 
1444 #ifndef CONFIG_TCG_INTERPRETER
1445     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1446                         (uintptr_t)s->code_buf, prologue_size);
1447 #endif
1448 
1449     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1450         FILE *logfile = qemu_log_trylock();
1451         if (logfile) {
1452             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1453             if (s->data_gen_ptr) {
1454                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1455                 size_t data_size = prologue_size - code_size;
1456                 size_t i;
1457 
1458                 disas(logfile, s->code_gen_ptr, code_size);
1459 
1460                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1461                     if (sizeof(tcg_target_ulong) == 8) {
1462                         fprintf(logfile,
1463                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1464                                 (uintptr_t)s->data_gen_ptr + i,
1465                                 *(uint64_t *)(s->data_gen_ptr + i));
1466                     } else {
1467                         fprintf(logfile,
1468                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1469                                 (uintptr_t)s->data_gen_ptr + i,
1470                                 *(uint32_t *)(s->data_gen_ptr + i));
1471                     }
1472                 }
1473             } else {
1474                 disas(logfile, s->code_gen_ptr, prologue_size);
1475             }
1476             fprintf(logfile, "\n");
1477             qemu_log_unlock(logfile);
1478         }
1479     }
1480 
1481 #ifndef CONFIG_TCG_INTERPRETER
1482     /*
1483      * Assert that goto_ptr is implemented completely, setting an epilogue.
1484      * For tci, we use NULL as the signal to return from the interpreter,
1485      * so skip this check.
1486      */
1487     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1488 #endif
1489 
1490     tcg_region_prologue_set(s);
1491 }
1492 
1493 void tcg_func_start(TCGContext *s)
1494 {
1495     tcg_pool_reset(s);
1496     s->nb_temps = s->nb_globals;
1497 
1498     /* No temps have been previously allocated for size or locality.  */
1499     memset(s->free_temps, 0, sizeof(s->free_temps));
1500 
1501     /* No constant temps have been previously allocated. */
1502     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1503         if (s->const_table[i]) {
1504             g_hash_table_remove_all(s->const_table[i]);
1505         }
1506     }
1507 
1508     s->nb_ops = 0;
1509     s->nb_labels = 0;
1510     s->current_frame_offset = s->frame_start;
1511 
1512 #ifdef CONFIG_DEBUG_TCG
1513     s->goto_tb_issue_mask = 0;
1514 #endif
1515 
1516     QTAILQ_INIT(&s->ops);
1517     QTAILQ_INIT(&s->free_ops);
1518     QSIMPLEQ_INIT(&s->labels);
1519 
1520     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1521                      s->addr_type == TCG_TYPE_I64);
1522 
1523     tcg_debug_assert(s->insn_start_words > 0);
1524 }
1525 
1526 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1527 {
1528     int n = s->nb_temps++;
1529 
1530     if (n >= TCG_MAX_TEMPS) {
1531         tcg_raise_tb_overflow(s);
1532     }
1533     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1534 }
1535 
1536 static TCGTemp *tcg_global_alloc(TCGContext *s)
1537 {
1538     TCGTemp *ts;
1539 
1540     tcg_debug_assert(s->nb_globals == s->nb_temps);
1541     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1542     s->nb_globals++;
1543     ts = tcg_temp_alloc(s);
1544     ts->kind = TEMP_GLOBAL;
1545 
1546     return ts;
1547 }
1548 
1549 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1550                                             TCGReg reg, const char *name)
1551 {
1552     TCGTemp *ts;
1553 
1554     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1555 
1556     ts = tcg_global_alloc(s);
1557     ts->base_type = type;
1558     ts->type = type;
1559     ts->kind = TEMP_FIXED;
1560     ts->reg = reg;
1561     ts->name = name;
1562     tcg_regset_set_reg(s->reserved_regs, reg);
1563 
1564     return ts;
1565 }
1566 
1567 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1568 {
1569     s->frame_start = start;
1570     s->frame_end = start + size;
1571     s->frame_temp
1572         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1573 }
1574 
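/*
 * Typical use (values are illustrative): the target prologue reserves
 * a stack scratch area and registers it as the spill frame, e.g.
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 *
 * after which temp spills are addressed as offsets from the "_frame"
 * fixed global created above.
 */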
1575 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1576                                      intptr_t offset, const char *name)
1577 {
1578     TCGContext *s = tcg_ctx;
1579     TCGTemp *base_ts = tcgv_ptr_temp(base);
1580     TCGTemp *ts = tcg_global_alloc(s);
1581     int indirect_reg = 0;
1582 
1583     switch (base_ts->kind) {
1584     case TEMP_FIXED:
1585         break;
1586     case TEMP_GLOBAL:
1587         /* We do not support double-indirect registers.  */
1588         tcg_debug_assert(!base_ts->indirect_reg);
1589         base_ts->indirect_base = 1;
1590         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1591                             ? 2 : 1);
1592         indirect_reg = 1;
1593         break;
1594     default:
1595         g_assert_not_reached();
1596     }
1597 
1598     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1599         TCGTemp *ts2 = tcg_global_alloc(s);
1600         char buf[64];
1601 
1602         ts->base_type = TCG_TYPE_I64;
1603         ts->type = TCG_TYPE_I32;
1604         ts->indirect_reg = indirect_reg;
1605         ts->mem_allocated = 1;
1606         ts->mem_base = base_ts;
1607         ts->mem_offset = offset;
1608         pstrcpy(buf, sizeof(buf), name);
1609         pstrcat(buf, sizeof(buf), "_0");
1610         ts->name = strdup(buf);
1611 
1612         tcg_debug_assert(ts2 == ts + 1);
1613         ts2->base_type = TCG_TYPE_I64;
1614         ts2->type = TCG_TYPE_I32;
1615         ts2->indirect_reg = indirect_reg;
1616         ts2->mem_allocated = 1;
1617         ts2->mem_base = base_ts;
1618         ts2->mem_offset = offset + 4;
1619         ts2->temp_subindex = 1;
1620         pstrcpy(buf, sizeof(buf), name);
1621         pstrcat(buf, sizeof(buf), "_1");
1622         ts2->name = strdup(buf);
1623     } else {
1624         ts->base_type = type;
1625         ts->type = type;
1626         ts->indirect_reg = indirect_reg;
1627         ts->mem_allocated = 1;
1628         ts->mem_base = base_ts;
1629         ts->mem_offset = offset;
1630         ts->name = name;
1631     }
1632     return ts;
1633 }
1634 
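/*
 * Example (the guest type and field names are hypothetical): front
 * ends expose CPU state fields as globals via the typed wrappers, e.g.
 *
 *     TCGv_i32 cpu_reg = tcg_global_mem_new_i32(
 *         base, offsetof(CPUFooState, reg), "reg");
 *
 * On a 32-bit host a 64-bit global is split into two I32 halves named
 * "reg_0" and "reg_1", as in the first branch above.
 */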
1635 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1636 {
1637     TCGContext *s = tcg_ctx;
1638     TCGTemp *ts;
1639     int n;
1640 
1641     if (kind == TEMP_EBB) {
1642         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1643 
1644         if (idx < TCG_MAX_TEMPS) {
1645             /* There is already an available temp with the right type.  */
1646             clear_bit(idx, s->free_temps[type].l);
1647 
1648             ts = &s->temps[idx];
1649             ts->temp_allocated = 1;
1650             tcg_debug_assert(ts->base_type == type);
1651             tcg_debug_assert(ts->kind == kind);
1652             return ts;
1653         }
1654     } else {
1655         tcg_debug_assert(kind == TEMP_TB);
1656     }
1657 
1658     switch (type) {
1659     case TCG_TYPE_I32:
1660     case TCG_TYPE_V64:
1661     case TCG_TYPE_V128:
1662     case TCG_TYPE_V256:
1663         n = 1;
1664         break;
1665     case TCG_TYPE_I64:
1666         n = 64 / TCG_TARGET_REG_BITS;
1667         break;
1668     case TCG_TYPE_I128:
1669         n = 128 / TCG_TARGET_REG_BITS;
1670         break;
1671     default:
1672         g_assert_not_reached();
1673     }
1674 
1675     ts = tcg_temp_alloc(s);
1676     ts->base_type = type;
1677     ts->temp_allocated = 1;
1678     ts->kind = kind;
1679 
1680     if (n == 1) {
1681         ts->type = type;
1682     } else {
1683         ts->type = TCG_TYPE_REG;
1684 
1685         for (int i = 1; i < n; ++i) {
1686             TCGTemp *ts2 = tcg_temp_alloc(s);
1687 
1688             tcg_debug_assert(ts2 == ts + i);
1689             ts2->base_type = type;
1690             ts2->type = TCG_TYPE_REG;
1691             ts2->temp_allocated = 1;
1692             ts2->temp_subindex = i;
1693             ts2->kind = kind;
1694         }
1695     }
1696     return ts;
1697 }
1698 
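/*
 * Concretely (assuming a 64-bit host): TCG_TYPE_I128 gives n = 2, so
 * two adjacent TCGTemps are allocated, both of type TCG_TYPE_REG and
 * base_type TCG_TYPE_I128, with temp_subindex 0 and 1.  Callers always
 * hold a pointer to the first; the pieces are found via ts + subindex.
 */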
1699 TCGv_vec tcg_temp_new_vec(TCGType type)
1700 {
1701     TCGTemp *t;
1702 
1703 #ifdef CONFIG_DEBUG_TCG
1704     switch (type) {
1705     case TCG_TYPE_V64:
1706         assert(TCG_TARGET_HAS_v64);
1707         break;
1708     case TCG_TYPE_V128:
1709         assert(TCG_TARGET_HAS_v128);
1710         break;
1711     case TCG_TYPE_V256:
1712         assert(TCG_TARGET_HAS_v256);
1713         break;
1714     default:
1715         g_assert_not_reached();
1716     }
1717 #endif
1718 
1719     t = tcg_temp_new_internal(type, TEMP_EBB);
1720     return temp_tcgv_vec(t);
1721 }
1722 
1723 /* Create a new temp of the same type as an existing temp.  */
1724 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1725 {
1726     TCGTemp *t = tcgv_vec_temp(match);
1727 
1728     tcg_debug_assert(t->temp_allocated != 0);
1729 
1730     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1731     return temp_tcgv_vec(t);
1732 }
1733 
1734 void tcg_temp_free_internal(TCGTemp *ts)
1735 {
1736     TCGContext *s = tcg_ctx;
1737 
1738     switch (ts->kind) {
1739     case TEMP_CONST:
1740     case TEMP_TB:
1741         /* Silently ignore free. */
1742         break;
1743     case TEMP_EBB:
1744         tcg_debug_assert(ts->temp_allocated != 0);
1745         ts->temp_allocated = 0;
1746         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1747         break;
1748     default:
1749         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1750         g_assert_not_reached();
1751     }
1752 }
1753 
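/*
 * The bit set here pairs with the find_first_bit() fast path in
 * tcg_temp_new_internal(): a freed TEMP_EBB temp is recycled by the
 * next allocation of the same base type within this TB.
 */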
1754 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1755 {
1756     TCGContext *s = tcg_ctx;
1757     GHashTable *h = s->const_table[type];
1758     TCGTemp *ts;
1759 
1760     if (h == NULL) {
1761         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1762         s->const_table[type] = h;
1763     }
1764 
1765     ts = g_hash_table_lookup(h, &val);
1766     if (ts == NULL) {
1767         int64_t *val_ptr;
1768 
1769         ts = tcg_temp_alloc(s);
1770 
1771         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1772             TCGTemp *ts2 = tcg_temp_alloc(s);
1773 
1774             tcg_debug_assert(ts2 == ts + 1);
1775 
1776             ts->base_type = TCG_TYPE_I64;
1777             ts->type = TCG_TYPE_I32;
1778             ts->kind = TEMP_CONST;
1779             ts->temp_allocated = 1;
1780 
1781             ts2->base_type = TCG_TYPE_I64;
1782             ts2->type = TCG_TYPE_I32;
1783             ts2->kind = TEMP_CONST;
1784             ts2->temp_allocated = 1;
1785             ts2->temp_subindex = 1;
1786 
1787             /*
1788              * Retain the full value of the 64-bit constant in the low
1789              * part, so that the hash table works.  Actual uses will
1790              * truncate the value to the low part.
1791              */
1792             ts[HOST_BIG_ENDIAN].val = val;
1793             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1794             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1795         } else {
1796             ts->base_type = type;
1797             ts->type = type;
1798             ts->kind = TEMP_CONST;
1799             ts->temp_allocated = 1;
1800             ts->val = val;
1801             val_ptr = &ts->val;
1802         }
1803         g_hash_table_insert(h, val_ptr, ts);
1804     }
1805 
1806     return ts;
1807 }
1808 
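/*
 * Usage sketch: two calls such as tcg_constant_i32(0x10) within one
 * TB return the same interned TEMP_CONST temp, found via the hash
 * table keyed on the int64_t stored in ts->val.  Constant temps are
 * never explicitly freed (see tcg_temp_free_internal above).
 */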
1809 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1810 {
1811     val = dup_const(vece, val);
1812     return temp_tcgv_vec(tcg_constant_internal(type, val));
1813 }
1814 
1815 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1816 {
1817     TCGTemp *t = tcgv_vec_temp(match);
1818 
1819     tcg_debug_assert(t->temp_allocated != 0);
1820     return tcg_constant_vec(t->base_type, vece, val);
1821 }
1822 
1823 #ifdef CONFIG_DEBUG_TCG
1824 size_t temp_idx(TCGTemp *ts)
1825 {
1826     ptrdiff_t n = ts - tcg_ctx->temps;
1827     assert(n >= 0 && n < tcg_ctx->nb_temps);
1828     return n;
1829 }
1830 
1831 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1832 {
1833     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1834 
1835     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1836     assert(o % sizeof(TCGTemp) == 0);
1837 
1838     return (void *)tcg_ctx + (uintptr_t)v;
1839 }
1840 #endif /* CONFIG_DEBUG_TCG */
1841 
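/*
 * Note on the encoding checked above: a TCGv_* handle is not a real
 * pointer; its value is the byte offset of the underlying TCGTemp
 * from tcg_ctx.  Hence the debug accessor re-adds tcg_ctx and asserts
 * that the offset is TCGTemp-aligned within the temps[] array.
 */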
1842 /* Return true if OP may appear in the opcode stream.
1843    Test the runtime variable that controls each opcode.  */
1844 bool tcg_op_supported(TCGOpcode op)
1845 {
1846     const bool have_vec
1847         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1848 
1849     switch (op) {
1850     case INDEX_op_discard:
1851     case INDEX_op_set_label:
1852     case INDEX_op_call:
1853     case INDEX_op_br:
1854     case INDEX_op_mb:
1855     case INDEX_op_insn_start:
1856     case INDEX_op_exit_tb:
1857     case INDEX_op_goto_tb:
1858     case INDEX_op_goto_ptr:
1859     case INDEX_op_qemu_ld_a32_i32:
1860     case INDEX_op_qemu_ld_a64_i32:
1861     case INDEX_op_qemu_st_a32_i32:
1862     case INDEX_op_qemu_st_a64_i32:
1863     case INDEX_op_qemu_ld_a32_i64:
1864     case INDEX_op_qemu_ld_a64_i64:
1865     case INDEX_op_qemu_st_a32_i64:
1866     case INDEX_op_qemu_st_a64_i64:
1867         return true;
1868 
1869     case INDEX_op_qemu_st8_a32_i32:
1870     case INDEX_op_qemu_st8_a64_i32:
1871         return TCG_TARGET_HAS_qemu_st8_i32;
1872 
1873     case INDEX_op_qemu_ld_a32_i128:
1874     case INDEX_op_qemu_ld_a64_i128:
1875     case INDEX_op_qemu_st_a32_i128:
1876     case INDEX_op_qemu_st_a64_i128:
1877         return TCG_TARGET_HAS_qemu_ldst_i128;
1878 
1879     case INDEX_op_mov_i32:
1880     case INDEX_op_setcond_i32:
1881     case INDEX_op_brcond_i32:
1882     case INDEX_op_ld8u_i32:
1883     case INDEX_op_ld8s_i32:
1884     case INDEX_op_ld16u_i32:
1885     case INDEX_op_ld16s_i32:
1886     case INDEX_op_ld_i32:
1887     case INDEX_op_st8_i32:
1888     case INDEX_op_st16_i32:
1889     case INDEX_op_st_i32:
1890     case INDEX_op_add_i32:
1891     case INDEX_op_sub_i32:
1892     case INDEX_op_mul_i32:
1893     case INDEX_op_and_i32:
1894     case INDEX_op_or_i32:
1895     case INDEX_op_xor_i32:
1896     case INDEX_op_shl_i32:
1897     case INDEX_op_shr_i32:
1898     case INDEX_op_sar_i32:
1899         return true;
1900 
1901     case INDEX_op_negsetcond_i32:
1902         return TCG_TARGET_HAS_negsetcond_i32;
1903     case INDEX_op_movcond_i32:
1904         return TCG_TARGET_HAS_movcond_i32;
1905     case INDEX_op_div_i32:
1906     case INDEX_op_divu_i32:
1907         return TCG_TARGET_HAS_div_i32;
1908     case INDEX_op_rem_i32:
1909     case INDEX_op_remu_i32:
1910         return TCG_TARGET_HAS_rem_i32;
1911     case INDEX_op_div2_i32:
1912     case INDEX_op_divu2_i32:
1913         return TCG_TARGET_HAS_div2_i32;
1914     case INDEX_op_rotl_i32:
1915     case INDEX_op_rotr_i32:
1916         return TCG_TARGET_HAS_rot_i32;
1917     case INDEX_op_deposit_i32:
1918         return TCG_TARGET_HAS_deposit_i32;
1919     case INDEX_op_extract_i32:
1920         return TCG_TARGET_HAS_extract_i32;
1921     case INDEX_op_sextract_i32:
1922         return TCG_TARGET_HAS_sextract_i32;
1923     case INDEX_op_extract2_i32:
1924         return TCG_TARGET_HAS_extract2_i32;
1925     case INDEX_op_add2_i32:
1926         return TCG_TARGET_HAS_add2_i32;
1927     case INDEX_op_sub2_i32:
1928         return TCG_TARGET_HAS_sub2_i32;
1929     case INDEX_op_mulu2_i32:
1930         return TCG_TARGET_HAS_mulu2_i32;
1931     case INDEX_op_muls2_i32:
1932         return TCG_TARGET_HAS_muls2_i32;
1933     case INDEX_op_muluh_i32:
1934         return TCG_TARGET_HAS_muluh_i32;
1935     case INDEX_op_mulsh_i32:
1936         return TCG_TARGET_HAS_mulsh_i32;
1937     case INDEX_op_ext8s_i32:
1938         return TCG_TARGET_HAS_ext8s_i32;
1939     case INDEX_op_ext16s_i32:
1940         return TCG_TARGET_HAS_ext16s_i32;
1941     case INDEX_op_ext8u_i32:
1942         return TCG_TARGET_HAS_ext8u_i32;
1943     case INDEX_op_ext16u_i32:
1944         return TCG_TARGET_HAS_ext16u_i32;
1945     case INDEX_op_bswap16_i32:
1946         return TCG_TARGET_HAS_bswap16_i32;
1947     case INDEX_op_bswap32_i32:
1948         return TCG_TARGET_HAS_bswap32_i32;
1949     case INDEX_op_not_i32:
1950         return TCG_TARGET_HAS_not_i32;
1951     case INDEX_op_neg_i32:
1952         return TCG_TARGET_HAS_neg_i32;
1953     case INDEX_op_andc_i32:
1954         return TCG_TARGET_HAS_andc_i32;
1955     case INDEX_op_orc_i32:
1956         return TCG_TARGET_HAS_orc_i32;
1957     case INDEX_op_eqv_i32:
1958         return TCG_TARGET_HAS_eqv_i32;
1959     case INDEX_op_nand_i32:
1960         return TCG_TARGET_HAS_nand_i32;
1961     case INDEX_op_nor_i32:
1962         return TCG_TARGET_HAS_nor_i32;
1963     case INDEX_op_clz_i32:
1964         return TCG_TARGET_HAS_clz_i32;
1965     case INDEX_op_ctz_i32:
1966         return TCG_TARGET_HAS_ctz_i32;
1967     case INDEX_op_ctpop_i32:
1968         return TCG_TARGET_HAS_ctpop_i32;
1969 
1970     case INDEX_op_brcond2_i32:
1971     case INDEX_op_setcond2_i32:
1972         return TCG_TARGET_REG_BITS == 32;
1973 
1974     case INDEX_op_mov_i64:
1975     case INDEX_op_setcond_i64:
1976     case INDEX_op_brcond_i64:
1977     case INDEX_op_ld8u_i64:
1978     case INDEX_op_ld8s_i64:
1979     case INDEX_op_ld16u_i64:
1980     case INDEX_op_ld16s_i64:
1981     case INDEX_op_ld32u_i64:
1982     case INDEX_op_ld32s_i64:
1983     case INDEX_op_ld_i64:
1984     case INDEX_op_st8_i64:
1985     case INDEX_op_st16_i64:
1986     case INDEX_op_st32_i64:
1987     case INDEX_op_st_i64:
1988     case INDEX_op_add_i64:
1989     case INDEX_op_sub_i64:
1990     case INDEX_op_mul_i64:
1991     case INDEX_op_and_i64:
1992     case INDEX_op_or_i64:
1993     case INDEX_op_xor_i64:
1994     case INDEX_op_shl_i64:
1995     case INDEX_op_shr_i64:
1996     case INDEX_op_sar_i64:
1997     case INDEX_op_ext_i32_i64:
1998     case INDEX_op_extu_i32_i64:
1999         return TCG_TARGET_REG_BITS == 64;
2000 
2001     case INDEX_op_negsetcond_i64:
2002         return TCG_TARGET_HAS_negsetcond_i64;
2003     case INDEX_op_movcond_i64:
2004         return TCG_TARGET_HAS_movcond_i64;
2005     case INDEX_op_div_i64:
2006     case INDEX_op_divu_i64:
2007         return TCG_TARGET_HAS_div_i64;
2008     case INDEX_op_rem_i64:
2009     case INDEX_op_remu_i64:
2010         return TCG_TARGET_HAS_rem_i64;
2011     case INDEX_op_div2_i64:
2012     case INDEX_op_divu2_i64:
2013         return TCG_TARGET_HAS_div2_i64;
2014     case INDEX_op_rotl_i64:
2015     case INDEX_op_rotr_i64:
2016         return TCG_TARGET_HAS_rot_i64;
2017     case INDEX_op_deposit_i64:
2018         return TCG_TARGET_HAS_deposit_i64;
2019     case INDEX_op_extract_i64:
2020         return TCG_TARGET_HAS_extract_i64;
2021     case INDEX_op_sextract_i64:
2022         return TCG_TARGET_HAS_sextract_i64;
2023     case INDEX_op_extract2_i64:
2024         return TCG_TARGET_HAS_extract2_i64;
2025     case INDEX_op_extrl_i64_i32:
2026     case INDEX_op_extrh_i64_i32:
2027         return TCG_TARGET_HAS_extr_i64_i32;
2028     case INDEX_op_ext8s_i64:
2029         return TCG_TARGET_HAS_ext8s_i64;
2030     case INDEX_op_ext16s_i64:
2031         return TCG_TARGET_HAS_ext16s_i64;
2032     case INDEX_op_ext32s_i64:
2033         return TCG_TARGET_HAS_ext32s_i64;
2034     case INDEX_op_ext8u_i64:
2035         return TCG_TARGET_HAS_ext8u_i64;
2036     case INDEX_op_ext16u_i64:
2037         return TCG_TARGET_HAS_ext16u_i64;
2038     case INDEX_op_ext32u_i64:
2039         return TCG_TARGET_HAS_ext32u_i64;
2040     case INDEX_op_bswap16_i64:
2041         return TCG_TARGET_HAS_bswap16_i64;
2042     case INDEX_op_bswap32_i64:
2043         return TCG_TARGET_HAS_bswap32_i64;
2044     case INDEX_op_bswap64_i64:
2045         return TCG_TARGET_HAS_bswap64_i64;
2046     case INDEX_op_not_i64:
2047         return TCG_TARGET_HAS_not_i64;
2048     case INDEX_op_neg_i64:
2049         return TCG_TARGET_HAS_neg_i64;
2050     case INDEX_op_andc_i64:
2051         return TCG_TARGET_HAS_andc_i64;
2052     case INDEX_op_orc_i64:
2053         return TCG_TARGET_HAS_orc_i64;
2054     case INDEX_op_eqv_i64:
2055         return TCG_TARGET_HAS_eqv_i64;
2056     case INDEX_op_nand_i64:
2057         return TCG_TARGET_HAS_nand_i64;
2058     case INDEX_op_nor_i64:
2059         return TCG_TARGET_HAS_nor_i64;
2060     case INDEX_op_clz_i64:
2061         return TCG_TARGET_HAS_clz_i64;
2062     case INDEX_op_ctz_i64:
2063         return TCG_TARGET_HAS_ctz_i64;
2064     case INDEX_op_ctpop_i64:
2065         return TCG_TARGET_HAS_ctpop_i64;
2066     case INDEX_op_add2_i64:
2067         return TCG_TARGET_HAS_add2_i64;
2068     case INDEX_op_sub2_i64:
2069         return TCG_TARGET_HAS_sub2_i64;
2070     case INDEX_op_mulu2_i64:
2071         return TCG_TARGET_HAS_mulu2_i64;
2072     case INDEX_op_muls2_i64:
2073         return TCG_TARGET_HAS_muls2_i64;
2074     case INDEX_op_muluh_i64:
2075         return TCG_TARGET_HAS_muluh_i64;
2076     case INDEX_op_mulsh_i64:
2077         return TCG_TARGET_HAS_mulsh_i64;
2078 
2079     case INDEX_op_mov_vec:
2080     case INDEX_op_dup_vec:
2081     case INDEX_op_dupm_vec:
2082     case INDEX_op_ld_vec:
2083     case INDEX_op_st_vec:
2084     case INDEX_op_add_vec:
2085     case INDEX_op_sub_vec:
2086     case INDEX_op_and_vec:
2087     case INDEX_op_or_vec:
2088     case INDEX_op_xor_vec:
2089     case INDEX_op_cmp_vec:
2090         return have_vec;
2091     case INDEX_op_dup2_vec:
2092         return have_vec && TCG_TARGET_REG_BITS == 32;
2093     case INDEX_op_not_vec:
2094         return have_vec && TCG_TARGET_HAS_not_vec;
2095     case INDEX_op_neg_vec:
2096         return have_vec && TCG_TARGET_HAS_neg_vec;
2097     case INDEX_op_abs_vec:
2098         return have_vec && TCG_TARGET_HAS_abs_vec;
2099     case INDEX_op_andc_vec:
2100         return have_vec && TCG_TARGET_HAS_andc_vec;
2101     case INDEX_op_orc_vec:
2102         return have_vec && TCG_TARGET_HAS_orc_vec;
2103     case INDEX_op_nand_vec:
2104         return have_vec && TCG_TARGET_HAS_nand_vec;
2105     case INDEX_op_nor_vec:
2106         return have_vec && TCG_TARGET_HAS_nor_vec;
2107     case INDEX_op_eqv_vec:
2108         return have_vec && TCG_TARGET_HAS_eqv_vec;
2109     case INDEX_op_mul_vec:
2110         return have_vec && TCG_TARGET_HAS_mul_vec;
2111     case INDEX_op_shli_vec:
2112     case INDEX_op_shri_vec:
2113     case INDEX_op_sari_vec:
2114         return have_vec && TCG_TARGET_HAS_shi_vec;
2115     case INDEX_op_shls_vec:
2116     case INDEX_op_shrs_vec:
2117     case INDEX_op_sars_vec:
2118         return have_vec && TCG_TARGET_HAS_shs_vec;
2119     case INDEX_op_shlv_vec:
2120     case INDEX_op_shrv_vec:
2121     case INDEX_op_sarv_vec:
2122         return have_vec && TCG_TARGET_HAS_shv_vec;
2123     case INDEX_op_rotli_vec:
2124         return have_vec && TCG_TARGET_HAS_roti_vec;
2125     case INDEX_op_rotls_vec:
2126         return have_vec && TCG_TARGET_HAS_rots_vec;
2127     case INDEX_op_rotlv_vec:
2128     case INDEX_op_rotrv_vec:
2129         return have_vec && TCG_TARGET_HAS_rotv_vec;
2130     case INDEX_op_ssadd_vec:
2131     case INDEX_op_usadd_vec:
2132     case INDEX_op_sssub_vec:
2133     case INDEX_op_ussub_vec:
2134         return have_vec && TCG_TARGET_HAS_sat_vec;
2135     case INDEX_op_smin_vec:
2136     case INDEX_op_umin_vec:
2137     case INDEX_op_smax_vec:
2138     case INDEX_op_umax_vec:
2139         return have_vec && TCG_TARGET_HAS_minmax_vec;
2140     case INDEX_op_bitsel_vec:
2141         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2142     case INDEX_op_cmpsel_vec:
2143         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2144 
2145     default:
2146         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2147         return true;
2148     }
2149 }
2150 
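/*
 * Example check (a sketch): a caller can guard optional opcodes with
 * this predicate before emitting them, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ... emit ctpop_i32 directly ...
 *     } else {
 *         ... expand via simpler ops ...
 *     }
 */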
2151 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2152 
2153 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2154 {
2155     TCGv_i64 extend_free[MAX_CALL_IARGS];
2156     int n_extend = 0;
2157     TCGOp *op;
2158     int i, n, pi = 0, total_args;
2159 
2160     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2161         init_call_layout(info);
2162         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2163     }
2164 
2165     total_args = info->nr_out + info->nr_in + 2;
2166     op = tcg_op_alloc(INDEX_op_call, total_args);
2167 
2168 #ifdef CONFIG_PLUGIN
2169     /* Flag helpers that may affect guest state */
2170     if (tcg_ctx->plugin_insn &&
2171         !(info->flags & TCG_CALL_PLUGIN) &&
2172         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2173         tcg_ctx->plugin_insn->calls_helpers = true;
2174     }
2175 #endif
2176 
2177     TCGOP_CALLO(op) = n = info->nr_out;
2178     switch (n) {
2179     case 0:
2180         tcg_debug_assert(ret == NULL);
2181         break;
2182     case 1:
2183         tcg_debug_assert(ret != NULL);
2184         op->args[pi++] = temp_arg(ret);
2185         break;
2186     case 2:
2187     case 4:
2188         tcg_debug_assert(ret != NULL);
2189         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2190         tcg_debug_assert(ret->temp_subindex == 0);
2191         for (i = 0; i < n; ++i) {
2192             op->args[pi++] = temp_arg(ret + i);
2193         }
2194         break;
2195     default:
2196         g_assert_not_reached();
2197     }
2198 
2199     TCGOP_CALLI(op) = n = info->nr_in;
2200     for (i = 0; i < n; i++) {
2201         const TCGCallArgumentLoc *loc = &info->in[i];
2202         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2203 
2204         switch (loc->kind) {
2205         case TCG_CALL_ARG_NORMAL:
2206         case TCG_CALL_ARG_BY_REF:
2207         case TCG_CALL_ARG_BY_REF_N:
2208             op->args[pi++] = temp_arg(ts);
2209             break;
2210 
2211         case TCG_CALL_ARG_EXTEND_U:
2212         case TCG_CALL_ARG_EXTEND_S:
2213             {
2214                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2215                 TCGv_i32 orig = temp_tcgv_i32(ts);
2216 
2217                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2218                     tcg_gen_ext_i32_i64(temp, orig);
2219                 } else {
2220                     tcg_gen_extu_i32_i64(temp, orig);
2221                 }
2222                 op->args[pi++] = tcgv_i64_arg(temp);
2223                 extend_free[n_extend++] = temp;
2224             }
2225             break;
2226 
2227         default:
2228             g_assert_not_reached();
2229         }
2230     }
2231     op->args[pi++] = (uintptr_t)info->func;
2232     op->args[pi++] = (uintptr_t)info;
2233     tcg_debug_assert(pi == total_args);
2234 
2235     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2236 
2237     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2238     for (i = 0; i < n_extend; ++i) {
2239         tcg_temp_free_i64(extend_free[i]);
2240     }
2241 }
2242 
2243 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2244 {
2245     tcg_gen_callN(info, ret, NULL);
2246 }
2247 
2248 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2249 {
2250     tcg_gen_callN(info, ret, &t1);
2251 }
2252 
2253 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2254 {
2255     TCGTemp *args[2] = { t1, t2 };
2256     tcg_gen_callN(info, ret, args);
2257 }
2258 
2259 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2260                    TCGTemp *t2, TCGTemp *t3)
2261 {
2262     TCGTemp *args[3] = { t1, t2, t3 };
2263     tcg_gen_callN(info, ret, args);
2264 }
2265 
2266 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2267                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2268 {
2269     TCGTemp *args[4] = { t1, t2, t3, t4 };
2270     tcg_gen_callN(info, ret, args);
2271 }
2272 
2273 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2274                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2275 {
2276     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2277     tcg_gen_callN(info, ret, args);
2278 }
2279 
2280 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2281                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2282 {
2283     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2284     tcg_gen_callN(info, ret, args);
2285 }
2286 
2287 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2288                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2289                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2290 {
2291     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2292     tcg_gen_callN(info, ret, args);
2293 }
2294 
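/*
 * These fixed-arity wrappers are the targets of the generated helper
 * glue; a helper declaration expands to (roughly, names illustrative)
 *
 *     gen_helper_foo(ret, a, b)
 *         -> tcg_gen_call2(&helper_info_foo, ret_temp, a_temp, b_temp);
 *
 * so every helper invocation funnels through tcg_gen_callN above.
 */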
2295 static void tcg_reg_alloc_start(TCGContext *s)
2296 {
2297     int i, n;
2298 
2299     for (i = 0, n = s->nb_temps; i < n; i++) {
2300         TCGTemp *ts = &s->temps[i];
2301         TCGTempVal val = TEMP_VAL_MEM;
2302 
2303         switch (ts->kind) {
2304         case TEMP_CONST:
2305             val = TEMP_VAL_CONST;
2306             break;
2307         case TEMP_FIXED:
2308             val = TEMP_VAL_REG;
2309             break;
2310         case TEMP_GLOBAL:
2311             break;
2312         case TEMP_EBB:
2313             val = TEMP_VAL_DEAD;
2314             /* fall through */
2315         case TEMP_TB:
2316             ts->mem_allocated = 0;
2317             break;
2318         default:
2319             g_assert_not_reached();
2320         }
2321         ts->val_type = val;
2322     }
2323 
2324     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2325 }
2326 
2327 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2328                                  TCGTemp *ts)
2329 {
2330     int idx = temp_idx(ts);
2331 
2332     switch (ts->kind) {
2333     case TEMP_FIXED:
2334     case TEMP_GLOBAL:
2335         pstrcpy(buf, buf_size, ts->name);
2336         break;
2337     case TEMP_TB:
2338         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2339         break;
2340     case TEMP_EBB:
2341         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2342         break;
2343     case TEMP_CONST:
2344         switch (ts->type) {
2345         case TCG_TYPE_I32:
2346             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2347             break;
2348 #if TCG_TARGET_REG_BITS > 32
2349         case TCG_TYPE_I64:
2350             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2351             break;
2352 #endif
2353         case TCG_TYPE_V64:
2354         case TCG_TYPE_V128:
2355         case TCG_TYPE_V256:
2356             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2357                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2358             break;
2359         default:
2360             g_assert_not_reached();
2361         }
2362         break;
2363     }
2364     return buf;
2365 }
2366 
2367 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2368                              int buf_size, TCGArg arg)
2369 {
2370     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2371 }
2372 
2373 static const char * const cond_name[] =
2374 {
2375     [TCG_COND_NEVER] = "never",
2376     [TCG_COND_ALWAYS] = "always",
2377     [TCG_COND_EQ] = "eq",
2378     [TCG_COND_NE] = "ne",
2379     [TCG_COND_LT] = "lt",
2380     [TCG_COND_GE] = "ge",
2381     [TCG_COND_LE] = "le",
2382     [TCG_COND_GT] = "gt",
2383     [TCG_COND_LTU] = "ltu",
2384     [TCG_COND_GEU] = "geu",
2385     [TCG_COND_LEU] = "leu",
2386     [TCG_COND_GTU] = "gtu"
2387 };
2388 
2389 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2390 {
2391     [MO_UB]   = "ub",
2392     [MO_SB]   = "sb",
2393     [MO_LEUW] = "leuw",
2394     [MO_LESW] = "lesw",
2395     [MO_LEUL] = "leul",
2396     [MO_LESL] = "lesl",
2397     [MO_LEUQ] = "leq",
2398     [MO_BEUW] = "beuw",
2399     [MO_BESW] = "besw",
2400     [MO_BEUL] = "beul",
2401     [MO_BESL] = "besl",
2402     [MO_BEUQ] = "beq",
2403     [MO_128 + MO_BE] = "beo",
2404     [MO_128 + MO_LE] = "leo",
2405 };
2406 
2407 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2408     [MO_UNALN >> MO_ASHIFT]    = "un+",
2409     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2410     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2411     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2412     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2413     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2414     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2415     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2416 };
2417 
2418 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2419     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2420     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2421     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2422     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2423     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2424     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2425 };
2426 
2427 static const char bswap_flag_name[][6] = {
2428     [TCG_BSWAP_IZ] = "iz",
2429     [TCG_BSWAP_OZ] = "oz",
2430     [TCG_BSWAP_OS] = "os",
2431     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2432     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2433 };
2434 
2435 static inline bool tcg_regset_single(TCGRegSet d)
2436 {
2437     return (d & (d - 1)) == 0;
2438 }
2439 
2440 static inline TCGReg tcg_regset_first(TCGRegSet d)
2441 {
2442     if (TCG_TARGET_NB_REGS <= 32) {
2443         return ctz32(d);
2444     } else {
2445         return ctz64(d);
2446     }
2447 }
2448 
2449 /* Return only the number of characters output -- no error return. */
2450 #define ne_fprintf(...) \
2451     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2452 
2453 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2454 {
2455     char buf[128];
2456     TCGOp *op;
2457 
2458     QTAILQ_FOREACH(op, &s->ops, link) {
2459         int i, k, nb_oargs, nb_iargs, nb_cargs;
2460         const TCGOpDef *def;
2461         TCGOpcode c;
2462         int col = 0;
2463 
2464         c = op->opc;
2465         def = &tcg_op_defs[c];
2466 
2467         if (c == INDEX_op_insn_start) {
2468             nb_oargs = 0;
2469             col += ne_fprintf(f, "\n ----");
2470 
2471             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2472                 col += ne_fprintf(f, " %016" PRIx64,
2473                                   tcg_get_insn_start_param(op, i));
2474             }
2475         } else if (c == INDEX_op_call) {
2476             const TCGHelperInfo *info = tcg_call_info(op);
2477             void *func = tcg_call_func(op);
2478 
2479             /* variable number of arguments */
2480             nb_oargs = TCGOP_CALLO(op);
2481             nb_iargs = TCGOP_CALLI(op);
2482             nb_cargs = def->nb_cargs;
2483 
2484             col += ne_fprintf(f, " %s ", def->name);
2485 
2486             /*
2487              * Print the function name from TCGHelperInfo, if available.
2488              * Note that plugins have a template function for the info,
2489              * but the actual function pointer comes from the plugin.
2490              */
2491             if (func == info->func) {
2492                 col += ne_fprintf(f, "%s", info->name);
2493             } else {
2494                 col += ne_fprintf(f, "plugin(%p)", func);
2495             }
2496 
2497             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2498             for (i = 0; i < nb_oargs; i++) {
2499                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2500                                                             op->args[i]));
2501             }
2502             for (i = 0; i < nb_iargs; i++) {
2503                 TCGArg arg = op->args[nb_oargs + i];
2504                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2505                 col += ne_fprintf(f, ",%s", t);
2506             }
2507         } else {
2508             col += ne_fprintf(f, " %s ", def->name);
2509 
2510             nb_oargs = def->nb_oargs;
2511             nb_iargs = def->nb_iargs;
2512             nb_cargs = def->nb_cargs;
2513 
2514             if (def->flags & TCG_OPF_VECTOR) {
2515                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2516                                   8 << TCGOP_VECE(op));
2517             }
2518 
2519             k = 0;
2520             for (i = 0; i < nb_oargs; i++) {
2521                 const char *sep =  k ? "," : "";
2522                 col += ne_fprintf(f, "%s%s", sep,
2523                                   tcg_get_arg_str(s, buf, sizeof(buf),
2524                                                   op->args[k++]));
2525             }
2526             for (i = 0; i < nb_iargs; i++) {
2527                 const char *sep =  k ? "," : "";
2528                 col += ne_fprintf(f, "%s%s", sep,
2529                                   tcg_get_arg_str(s, buf, sizeof(buf),
2530                                                   op->args[k++]));
2531             }
2532             switch (c) {
2533             case INDEX_op_brcond_i32:
2534             case INDEX_op_setcond_i32:
2535             case INDEX_op_negsetcond_i32:
2536             case INDEX_op_movcond_i32:
2537             case INDEX_op_brcond2_i32:
2538             case INDEX_op_setcond2_i32:
2539             case INDEX_op_brcond_i64:
2540             case INDEX_op_setcond_i64:
2541             case INDEX_op_negsetcond_i64:
2542             case INDEX_op_movcond_i64:
2543             case INDEX_op_cmp_vec:
2544             case INDEX_op_cmpsel_vec:
2545                 if (op->args[k] < ARRAY_SIZE(cond_name)
2546                     && cond_name[op->args[k]]) {
2547                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2548                 } else {
2549                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2550                 }
2551                 i = 1;
2552                 break;
2553             case INDEX_op_qemu_ld_a32_i32:
2554             case INDEX_op_qemu_ld_a64_i32:
2555             case INDEX_op_qemu_st_a32_i32:
2556             case INDEX_op_qemu_st_a64_i32:
2557             case INDEX_op_qemu_st8_a32_i32:
2558             case INDEX_op_qemu_st8_a64_i32:
2559             case INDEX_op_qemu_ld_a32_i64:
2560             case INDEX_op_qemu_ld_a64_i64:
2561             case INDEX_op_qemu_st_a32_i64:
2562             case INDEX_op_qemu_st_a64_i64:
2563             case INDEX_op_qemu_ld_a32_i128:
2564             case INDEX_op_qemu_ld_a64_i128:
2565             case INDEX_op_qemu_st_a32_i128:
2566             case INDEX_op_qemu_st_a64_i128:
2567                 {
2568                     const char *s_al, *s_op, *s_at;
2569                     MemOpIdx oi = op->args[k++];
2570                     MemOp mop = get_memop(oi);
2571                     unsigned ix = get_mmuidx(oi);
2572 
2573                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2574                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2575                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2576                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2577 
2578                     /* If all fields are accounted for, print symbolically. */
2579                     if (!mop && s_al && s_op && s_at) {
2580                         col += ne_fprintf(f, ",%s%s%s,%u",
2581                                           s_at, s_al, s_op, ix);
2582                     } else {
2583                         mop = get_memop(oi);
2584                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2585                     }
2586                     i = 1;
2587                 }
2588                 break;
2589             case INDEX_op_bswap16_i32:
2590             case INDEX_op_bswap16_i64:
2591             case INDEX_op_bswap32_i32:
2592             case INDEX_op_bswap32_i64:
2593             case INDEX_op_bswap64_i64:
2594                 {
2595                     TCGArg flags = op->args[k];
2596                     const char *name = NULL;
2597 
2598                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2599                         name = bswap_flag_name[flags];
2600                     }
2601                     if (name) {
2602                         col += ne_fprintf(f, ",%s", name);
2603                     } else {
2604                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2605                     }
2606                     i = k = 1;
2607                 }
2608                 break;
2609             default:
2610                 i = 0;
2611                 break;
2612             }
2613             switch (c) {
2614             case INDEX_op_set_label:
2615             case INDEX_op_br:
2616             case INDEX_op_brcond_i32:
2617             case INDEX_op_brcond_i64:
2618             case INDEX_op_brcond2_i32:
2619                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2620                                   arg_label(op->args[k])->id);
2621                 i++, k++;
2622                 break;
2623             case INDEX_op_mb:
2624                 {
2625                     TCGBar membar = op->args[k];
2626                     const char *b_op, *m_op;
2627 
2628                     switch (membar & TCG_BAR_SC) {
2629                     case 0:
2630                         b_op = "none";
2631                         break;
2632                     case TCG_BAR_LDAQ:
2633                         b_op = "acq";
2634                         break;
2635                     case TCG_BAR_STRL:
2636                         b_op = "rel";
2637                         break;
2638                     case TCG_BAR_SC:
2639                         b_op = "seq";
2640                         break;
2641                     default:
2642                         g_assert_not_reached();
2643                     }
2644 
2645                     switch (membar & TCG_MO_ALL) {
2646                     case 0:
2647                         m_op = "none";
2648                         break;
2649                     case TCG_MO_LD_LD:
2650                         m_op = "rr";
2651                         break;
2652                     case TCG_MO_LD_ST:
2653                         m_op = "rw";
2654                         break;
2655                     case TCG_MO_ST_LD:
2656                         m_op = "wr";
2657                         break;
2658                     case TCG_MO_ST_ST:
2659                         m_op = "ww";
2660                         break;
2661                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2662                         m_op = "rr+rw";
2663                         break;
2664                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2665                         m_op = "rr+wr";
2666                         break;
2667                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2668                         m_op = "rr+ww";
2669                         break;
2670                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2671                         m_op = "rw+wr";
2672                         break;
2673                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2674                         m_op = "rw+ww";
2675                         break;
2676                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2677                         m_op = "wr+ww";
2678                         break;
2679                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2680                         m_op = "rr+rw+wr";
2681                         break;
2682                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2683                         m_op = "rr+rw+ww";
2684                         break;
2685                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2686                         m_op = "rr+wr+ww";
2687                         break;
2688                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2689                         m_op = "rw+wr+ww";
2690                         break;
2691                     case TCG_MO_ALL:
2692                         m_op = "all";
2693                         break;
2694                     default:
2695                         g_assert_not_reached();
2696                     }
2697 
2698                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2699                     i++, k++;
2700                 }
2701                 break;
2702             default:
2703                 break;
2704             }
2705             for (; i < nb_cargs; i++, k++) {
2706                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2707                                   op->args[k]);
2708             }
2709         }
2710 
2711         if (have_prefs || op->life) {
2712             for (; col < 40; ++col) {
2713                 putc(' ', f);
2714             }
2715         }
2716 
2717         if (op->life) {
2718             unsigned life = op->life;
2719 
2720             if (life & (SYNC_ARG * 3)) {
2721                 ne_fprintf(f, "  sync:");
2722                 for (i = 0; i < 2; ++i) {
2723                     if (life & (SYNC_ARG << i)) {
2724                         ne_fprintf(f, " %d", i);
2725                     }
2726                 }
2727             }
2728             life /= DEAD_ARG;
2729             if (life) {
2730                 ne_fprintf(f, "  dead:");
2731                 for (i = 0; life; ++i, life >>= 1) {
2732                     if (life & 1) {
2733                         ne_fprintf(f, " %d", i);
2734                     }
2735                 }
2736             }
2737         }
2738 
2739         if (have_prefs) {
2740             for (i = 0; i < nb_oargs; ++i) {
2741                 TCGRegSet set = output_pref(op, i);
2742 
2743                 if (i == 0) {
2744                     ne_fprintf(f, "  pref=");
2745                 } else {
2746                     ne_fprintf(f, ",");
2747                 }
2748                 if (set == 0) {
2749                     ne_fprintf(f, "none");
2750                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2751                     ne_fprintf(f, "all");
2752 #ifdef CONFIG_DEBUG_TCG
2753                 } else if (tcg_regset_single(set)) {
2754                     TCGReg reg = tcg_regset_first(set);
2755                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2756 #endif
2757                 } else if (TCG_TARGET_NB_REGS <= 32) {
2758                     ne_fprintf(f, "0x%x", (uint32_t)set);
2759                 } else {
2760                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2761                 }
2762             }
2763         }
2764 
2765         putc('\n', f);
2766     }
2767 }
2768 
2769 /* We give more priority to constraints with fewer registers. */
2770 static int get_constraint_priority(const TCGOpDef *def, int k)
2771 {
2772     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2773     int n = ctpop64(arg_ct->regs);
2774 
2775     /*
2776      * Sort constraints of a single register first, which includes output
2777      * aliases (which must exactly match the input already allocated).
2778      */
2779     if (n == 1 || arg_ct->oalias) {
2780         return INT_MAX;
2781     }
2782 
2783     /*
2784      * Sort register pairs next, with each second immediately after its first.
2785      * Arbitrarily sort multiple pairs by the index of the first reg;
2786      * there shouldn't be many pairs.
2787      */
2788     switch (arg_ct->pair) {
2789     case 1:
2790     case 3:
2791         return (k + 1) * 2;
2792     case 2:
2793         return (arg_ct->pair_index + 1) * 2 - 1;
2794     }
2795 
2796     /* Finally, sort by increasing register count: scarcer sets first. */
2797     assert(n > 1);
2798     return -n;
2799 }
2800 
2801 /* sort from highest priority to lowest */
2802 static void sort_constraints(TCGOpDef *def, int start, int n)
2803 {
2804     int i, j;
2805     TCGArgConstraint *a = def->args_ct;
2806 
2807     for (i = 0; i < n; i++) {
2808         a[start + i].sort_index = start + i;
2809     }
2810     if (n <= 1) {
2811         return;
2812     }
2813     for (i = 0; i < n - 1; i++) {
2814         for (j = i + 1; j < n; j++) {
2815             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2816             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2817             if (p1 < p2) {
2818                 int tmp = a[start + i].sort_index;
2819                 a[start + i].sort_index = a[start + j].sort_index;
2820                 a[start + j].sort_index = tmp;
2821             }
2822         }
2823     }
2824 }
2825 
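/*
 * Example ordering (illustrative): for one op, an output alias sorts
 * first (INT_MAX), a register pair next (small positive values), and
 * among plain sets a constraint allowing 4 registers precedes one
 * allowing 16, since -4 > -16 -- scarcer constraints allocate first.
 */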
2826 static void process_op_defs(TCGContext *s)
2827 {
2828     TCGOpcode op;
2829 
2830     for (op = 0; op < NB_OPS; op++) {
2831         TCGOpDef *def = &tcg_op_defs[op];
2832         const TCGTargetOpDef *tdefs;
2833         bool saw_alias_pair = false;
2834         int i, o, i2, o2, nb_args;
2835 
2836         if (def->flags & TCG_OPF_NOT_PRESENT) {
2837             continue;
2838         }
2839 
2840         nb_args = def->nb_iargs + def->nb_oargs;
2841         if (nb_args == 0) {
2842             continue;
2843         }
2844 
2845         /*
2846          * Macro magic should make it impossible, but double-check that
2847          * the array index is in range.  Since the signedness of an enum
2848          * is implementation-defined, force the result to unsigned.
2849          */
2850         unsigned con_set = tcg_target_op_def(op);
2851         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2852         tdefs = &constraint_sets[con_set];
2853 
2854         for (i = 0; i < nb_args; i++) {
2855             const char *ct_str = tdefs->args_ct_str[i];
2856             bool input_p = i >= def->nb_oargs;
2857 
2858             /* Incomplete TCGTargetOpDef entry. */
2859             tcg_debug_assert(ct_str != NULL);
2860 
2861             switch (*ct_str) {
2862             case '0' ... '9':
2863                 o = *ct_str - '0';
2864                 tcg_debug_assert(input_p);
2865                 tcg_debug_assert(o < def->nb_oargs);
2866                 tcg_debug_assert(def->args_ct[o].regs != 0);
2867                 tcg_debug_assert(!def->args_ct[o].oalias);
2868                 def->args_ct[i] = def->args_ct[o];
2869                 /* The output sets oalias.  */
2870                 def->args_ct[o].oalias = 1;
2871                 def->args_ct[o].alias_index = i;
2872                 /* The input sets ialias. */
2873                 def->args_ct[i].ialias = 1;
2874                 def->args_ct[i].alias_index = o;
2875                 if (def->args_ct[i].pair) {
2876                     saw_alias_pair = true;
2877                 }
2878                 tcg_debug_assert(ct_str[1] == '\0');
2879                 continue;
2880 
2881             case '&':
2882                 tcg_debug_assert(!input_p);
2883                 def->args_ct[i].newreg = true;
2884                 ct_str++;
2885                 break;
2886 
2887             case 'p': /* plus */
2888                 /* Allocate to the register after the previous. */
2889                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2890                 o = i - 1;
2891                 tcg_debug_assert(!def->args_ct[o].pair);
2892                 tcg_debug_assert(!def->args_ct[o].ct);
2893                 def->args_ct[i] = (TCGArgConstraint){
2894                     .pair = 2,
2895                     .pair_index = o,
2896                     .regs = def->args_ct[o].regs << 1,
2897                 };
2898                 def->args_ct[o].pair = 1;
2899                 def->args_ct[o].pair_index = i;
2900                 tcg_debug_assert(ct_str[1] == '\0');
2901                 continue;
2902 
2903             case 'm': /* minus */
2904                 /* Allocate to the register before the previous. */
2905                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2906                 o = i - 1;
2907                 tcg_debug_assert(!def->args_ct[o].pair);
2908                 tcg_debug_assert(!def->args_ct[o].ct);
2909                 def->args_ct[i] = (TCGArgConstraint){
2910                     .pair = 1,
2911                     .pair_index = o,
2912                     .regs = def->args_ct[o].regs >> 1,
2913                 };
2914                 def->args_ct[o].pair = 2;
2915                 def->args_ct[o].pair_index = i;
2916                 tcg_debug_assert(ct_str[1] == '\0');
2917                 continue;
2918             }
2919 
2920             do {
2921                 switch (*ct_str) {
2922                 case 'i':
2923                     def->args_ct[i].ct |= TCG_CT_CONST;
2924                     break;
2925 
2926                 /* Include all of the target-specific constraints. */
2927 
2928 #undef CONST
2929 #define CONST(CASE, MASK) \
2930     case CASE: def->args_ct[i].ct |= MASK; break;
2931 #define REGS(CASE, MASK) \
2932     case CASE: def->args_ct[i].regs |= MASK; break;
2933 
2934 #include "tcg-target-con-str.h"
2935 
2936 #undef REGS
2937 #undef CONST
2938                 default:
2939                 case '0' ... '9':
2940                 case '&':
2941                 case 'p':
2942                 case 'm':
2943                     /* Typo in TCGTargetOpDef constraint. */
2944                     g_assert_not_reached();
2945                 }
2946             } while (*++ct_str != '\0');
2947         }
2948 
2949         /* TCGTargetOpDef entry with too much information? */
2950         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2951 
2952         /*
2953          * Fix up output pairs that are aliased with inputs.
2954          * When we created the alias, we copied pair from the output.
2955          * There are three cases:
2956          *    (1a) Pairs of inputs alias pairs of outputs.
2957          *    (1b) One input aliases the first of a pair of outputs.
2958          *    (2)  One input aliases the second of a pair of outputs.
2959          *
2960          * Case 1a is handled by making sure that the pair_index'es are
2961          * properly updated so that they appear the same as a pair of inputs.
2962          *
2963          * Case 1b is handled by setting the pair_index of the input to
2964          * itself, simply so it doesn't point to an unrelated argument.
2965          * Since we don't encounter the "second" during the input allocation
2966          * phase, nothing happens with the second half of the input pair.
2967          *
2968          * Case 2 is handled by setting the second input to pair=3, the
2969          * first output to pair=3, and the pair_index'es to match.
2970          */
2971         if (saw_alias_pair) {
2972             for (i = def->nb_oargs; i < nb_args; i++) {
2973                 /*
2974                  * Since [0-9pm] must be alone in the constraint string,
2975                  * the only way they can both be set is if the pair comes
2976                  * from the output alias.
2977                  */
2978                 if (!def->args_ct[i].ialias) {
2979                     continue;
2980                 }
2981                 switch (def->args_ct[i].pair) {
2982                 case 0:
2983                     break;
2984                 case 1:
2985                     o = def->args_ct[i].alias_index;
2986                     o2 = def->args_ct[o].pair_index;
2987                     tcg_debug_assert(def->args_ct[o].pair == 1);
2988                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2989                     if (def->args_ct[o2].oalias) {
2990                         /* Case 1a */
2991                         i2 = def->args_ct[o2].alias_index;
2992                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2993                         def->args_ct[i2].pair_index = i;
2994                         def->args_ct[i].pair_index = i2;
2995                     } else {
2996                         /* Case 1b */
2997                         def->args_ct[i].pair_index = i;
2998                     }
2999                     break;
3000                 case 2:
3001                     o = def->args_ct[i].alias_index;
3002                     o2 = def->args_ct[o].pair_index;
3003                     tcg_debug_assert(def->args_ct[o].pair == 2);
3004                     tcg_debug_assert(def->args_ct[o2].pair == 1);
3005                     if (def->args_ct[o2].oalias) {
3006                         /* Case 1a */
3007                         i2 = def->args_ct[o2].alias_index;
3008                         tcg_debug_assert(def->args_ct[i2].pair == 1);
3009                         def->args_ct[i2].pair_index = i;
3010                         def->args_ct[i].pair_index = i2;
3011                     } else {
3012                         /* Case 2 */
3013                         def->args_ct[i].pair = 3;
3014                         def->args_ct[o2].pair = 3;
3015                         def->args_ct[i].pair_index = o2;
3016                         def->args_ct[o2].pair_index = i;
3017                     }
3018                     break;
3019                 default:
3020                     g_assert_not_reached();
3021                 }
3022             }
3023         }
3024 
3025         /* sort the constraints (XXX: this is just a heuristic) */
3026         sort_constraints(def, 0, def->nb_oargs);
3027         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3028     }
3029 }
3030 
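/*
 * Constraint-string example (a sketch): an entry like C_O1_I2(r, 0, ri)
 * describes one output in any register, a first input aliased to that
 * output ("0"), and a second input accepting a register or immediate
 * ("ri").  The parser above turns this into args_ct[] entries with
 * the oalias/ialias links and the TCG_CT_CONST bit set accordingly.
 */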
3031 static void remove_label_use(TCGOp *op, int idx)
3032 {
3033     TCGLabel *label = arg_label(op->args[idx]);
3034     TCGLabelUse *use;
3035 
3036     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3037         if (use->op == op) {
3038             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3039             return;
3040         }
3041     }
3042     g_assert_not_reached();
3043 }
3044 
3045 void tcg_op_remove(TCGContext *s, TCGOp *op)
3046 {
3047     switch (op->opc) {
3048     case INDEX_op_br:
3049         remove_label_use(op, 0);
3050         break;
3051     case INDEX_op_brcond_i32:
3052     case INDEX_op_brcond_i64:
3053         remove_label_use(op, 3);
3054         break;
3055     case INDEX_op_brcond2_i32:
3056         remove_label_use(op, 5);
3057         break;
3058     default:
3059         break;
3060     }
3061 
3062     QTAILQ_REMOVE(&s->ops, op, link);
3063     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3064     s->nb_ops--;
3065 }
3066 
3067 void tcg_remove_ops_after(TCGOp *op)
3068 {
3069     TCGContext *s = tcg_ctx;
3070 
3071     while (true) {
3072         TCGOp *last = tcg_last_op();
3073         if (last == op) {
3074             return;
3075         }
3076         tcg_op_remove(s, last);
3077     }
3078 }
3079 
3080 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3081 {
3082     TCGContext *s = tcg_ctx;
3083     TCGOp *op = NULL;
3084 
3085     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3086         QTAILQ_FOREACH(op, &s->free_ops, link) {
3087             if (nargs <= op->nargs) {
3088                 QTAILQ_REMOVE(&s->free_ops, op, link);
3089                 nargs = op->nargs;
3090                 goto found;
3091             }
3092         }
3093     }
3094 
3095     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3096     nargs = MAX(4, nargs);
3097     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3098 
3099  found:
3100     memset(op, 0, offsetof(TCGOp, link));
3101     op->opc = opc;
3102     op->nargs = nargs;
3103 
3104     /* Check for bitfield overflow. */
3105     tcg_debug_assert(op->nargs == nargs);
3106 
3107     s->nb_ops++;
3108     return op;
3109 }
3110 
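/*
 * Note: ops removed via tcg_op_remove() are pushed onto s->free_ops
 * and preferentially reused by the scan above, so passes that delete
 * and re-emit ops do not keep growing the per-TB arena.
 */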
3111 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3112 {
3113     TCGOp *op = tcg_op_alloc(opc, nargs);
3114     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3115     return op;
3116 }
3117 
3118 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3119                             TCGOpcode opc, unsigned nargs)
3120 {
3121     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3122     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3123     return new_op;
3124 }
3125 
3126 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3127                            TCGOpcode opc, unsigned nargs)
3128 {
3129     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3130     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3131     return new_op;
3132 }
3133 
3134 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3135 {
3136     TCGLabelUse *u;
3137 
3138     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3139         TCGOp *op = u->op;
3140         switch (op->opc) {
3141         case INDEX_op_br:
3142             op->args[0] = label_arg(to);
3143             break;
3144         case INDEX_op_brcond_i32:
3145         case INDEX_op_brcond_i64:
3146             op->args[3] = label_arg(to);
3147             break;
3148         case INDEX_op_brcond2_i32:
3149             op->args[5] = label_arg(to);
3150             break;
3151         default:
3152             g_assert_not_reached();
3153         }
3154     }
3155 
3156     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3157 }
3158 
3159 /* Reachability analysis: remove unreachable code.  */
3160 static void __attribute__((noinline))
3161 reachable_code_pass(TCGContext *s)
3162 {
3163     TCGOp *op, *op_next, *op_prev;
3164     bool dead = false;
3165 
3166     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3167         bool remove = dead;
3168         TCGLabel *label;
3169 
3170         switch (op->opc) {
3171         case INDEX_op_set_label:
3172             label = arg_label(op->args[0]);
3173 
3174             /*
3175              * Note that the first op in the TB is always a load,
3176              * so there is always something before a label.
3177              */
3178             op_prev = QTAILQ_PREV(op, link);
3179 
3180             /*
3181              * If we find two sequential labels, move all branches to
3182              * reference the second label and remove the first label.
3183              * Do this before branch to next optimization, so that the
3184              * middle label is out of the way.
3185              */
3186             if (op_prev->opc == INDEX_op_set_label) {
3187                 move_label_uses(label, arg_label(op_prev->args[0]));
3188                 tcg_op_remove(s, op_prev);
3189                 op_prev = QTAILQ_PREV(op, link);
3190             }
3191 
3192             /*
3193              * Optimization can fold conditional branches to unconditional.
3194              * If we find a label which is preceded by an unconditional
3195              * branch to next, remove the branch.  We couldn't do this when
3196              * processing the branch because any dead code between the branch
3197              * and label had not yet been removed.
3198              */
3199             if (op_prev->opc == INDEX_op_br &&
3200                 label == arg_label(op_prev->args[0])) {
3201                 tcg_op_remove(s, op_prev);
3202                 /* Fall through means insns become live again.  */
3203                 dead = false;
3204             }
3205 
3206             if (QSIMPLEQ_EMPTY(&label->branches)) {
3207                 /*
3208                  * While there is an occasional backward branch, virtually
3209                  * all branches generated by the translators are forward.
3210                  * That means that by the time we see a label we have
3211                  * usually already removed every reference to it that will
3212                  * ever exist, so there is little to be gained by iterating.
3213                  */
3214                 remove = true;
3215             } else {
3216                 /* Once we see a label, insns become live again.  */
3217                 dead = false;
3218                 remove = false;
3219             }
3220             break;
3221 
3222         case INDEX_op_br:
3223         case INDEX_op_exit_tb:
3224         case INDEX_op_goto_ptr:
3225             /* Unconditional branches; everything following is dead.  */
3226             dead = true;
3227             break;
3228 
3229         case INDEX_op_call:
3230             /* Notice noreturn helper calls, raising exceptions.  */
3231             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3232                 dead = true;
3233             }
3234             break;
3235 
3236         case INDEX_op_insn_start:
3237             /* Never remove -- we need to keep these for unwind.  */
3238             remove = false;
3239             break;
3240 
3241         default:
3242             break;
3243         }
3244 
3245         if (remove) {
3246             tcg_op_remove(s, op);
3247         }
3248     }
3249 }
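
/*
 * Illustrative before/after for the pass above (schematic opcodes,
 * not compiled).  Merging adjacent labels:
 *
 *     brcond_i32 a, b, eq, L1          brcond_i32 a, b, eq, L2
 *     ...                              ...
 *     set_label L1              ==>    set_label L2
 *     set_label L2
 *
 * Removing a branch to the immediately following label:
 *
 *     br L3                            set_label L3
 *     set_label L3              ==>    mov_i32 t0, t1
 *     mov_i32 t0, t1
 *
 * Everything between an unconditional br/exit_tb/goto_ptr (or a
 * noreturn helper call) and the next referenced label is dead and
 * removed, except insn_start, which is kept for unwind info.
 */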
3250 
3251 #define TS_DEAD  1
3252 #define TS_MEM   2
3253 
3254 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3255 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
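
/*
 * During the backward liveness walk, each temp carries two state bits:
 * TS_DEAD means the value is not needed by any later op, and TS_MEM
 * means later ops expect the value to be present in its memory slot.
 * DEAD_ARG / SYNC_ARG bits derived from these states are recorded per
 * operand index in op->life and tested with the two macros above.
 */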
3256 
3257 /* For liveness_pass_1, the register preferences for a given temp.  */
3258 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3259 {
3260     return ts->state_ptr;
3261 }
3262 
3263 /* For liveness_pass_1, reset the preferences for a given temp to the
3264  * maximal regset for its type.
3265  */
3266 static inline void la_reset_pref(TCGTemp *ts)
3267 {
3268     *la_temp_pref(ts)
3269         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3270 }
3271 
3272 /* liveness analysis: end of function: all temps are dead, and globals
3273    should be in memory. */
3274 static void la_func_end(TCGContext *s, int ng, int nt)
3275 {
3276     int i;
3277 
3278     for (i = 0; i < ng; ++i) {
3279         s->temps[i].state = TS_DEAD | TS_MEM;
3280         la_reset_pref(&s->temps[i]);
3281     }
3282     for (i = ng; i < nt; ++i) {
3283         s->temps[i].state = TS_DEAD;
3284         la_reset_pref(&s->temps[i]);
3285     }
3286 }
3287 
3288 /* liveness analysis: end of basic block: all temps are dead; globals
3289    and local temps should be in memory. */
3290 static void la_bb_end(TCGContext *s, int ng, int nt)
3291 {
3292     int i;
3293 
3294     for (i = 0; i < nt; ++i) {
3295         TCGTemp *ts = &s->temps[i];
3296         int state;
3297 
3298         switch (ts->kind) {
3299         case TEMP_FIXED:
3300         case TEMP_GLOBAL:
3301         case TEMP_TB:
3302             state = TS_DEAD | TS_MEM;
3303             break;
3304         case TEMP_EBB:
3305         case TEMP_CONST:
3306             state = TS_DEAD;
3307             break;
3308         default:
3309             g_assert_not_reached();
3310         }
3311         ts->state = state;
3312         la_reset_pref(ts);
3313     }
3314 }
3315 
3316 /* liveness analysis: sync globals back to memory.  */
3317 static void la_global_sync(TCGContext *s, int ng)
3318 {
3319     int i;
3320 
3321     for (i = 0; i < ng; ++i) {
3322         int state = s->temps[i].state;
3323         s->temps[i].state = state | TS_MEM;
3324         if (state == TS_DEAD) {
3325             /* If the global was previously dead, reset prefs.  */
3326             la_reset_pref(&s->temps[i]);
3327         }
3328     }
3329 }
3330 
3331 /*
3332  * liveness analysis: conditional branch: all temps are dead unless
3333  * explicitly live-across-conditional-branch; globals and local temps
3334  * should be synced.
3335  */
3336 static void la_bb_sync(TCGContext *s, int ng, int nt)
3337 {
3338     la_global_sync(s, ng);
3339 
3340     for (int i = ng; i < nt; ++i) {
3341         TCGTemp *ts = &s->temps[i];
3342         int state;
3343 
3344         switch (ts->kind) {
3345         case TEMP_TB:
3346             state = ts->state;
3347             ts->state = state | TS_MEM;
3348             if (state != TS_DEAD) {
3349                 continue;
3350             }
3351             break;
3352         case TEMP_EBB:
3353         case TEMP_CONST:
3354             continue;
3355         default:
3356             g_assert_not_reached();
3357         }
3358         la_reset_pref(&s->temps[i]);
3359     }
3360 }
3361 
3362 /* liveness analysis: sync globals back to memory and kill.  */
3363 static void la_global_kill(TCGContext *s, int ng)
3364 {
3365     int i;
3366 
3367     for (i = 0; i < ng; i++) {
3368         s->temps[i].state = TS_DEAD | TS_MEM;
3369         la_reset_pref(&s->temps[i]);
3370     }
3371 }
3372 
3373 /* liveness analysis: note live temps crossing calls.  */
3374 static void la_cross_call(TCGContext *s, int nt)
3375 {
3376     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3377     int i;
3378 
3379     for (i = 0; i < nt; i++) {
3380         TCGTemp *ts = &s->temps[i];
3381         if (!(ts->state & TS_DEAD)) {
3382             TCGRegSet *pset = la_temp_pref(ts);
3383             TCGRegSet set = *pset;
3384 
3385             set &= mask;
3386             /* If the combination is not possible, restart.  */
3387             if (set == 0) {
3388                 set = tcg_target_available_regs[ts->type] & mask;
3389             }
3390             *pset = set;
3391         }
3392     }
3393 }
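
/*
 * Illustrative sketch (hypothetical register names): if a live temp
 * preferred {R0, R1} and both are call-clobbered, the masked set is
 * empty and the preference restarts from all non-clobbered registers:
 *
 *     set = pref & ~clobbered;                  // {R0,R1} & mask == 0
 *     if (set == 0) {
 *         set = available[type] & ~clobbered;   // e.g. {R4, R5, R6}
 *     }
 *
 * This only steers later allocation; it never forbids a register.
 */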
3394 
3395 /*
3396  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3397  * to TEMP_EBB, if possible.
3398  */
3399 static void __attribute__((noinline))
3400 liveness_pass_0(TCGContext *s)
3401 {
3402     void * const multiple_ebb = (void *)(uintptr_t)-1;
3403     int nb_temps = s->nb_temps;
3404     TCGOp *op, *ebb;
3405 
3406     for (int i = s->nb_globals; i < nb_temps; ++i) {
3407         s->temps[i].state_ptr = NULL;
3408     }
3409 
3410     /*
3411      * Represent each EBB by the op at which it begins.  In the case of
3412      * the first EBB, this is the first op, otherwise it is a label.
3413      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3414      * within a single EBB, else MULTIPLE_EBB.
3415      */
3416     ebb = QTAILQ_FIRST(&s->ops);
3417     QTAILQ_FOREACH(op, &s->ops, link) {
3418         const TCGOpDef *def;
3419         int nb_oargs, nb_iargs;
3420 
3421         switch (op->opc) {
3422         case INDEX_op_set_label:
3423             ebb = op;
3424             continue;
3425         case INDEX_op_discard:
3426             continue;
3427         case INDEX_op_call:
3428             nb_oargs = TCGOP_CALLO(op);
3429             nb_iargs = TCGOP_CALLI(op);
3430             break;
3431         default:
3432             def = &tcg_op_defs[op->opc];
3433             nb_oargs = def->nb_oargs;
3434             nb_iargs = def->nb_iargs;
3435             break;
3436         }
3437 
3438         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3439             TCGTemp *ts = arg_temp(op->args[i]);
3440 
3441             if (ts->kind != TEMP_TB) {
3442                 continue;
3443             }
3444             if (ts->state_ptr == NULL) {
3445                 ts->state_ptr = ebb;
3446             } else if (ts->state_ptr != ebb) {
3447                 ts->state_ptr = multiple_ebb;
3448             }
3449         }
3450     }
3451 
3452     /*
3453      * For TEMP_TB that turned out not to be used beyond one EBB,
3454      * reduce the liveness to TEMP_EBB.
3455      */
3456     for (int i = s->nb_globals; i < nb_temps; ++i) {
3457         TCGTemp *ts = &s->temps[i];
3458         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3459             ts->kind = TEMP_EBB;
3460         }
3461     }
3462 }
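
/*
 * Illustrative sketch (schematic ops): a TEMP_TB whose uses all fall
 * between one label and the next can be demoted, since its value never
 * has to survive past the end of that extended basic block:
 *
 *     set_label L1            <- ebb = this op
 *     mov_i32   t5, t2        <- t5->state_ptr = ebb
 *     add_i32   t5, t5, t3    <- same ebb: still a candidate
 *     set_label L2            <- new ebb; t5 unused from here on
 *
 * t5 becomes TEMP_EBB and may live purely in a register; a use after
 * L2 would have set state_ptr to MULTIPLE_EBB and kept it TEMP_TB.
 */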
3463 
3464 /* Liveness analysis: update the opc_arg_life array to tell if a
3465    given input argument is dead. Instructions updating dead
3466    temporaries are removed. */
3467 static void __attribute__((noinline))
3468 liveness_pass_1(TCGContext *s)
3469 {
3470     int nb_globals = s->nb_globals;
3471     int nb_temps = s->nb_temps;
3472     TCGOp *op, *op_prev;
3473     TCGRegSet *prefs;
3474     int i;
3475 
3476     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3477     for (i = 0; i < nb_temps; ++i) {
3478         s->temps[i].state_ptr = prefs + i;
3479     }
3480 
3481     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3482     la_func_end(s, nb_globals, nb_temps);
3483 
3484     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3485         int nb_iargs, nb_oargs;
3486         TCGOpcode opc_new, opc_new2;
3487         bool have_opc_new2;
3488         TCGLifeData arg_life = 0;
3489         TCGTemp *ts;
3490         TCGOpcode opc = op->opc;
3491         const TCGOpDef *def = &tcg_op_defs[opc];
3492 
3493         switch (opc) {
3494         case INDEX_op_call:
3495             {
3496                 const TCGHelperInfo *info = tcg_call_info(op);
3497                 int call_flags = tcg_call_flags(op);
3498 
3499                 nb_oargs = TCGOP_CALLO(op);
3500                 nb_iargs = TCGOP_CALLI(op);
3501 
3502                 /* pure functions can be removed if their result is unused */
3503                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3504                     for (i = 0; i < nb_oargs; i++) {
3505                         ts = arg_temp(op->args[i]);
3506                         if (ts->state != TS_DEAD) {
3507                             goto do_not_remove_call;
3508                         }
3509                     }
3510                     goto do_remove;
3511                 }
3512             do_not_remove_call:
3513 
3514                 /* Output args are dead.  */
3515                 for (i = 0; i < nb_oargs; i++) {
3516                     ts = arg_temp(op->args[i]);
3517                     if (ts->state & TS_DEAD) {
3518                         arg_life |= DEAD_ARG << i;
3519                     }
3520                     if (ts->state & TS_MEM) {
3521                         arg_life |= SYNC_ARG << i;
3522                     }
3523                     ts->state = TS_DEAD;
3524                     la_reset_pref(ts);
3525                 }
3526 
3527                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3528                 memset(op->output_pref, 0, sizeof(op->output_pref));
3529 
3530                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3531                                     TCG_CALL_NO_READ_GLOBALS))) {
3532                     la_global_kill(s, nb_globals);
3533                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3534                     la_global_sync(s, nb_globals);
3535                 }
3536 
3537                 /* Record arguments that die in this helper.  */
3538                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3539                     ts = arg_temp(op->args[i]);
3540                     if (ts->state & TS_DEAD) {
3541                         arg_life |= DEAD_ARG << i;
3542                     }
3543                 }
3544 
3545                 /* For all live registers, remove call-clobbered prefs.  */
3546                 la_cross_call(s, nb_temps);
3547 
3548                 /*
3549                  * Input arguments are live for preceding opcodes.
3550                  *
3551                  * For those arguments that die, and will be allocated in
3552                  * registers, clear the register set for that arg, to be
3553                  * filled in below.  For args that will be on the stack,
3554                  * reset to any available reg.  Process arguments in reverse
3555                  * order so that if a temp is used more than once, the stack
3556                  * reset to max happens before the register reset to 0.
3557                  */
3558                 for (i = nb_iargs - 1; i >= 0; i--) {
3559                     const TCGCallArgumentLoc *loc = &info->in[i];
3560                     ts = arg_temp(op->args[nb_oargs + i]);
3561 
3562                     if (ts->state & TS_DEAD) {
3563                         switch (loc->kind) {
3564                         case TCG_CALL_ARG_NORMAL:
3565                         case TCG_CALL_ARG_EXTEND_U:
3566                         case TCG_CALL_ARG_EXTEND_S:
3567                             if (arg_slot_reg_p(loc->arg_slot)) {
3568                                 *la_temp_pref(ts) = 0;
3569                                 break;
3570                             }
3571                             /* fall through */
3572                         default:
3573                             *la_temp_pref(ts) =
3574                                 tcg_target_available_regs[ts->type];
3575                             break;
3576                         }
3577                         ts->state &= ~TS_DEAD;
3578                     }
3579                 }
3580 
3581                 /*
3582                  * For each input argument, add its input register to prefs.
3583                  * If a temp is used once, this produces a single set bit;
3584                  * if a temp is used multiple times, this produces a set.
3585                  */
3586                 for (i = 0; i < nb_iargs; i++) {
3587                     const TCGCallArgumentLoc *loc = &info->in[i];
3588                     ts = arg_temp(op->args[nb_oargs + i]);
3589 
3590                     switch (loc->kind) {
3591                     case TCG_CALL_ARG_NORMAL:
3592                     case TCG_CALL_ARG_EXTEND_U:
3593                     case TCG_CALL_ARG_EXTEND_S:
3594                         if (arg_slot_reg_p(loc->arg_slot)) {
3595                             tcg_regset_set_reg(*la_temp_pref(ts),
3596                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3597                         }
3598                         break;
3599                     default:
3600                         break;
3601                     }
3602                 }
3603             }
3604             break;
3605         case INDEX_op_insn_start:
3606             break;
3607         case INDEX_op_discard:
3608             /* mark the temporary as dead */
3609             ts = arg_temp(op->args[0]);
3610             ts->state = TS_DEAD;
3611             la_reset_pref(ts);
3612             break;
3613 
3614         case INDEX_op_add2_i32:
3615             opc_new = INDEX_op_add_i32;
3616             goto do_addsub2;
3617         case INDEX_op_sub2_i32:
3618             opc_new = INDEX_op_sub_i32;
3619             goto do_addsub2;
3620         case INDEX_op_add2_i64:
3621             opc_new = INDEX_op_add_i64;
3622             goto do_addsub2;
3623         case INDEX_op_sub2_i64:
3624             opc_new = INDEX_op_sub_i64;
3625         do_addsub2:
3626             nb_iargs = 4;
3627             nb_oargs = 2;
3628             /* Test if the high part of the operation is dead, but not
3629                the low part.  The result can be optimized to a simple
3630                add or sub.  This often happens for an x86_64 guest when
3631                the cpu mode is set to 32 bit.  */
3632             if (arg_temp(op->args[1])->state == TS_DEAD) {
3633                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3634                     goto do_remove;
3635                 }
3636                 /* Replace the opcode and adjust the args in place,
3637                    leaving 3 unused args at the end.  */
3638                 op->opc = opc = opc_new;
3639                 op->args[1] = op->args[2];
3640                 op->args[2] = op->args[4];
3641                 /* Fall through and mark the single-word operation live.  */
3642                 nb_iargs = 2;
3643                 nb_oargs = 1;
3644             }
3645             goto do_not_remove;
3646 
3647         case INDEX_op_mulu2_i32:
3648             opc_new = INDEX_op_mul_i32;
3649             opc_new2 = INDEX_op_muluh_i32;
3650             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3651             goto do_mul2;
3652         case INDEX_op_muls2_i32:
3653             opc_new = INDEX_op_mul_i32;
3654             opc_new2 = INDEX_op_mulsh_i32;
3655             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3656             goto do_mul2;
3657         case INDEX_op_mulu2_i64:
3658             opc_new = INDEX_op_mul_i64;
3659             opc_new2 = INDEX_op_muluh_i64;
3660             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3661             goto do_mul2;
3662         case INDEX_op_muls2_i64:
3663             opc_new = INDEX_op_mul_i64;
3664             opc_new2 = INDEX_op_mulsh_i64;
3665             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3666             goto do_mul2;
3667         do_mul2:
3668             nb_iargs = 2;
3669             nb_oargs = 2;
3670             if (arg_temp(op->args[1])->state == TS_DEAD) {
3671                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3672                     /* Both parts of the operation are dead.  */
3673                     goto do_remove;
3674                 }
3675                 /* The high part of the operation is dead; generate the low. */
3676                 op->opc = opc = opc_new;
3677                 op->args[1] = op->args[2];
3678                 op->args[2] = op->args[3];
3679             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3680                 /* The low part of the operation is dead; generate the high. */
3681                 op->opc = opc = opc_new2;
3682                 op->args[0] = op->args[1];
3683                 op->args[1] = op->args[2];
3684                 op->args[2] = op->args[3];
3685             } else {
3686                 goto do_not_remove;
3687             }
3688             /* Mark the single-word operation live.  */
3689             nb_oargs = 1;
3690             goto do_not_remove;
3691 
3692         default:
3693             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3694             nb_iargs = def->nb_iargs;
3695             nb_oargs = def->nb_oargs;
3696 
3697             /* Test if the operation can be removed because all
3698                its outputs are dead. We assume that nb_oargs == 0
3699                implies side effects */
3700             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3701                 for (i = 0; i < nb_oargs; i++) {
3702                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3703                         goto do_not_remove;
3704                     }
3705                 }
3706                 goto do_remove;
3707             }
3708             goto do_not_remove;
3709 
3710         do_remove:
3711             tcg_op_remove(s, op);
3712             break;
3713 
3714         do_not_remove:
3715             for (i = 0; i < nb_oargs; i++) {
3716                 ts = arg_temp(op->args[i]);
3717 
3718                 /* Remember the preference of the uses that followed.  */
3719                 if (i < ARRAY_SIZE(op->output_pref)) {
3720                     op->output_pref[i] = *la_temp_pref(ts);
3721                 }
3722 
3723                 /* Output args are dead.  */
3724                 if (ts->state & TS_DEAD) {
3725                     arg_life |= DEAD_ARG << i;
3726                 }
3727                 if (ts->state & TS_MEM) {
3728                     arg_life |= SYNC_ARG << i;
3729                 }
3730                 ts->state = TS_DEAD;
3731                 la_reset_pref(ts);
3732             }
3733 
3734             /* If end of basic block, update.  */
3735             if (def->flags & TCG_OPF_BB_EXIT) {
3736                 la_func_end(s, nb_globals, nb_temps);
3737             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3738                 la_bb_sync(s, nb_globals, nb_temps);
3739             } else if (def->flags & TCG_OPF_BB_END) {
3740                 la_bb_end(s, nb_globals, nb_temps);
3741             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3742                 la_global_sync(s, nb_globals);
3743                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3744                     la_cross_call(s, nb_temps);
3745                 }
3746             }
3747 
3748             /* Record arguments that die in this opcode.  */
3749             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3750                 ts = arg_temp(op->args[i]);
3751                 if (ts->state & TS_DEAD) {
3752                     arg_life |= DEAD_ARG << i;
3753                 }
3754             }
3755 
3756             /* Input arguments are live for preceding opcodes.  */
3757             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3758                 ts = arg_temp(op->args[i]);
3759                 if (ts->state & TS_DEAD) {
3760                     /* For operands that were dead, initially allow
3761                        all regs for the type.  */
3762                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3763                     ts->state &= ~TS_DEAD;
3764                 }
3765             }
3766 
3767             /* Incorporate constraints for this opcode's operands.  */
3768             switch (opc) {
3769             case INDEX_op_mov_i32:
3770             case INDEX_op_mov_i64:
3771                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3772                    have proper constraints.  That said, special case
3773                    moves to propagate preferences backward.  */
3774                 if (IS_DEAD_ARG(1)) {
3775                     *la_temp_pref(arg_temp(op->args[0]))
3776                         = *la_temp_pref(arg_temp(op->args[1]));
3777                 }
3778                 break;
3779 
3780             default:
3781                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3782                     const TCGArgConstraint *ct = &def->args_ct[i];
3783                     TCGRegSet set, *pset;
3784 
3785                     ts = arg_temp(op->args[i]);
3786                     pset = la_temp_pref(ts);
3787                     set = *pset;
3788 
3789                     set &= ct->regs;
3790                     if (ct->ialias) {
3791                         set &= output_pref(op, ct->alias_index);
3792                     }
3793                     /* If the combination is not possible, restart.  */
3794                     if (set == 0) {
3795                         set = ct->regs;
3796                     }
3797                     *pset = set;
3798                 }
3799                 break;
3800             }
3801             break;
3802         }
3803         op->life = arg_life;
3804     }
3805 }
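
/*
 * Illustrative sketch of the op->life encoding produced above: one
 * DEAD_ARG and one SYNC_ARG bit per operand index.  For, e.g.,
 * "add_i32 t0, t1, t2" where t1 dies at this op and t0 must also be
 * written back to memory:
 *
 *     op->life = (SYNC_ARG << 0) | (DEAD_ARG << 1);
 *
 * The register allocator later tests these bits with IS_DEAD_ARG(n)
 * and NEED_SYNC_ARG(n).
 */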
3806 
3807 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3808 static bool __attribute__((noinline))
3809 liveness_pass_2(TCGContext *s)
3810 {
3811     int nb_globals = s->nb_globals;
3812     int nb_temps, i;
3813     bool changes = false;
3814     TCGOp *op, *op_next;
3815 
3816     /* Create a temporary for each indirect global.  */
3817     for (i = 0; i < nb_globals; ++i) {
3818         TCGTemp *its = &s->temps[i];
3819         if (its->indirect_reg) {
3820             TCGTemp *dts = tcg_temp_alloc(s);
3821             dts->type = its->type;
3822             dts->base_type = its->base_type;
3823             dts->temp_subindex = its->temp_subindex;
3824             dts->kind = TEMP_EBB;
3825             its->state_ptr = dts;
3826         } else {
3827             its->state_ptr = NULL;
3828         }
3829         /* All globals begin dead.  */
3830         its->state = TS_DEAD;
3831     }
3832     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3833         TCGTemp *its = &s->temps[i];
3834         its->state_ptr = NULL;
3835         its->state = TS_DEAD;
3836     }
3837 
3838     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3839         TCGOpcode opc = op->opc;
3840         const TCGOpDef *def = &tcg_op_defs[opc];
3841         TCGLifeData arg_life = op->life;
3842         int nb_iargs, nb_oargs, call_flags;
3843         TCGTemp *arg_ts, *dir_ts;
3844 
3845         if (opc == INDEX_op_call) {
3846             nb_oargs = TCGOP_CALLO(op);
3847             nb_iargs = TCGOP_CALLI(op);
3848             call_flags = tcg_call_flags(op);
3849         } else {
3850             nb_iargs = def->nb_iargs;
3851             nb_oargs = def->nb_oargs;
3852 
3853             /* Set flags similar to how calls require.  */
3854             if (def->flags & TCG_OPF_COND_BRANCH) {
3855                 /* Like reading globals: sync_globals */
3856                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3857             } else if (def->flags & TCG_OPF_BB_END) {
3858                 /* Like writing globals: save_globals */
3859                 call_flags = 0;
3860             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3861                 /* Like reading globals: sync_globals */
3862                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3863             } else {
3864                 /* No effect on globals.  */
3865                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3866                               TCG_CALL_NO_WRITE_GLOBALS);
3867             }
3868         }
3869 
3870         /* Make sure that input arguments are available.  */
3871         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3872             arg_ts = arg_temp(op->args[i]);
3873             dir_ts = arg_ts->state_ptr;
3874             if (dir_ts && arg_ts->state == TS_DEAD) {
3875                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3876                                   ? INDEX_op_ld_i32
3877                                   : INDEX_op_ld_i64);
3878                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3879 
3880                 lop->args[0] = temp_arg(dir_ts);
3881                 lop->args[1] = temp_arg(arg_ts->mem_base);
3882                 lop->args[2] = arg_ts->mem_offset;
3883 
3884                 /* Loaded, but synced with memory.  */
3885                 arg_ts->state = TS_MEM;
3886             }
3887         }
3888 
3889         /* Perform input replacement, and mark inputs that became dead.
3890            No action is required except keeping temp_state up to date
3891            so that we reload when needed.  */
3892         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3893             arg_ts = arg_temp(op->args[i]);
3894             dir_ts = arg_ts->state_ptr;
3895             if (dir_ts) {
3896                 op->args[i] = temp_arg(dir_ts);
3897                 changes = true;
3898                 if (IS_DEAD_ARG(i)) {
3899                     arg_ts->state = TS_DEAD;
3900                 }
3901             }
3902         }
3903 
3904         /* Liveness analysis should ensure that the following are
3905            all correct, for call sites and basic block end points.  */
3906         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3907             /* Nothing to do */
3908         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3909             for (i = 0; i < nb_globals; ++i) {
3910                 /* Liveness should see that globals are synced back,
3911                    that is, either TS_DEAD or TS_MEM.  */
3912                 arg_ts = &s->temps[i];
3913                 tcg_debug_assert(arg_ts->state_ptr == 0
3914                                  || arg_ts->state != 0);
3915             }
3916         } else {
3917             for (i = 0; i < nb_globals; ++i) {
3918                 /* Liveness should see that globals are saved back,
3919                    that is, TS_DEAD, waiting to be reloaded.  */
3920                 arg_ts = &s->temps[i];
3921                 tcg_debug_assert(arg_ts->state_ptr == 0
3922                                  || arg_ts->state == TS_DEAD);
3923             }
3924         }
3925 
3926         /* Outputs become available.  */
3927         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3928             arg_ts = arg_temp(op->args[0]);
3929             dir_ts = arg_ts->state_ptr;
3930             if (dir_ts) {
3931                 op->args[0] = temp_arg(dir_ts);
3932                 changes = true;
3933 
3934                 /* The output is now live and modified.  */
3935                 arg_ts->state = 0;
3936 
3937                 if (NEED_SYNC_ARG(0)) {
3938                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3939                                       ? INDEX_op_st_i32
3940                                       : INDEX_op_st_i64);
3941                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3942                     TCGTemp *out_ts = dir_ts;
3943 
3944                     if (IS_DEAD_ARG(0)) {
3945                         out_ts = arg_temp(op->args[1]);
3946                         arg_ts->state = TS_DEAD;
3947                         tcg_op_remove(s, op);
3948                     } else {
3949                         arg_ts->state = TS_MEM;
3950                     }
3951 
3952                     sop->args[0] = temp_arg(out_ts);
3953                     sop->args[1] = temp_arg(arg_ts->mem_base);
3954                     sop->args[2] = arg_ts->mem_offset;
3955                 } else {
3956                     tcg_debug_assert(!IS_DEAD_ARG(0));
3957                 }
3958             }
3959         } else {
3960             for (i = 0; i < nb_oargs; i++) {
3961                 arg_ts = arg_temp(op->args[i]);
3962                 dir_ts = arg_ts->state_ptr;
3963                 if (!dir_ts) {
3964                     continue;
3965                 }
3966                 op->args[i] = temp_arg(dir_ts);
3967                 changes = true;
3968 
3969                 /* The output is now live and modified.  */
3970                 arg_ts->state = 0;
3971 
3972                 /* Sync outputs upon their last write.  */
3973                 if (NEED_SYNC_ARG(i)) {
3974                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3975                                       ? INDEX_op_st_i32
3976                                       : INDEX_op_st_i64);
3977                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3978 
3979                     sop->args[0] = temp_arg(dir_ts);
3980                     sop->args[1] = temp_arg(arg_ts->mem_base);
3981                     sop->args[2] = arg_ts->mem_offset;
3982 
3983                     arg_ts->state = TS_MEM;
3984                 }
3985                 /* Drop outputs that are dead.  */
3986                 if (IS_DEAD_ARG(i)) {
3987                     arg_ts->state = TS_DEAD;
3988                 }
3989             }
3990         }
3991     }
3992 
3993     return changes;
3994 }
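
/*
 * Illustrative sketch (schematic ops): for an indirect global G whose
 * direct shadow temp is D, an op reading and writing G is rewritten as
 *
 *     add_i32 G, G, t1           ld_i32  D, env, off(G)  <- only if G
 *                         ==>    add_i32 D, D, t1           was TS_DEAD
 *                                st_i32  D, env, off(G)  <- only if
 *                                                           NEED_SYNC_ARG
 *
 * where "env" and "off(G)" stand for G's mem_base and mem_offset.
 */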
3995 
3996 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3997 {
3998     intptr_t off;
3999     int size, align;
4000 
4001     /* When allocating an object, look at the full type. */
4002     size = tcg_type_size(ts->base_type);
4003     switch (ts->base_type) {
4004     case TCG_TYPE_I32:
4005         align = 4;
4006         break;
4007     case TCG_TYPE_I64:
4008     case TCG_TYPE_V64:
4009         align = 8;
4010         break;
4011     case TCG_TYPE_I128:
4012     case TCG_TYPE_V128:
4013     case TCG_TYPE_V256:
4014         /*
4015          * Note that we do not require aligned storage for V256,
4016          * and that we provide alignment for I128 to match V128,
4017          * even if that's above what the host ABI requires.
4018          */
4019         align = 16;
4020         break;
4021     default:
4022         g_assert_not_reached();
4023     }
4024 
4025     /*
4026      * Assume the stack is sufficiently aligned.
4027      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4028      * and do not require 16 byte vector alignment.  This seems slightly
4029      * easier than fully parameterizing the above switch statement.
4030      */
4031     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4032     off = ROUND_UP(s->current_frame_offset, align);
4033 
4034     /* If we've exhausted the stack frame, restart with a smaller TB. */
4035     if (off + size > s->frame_end) {
4036         tcg_raise_tb_overflow(s);
4037     }
4038     s->current_frame_offset = off + size;
4039 #if defined(__sparc__)
4040     off += TCG_TARGET_STACK_BIAS;
4041 #endif
4042 
4043     /* If the object was subdivided, assign memory to all the parts. */
4044     if (ts->base_type != ts->type) {
4045         int part_size = tcg_type_size(ts->type);
4046         int part_count = size / part_size;
4047 
4048         /*
4049          * Each part is allocated sequentially in tcg_temp_new_internal.
4050          * Jump back to the first part by subtracting the current index.
4051          */
4052         ts -= ts->temp_subindex;
4053         for (int i = 0; i < part_count; ++i) {
4054             ts[i].mem_offset = off + i * part_size;
4055             ts[i].mem_base = s->frame_temp;
4056             ts[i].mem_allocated = 1;
4057         }
4058     } else {
4059         ts->mem_offset = off;
4060         ts->mem_base = s->frame_temp;
4061         ts->mem_allocated = 1;
4062     }
4063 }
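
/*
 * Illustrative arithmetic for the layout above, assuming a 16-byte
 * aligned stack: with current_frame_offset == 20, an I64 temp (size 8,
 * align 8) lands at ROUND_UP(20, 8) == 24 and the cursor moves to 32.
 * An I128 temp split into two I64 parts (align 16) lands at
 * ROUND_UP(20, 16) == 32, with part 0 at offset 32 and part 1 at 40.
 */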
4064 
4065 /* Assign @reg to @ts, and update reg_to_temp[]. */
4066 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4067 {
4068     if (ts->val_type == TEMP_VAL_REG) {
4069         TCGReg old = ts->reg;
4070         tcg_debug_assert(s->reg_to_temp[old] == ts);
4071         if (old == reg) {
4072             return;
4073         }
4074         s->reg_to_temp[old] = NULL;
4075     }
4076     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4077     s->reg_to_temp[reg] = ts;
4078     ts->val_type = TEMP_VAL_REG;
4079     ts->reg = reg;
4080 }
4081 
4082 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4083 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4084 {
4085     tcg_debug_assert(type != TEMP_VAL_REG);
4086     if (ts->val_type == TEMP_VAL_REG) {
4087         TCGReg reg = ts->reg;
4088         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4089         s->reg_to_temp[reg] = NULL;
4090     }
4091     ts->val_type = type;
4092 }
4093 
4094 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4095 
4096 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4097    mark it free; otherwise mark it dead.  */
4098 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4099 {
4100     TCGTempVal new_type;
4101 
4102     switch (ts->kind) {
4103     case TEMP_FIXED:
4104         return;
4105     case TEMP_GLOBAL:
4106     case TEMP_TB:
4107         new_type = TEMP_VAL_MEM;
4108         break;
4109     case TEMP_EBB:
4110         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4111         break;
4112     case TEMP_CONST:
4113         new_type = TEMP_VAL_CONST;
4114         break;
4115     default:
4116         g_assert_not_reached();
4117     }
4118     set_temp_val_nonreg(s, ts, new_type);
4119 }
4120 
4121 /* Mark a temporary as dead.  */
4122 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4123 {
4124     temp_free_or_dead(s, ts, 1);
4125 }
4126 
4127 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4128    register needs to be allocated to store a constant.  If 'free_or_dead'
4129    is non-zero, subsequently release the temporary; if it is positive, the
4130    temp is dead; if it is negative, the temp is free.  */
4131 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4132                       TCGRegSet preferred_regs, int free_or_dead)
4133 {
4134     if (!temp_readonly(ts) && !ts->mem_coherent) {
4135         if (!ts->mem_allocated) {
4136             temp_allocate_frame(s, ts);
4137         }
4138         switch (ts->val_type) {
4139         case TEMP_VAL_CONST:
4140             /* If we're going to free the temp immediately, then we won't
4141                require it later in a register, so attempt to store the
4142                constant to memory directly.  */
4143             if (free_or_dead
4144                 && tcg_out_sti(s, ts->type, ts->val,
4145                                ts->mem_base->reg, ts->mem_offset)) {
4146                 break;
4147             }
4148             temp_load(s, ts, tcg_target_available_regs[ts->type],
4149                       allocated_regs, preferred_regs);
4150             /* fallthrough */
4151 
4152         case TEMP_VAL_REG:
4153             tcg_out_st(s, ts->type, ts->reg,
4154                        ts->mem_base->reg, ts->mem_offset);
4155             break;
4156 
4157         case TEMP_VAL_MEM:
4158             break;
4159 
4160         case TEMP_VAL_DEAD:
4161         default:
4162             g_assert_not_reached();
4163         }
4164         ts->mem_coherent = 1;
4165     }
4166     if (free_or_dead) {
4167         temp_free_or_dead(s, ts, free_or_dead);
4168     }
4169 }
4170 
4171 /* free register 'reg' by spilling the corresponding temporary if necessary */
4172 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4173 {
4174     TCGTemp *ts = s->reg_to_temp[reg];
4175     if (ts != NULL) {
4176         temp_sync(s, ts, allocated_regs, 0, -1);
4177     }
4178 }
4179 
4180 /**
4181  * tcg_reg_alloc:
4182  * @required_regs: Set of registers in which we must allocate.
4183  * @allocated_regs: Set of registers which must be avoided.
4184  * @preferred_regs: Set of registers we should prefer.
4185  * @rev: True if we search the registers in "indirect" order.
4186  *
4187  * The allocated register must be in @required_regs & ~@allocated_regs,
4188  * but if we can put it in @preferred_regs we may save a move later.
4189  */
4190 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4191                             TCGRegSet allocated_regs,
4192                             TCGRegSet preferred_regs, bool rev)
4193 {
4194     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4195     TCGRegSet reg_ct[2];
4196     const int *order;
4197 
4198     reg_ct[1] = required_regs & ~allocated_regs;
4199     tcg_debug_assert(reg_ct[1] != 0);
4200     reg_ct[0] = reg_ct[1] & preferred_regs;
4201 
4202     /* Skip the preferred_regs option if it cannot be satisfied,
4203        or if the preference made no difference.  */
4204     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4205 
4206     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4207 
4208     /* Try free registers, preferences first.  */
4209     for (j = f; j < 2; j++) {
4210         TCGRegSet set = reg_ct[j];
4211 
4212         if (tcg_regset_single(set)) {
4213             /* One register in the set.  */
4214             TCGReg reg = tcg_regset_first(set);
4215             if (s->reg_to_temp[reg] == NULL) {
4216                 return reg;
4217             }
4218         } else {
4219             for (i = 0; i < n; i++) {
4220                 TCGReg reg = order[i];
4221                 if (s->reg_to_temp[reg] == NULL &&
4222                     tcg_regset_test_reg(set, reg)) {
4223                     return reg;
4224                 }
4225             }
4226         }
4227     }
4228 
4229     /* We must spill something.  */
4230     for (j = f; j < 2; j++) {
4231         TCGRegSet set = reg_ct[j];
4232 
4233         if (tcg_regset_single(set)) {
4234             /* One register in the set.  */
4235             TCGReg reg = tcg_regset_first(set);
4236             tcg_reg_free(s, reg, allocated_regs);
4237             return reg;
4238         } else {
4239             for (i = 0; i < n; i++) {
4240                 TCGReg reg = order[i];
4241                 if (tcg_regset_test_reg(set, reg)) {
4242                     tcg_reg_free(s, reg, allocated_regs);
4243                     return reg;
4244                 }
4245             }
4246         }
4247     }
4248 
4249     g_assert_not_reached();
4250 }
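
/*
 * Illustrative sketch (hypothetical register names): the loops above
 * make up to four passes, {preferred, required} x {free, spill}.
 * With required = {R0..R3}, preferred = {R2}, and only R2 occupied:
 * the preferred+free pass fails, and the required+free pass returns
 * the first free register in allocation order (say R0), so nothing is
 * spilled.  Spilling is chosen only when every register in the
 * candidate set is occupied.
 */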
4251 
4252 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4253                                  TCGRegSet allocated_regs,
4254                                  TCGRegSet preferred_regs, bool rev)
4255 {
4256     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4257     TCGRegSet reg_ct[2];
4258     const int *order;
4259 
4260     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4261     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4262     tcg_debug_assert(reg_ct[1] != 0);
4263     reg_ct[0] = reg_ct[1] & preferred_regs;
4264 
4265     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4266 
4267     /*
4268      * Skip the preferred_regs option if it cannot be satisfied,
4269      * or if the preference made no difference.
4270      */
4271     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4272 
4273     /*
4274      * Minimize the number of flushes by looking for 2 free registers first,
4275      * then a single flush, then two flushes.
4276      */
4277     for (fmin = 2; fmin >= 0; fmin--) {
4278         for (j = k; j < 2; j++) {
4279             TCGRegSet set = reg_ct[j];
4280 
4281             for (i = 0; i < n; i++) {
4282                 TCGReg reg = order[i];
4283 
4284                 if (tcg_regset_test_reg(set, reg)) {
4285                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4286                     if (f >= fmin) {
4287                         tcg_reg_free(s, reg, allocated_regs);
4288                         tcg_reg_free(s, reg + 1, allocated_regs);
4289                         return reg;
4290                     }
4291                 }
4292             }
4293         }
4294     }
4295     g_assert_not_reached();
4296 }
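
/*
 * Illustrative sketch: f above counts how many of {reg, reg+1} are
 * currently free (0, 1 or 2), and the fmin loop accepts pairs in
 * decreasing order of freeness, so a pair costing no spills beats one
 * costing one spill, which beats one costing two.  The shifted mask
 * (allocated_regs | allocated_regs >> 1) clears bit I whenever either
 * I or I+1 is unavailable, so reg+1 is always legal when reg is chosen.
 */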
4297 
4298 /* Make sure the temporary is in a register.  If needed, allocate the register
4299    from DESIRED while avoiding ALLOCATED.  */
4300 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4301                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4302 {
4303     TCGReg reg;
4304 
4305     switch (ts->val_type) {
4306     case TEMP_VAL_REG:
4307         return;
4308     case TEMP_VAL_CONST:
4309         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4310                             preferred_regs, ts->indirect_base);
4311         if (ts->type <= TCG_TYPE_I64) {
4312             tcg_out_movi(s, ts->type, reg, ts->val);
4313         } else {
4314             uint64_t val = ts->val;
4315             MemOp vece = MO_64;
4316 
4317             /*
4318              * Find the minimal vector element that matches the constant.
4319              * The targets will, in general, have to do this search anyway,
4320              * so do it generically here.
4321              */
4322             if (val == dup_const(MO_8, val)) {
4323                 vece = MO_8;
4324             } else if (val == dup_const(MO_16, val)) {
4325                 vece = MO_16;
4326             } else if (val == dup_const(MO_32, val)) {
4327                 vece = MO_32;
4328             }
4329 
4330             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4331         }
4332         ts->mem_coherent = 0;
4333         break;
4334     case TEMP_VAL_MEM:
4335         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4336                             preferred_regs, ts->indirect_base);
4337         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4338         ts->mem_coherent = 1;
4339         break;
4340     case TEMP_VAL_DEAD:
4341     default:
4342         g_assert_not_reached();
4343     }
4344     set_temp_val_reg(s, ts, reg);
4345 }
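
/*
 * Illustrative values for the dup_const() search above, which finds
 * the smallest element size whose replication reproduces the constant:
 *
 *     0x4242424242424242  -> MO_8   (byte 0x42 repeated)
 *     0x1234123412341234  -> MO_16
 *     0x1234567812345678  -> MO_32
 *     0x0123456789abcdef  -> MO_64  (no smaller period)
 *
 * A smaller element gives the backend more dup-immediate encodings
 * to choose from.
 */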
4346 
4347 /* Save a temporary to memory. 'allocated_regs' is used in case a
4348    temporary register needs to be allocated to store a constant.  */
4349 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4350 {
4351     /* The liveness analysis already ensures that globals are back
4352        in memory. Keep a tcg_debug_assert for safety. */
4353     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4354 }
4355 
4356 /* save globals to their canonical location and assume they can be
4357    modified by the following code. 'allocated_regs' is used in case a
4358    temporary register needs to be allocated to store a constant. */
4359 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4360 {
4361     int i, n;
4362 
4363     for (i = 0, n = s->nb_globals; i < n; i++) {
4364         temp_save(s, &s->temps[i], allocated_regs);
4365     }
4366 }
4367 
4368 /* sync globals to their canonical location and assume they can be
4369    read by the following code. 'allocated_regs' is used in case a
4370    temporary register needs to be allocated to store a constant. */
4371 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4372 {
4373     int i, n;
4374 
4375     for (i = 0, n = s->nb_globals; i < n; i++) {
4376         TCGTemp *ts = &s->temps[i];
4377         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4378                          || ts->kind == TEMP_FIXED
4379                          || ts->mem_coherent);
4380     }
4381 }
4382 
4383 /* at the end of a basic block, we assume all temporaries are dead and
4384    all globals are stored at their canonical location. */
4385 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4386 {
4387     int i;
4388 
4389     for (i = s->nb_globals; i < s->nb_temps; i++) {
4390         TCGTemp *ts = &s->temps[i];
4391 
4392         switch (ts->kind) {
4393         case TEMP_TB:
4394             temp_save(s, ts, allocated_regs);
4395             break;
4396         case TEMP_EBB:
4397             /* The liveness analysis already ensures that temps are dead.
4398                Keep a tcg_debug_assert for safety. */
4399             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4400             break;
4401         case TEMP_CONST:
4402             /* Similarly, we should have freed any allocated register. */
4403             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4404             break;
4405         default:
4406             g_assert_not_reached();
4407         }
4408     }
4409 
4410     save_globals(s, allocated_regs);
4411 }
4412 
4413 /*
4414  * At a conditional branch, we assume all temporaries are dead unless
4415  * explicitly live-across-conditional-branch; all globals and local
4416  * temps are synced to their location.
4417  */
4418 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4419 {
4420     sync_globals(s, allocated_regs);
4421 
4422     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4423         TCGTemp *ts = &s->temps[i];
4424         /*
4425          * The liveness analysis already ensures that temps are dead.
4426          * Keep tcg_debug_asserts for safety.
4427          */
4428         switch (ts->kind) {
4429         case TEMP_TB:
4430             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4431             break;
4432         case TEMP_EBB:
4433         case TEMP_CONST:
4434             break;
4435         default:
4436             g_assert_not_reached();
4437         }
4438     }
4439 }
4440 
4441 /*
4442  * Specialized code generation for INDEX_op_mov_* with a constant.
4443  */
4444 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4445                                   tcg_target_ulong val, TCGLifeData arg_life,
4446                                   TCGRegSet preferred_regs)
4447 {
4448     /* ENV should not be modified.  */
4449     tcg_debug_assert(!temp_readonly(ots));
4450 
4451     /* The movi is not explicitly generated here.  */
4452     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4453     ots->val = val;
4454     ots->mem_coherent = 0;
4455     if (NEED_SYNC_ARG(0)) {
4456         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4457     } else if (IS_DEAD_ARG(0)) {
4458         temp_dead(s, ots);
4459     }
4460 }
4461 
4462 /*
4463  * Specialized code generation for INDEX_op_mov_*.
4464  */
4465 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4466 {
4467     const TCGLifeData arg_life = op->life;
4468     TCGRegSet allocated_regs, preferred_regs;
4469     TCGTemp *ts, *ots;
4470     TCGType otype, itype;
4471     TCGReg oreg, ireg;
4472 
4473     allocated_regs = s->reserved_regs;
4474     preferred_regs = output_pref(op, 0);
4475     ots = arg_temp(op->args[0]);
4476     ts = arg_temp(op->args[1]);
4477 
4478     /* ENV should not be modified.  */
4479     tcg_debug_assert(!temp_readonly(ots));
4480 
4481     /* Note that otype != itype for no-op truncation.  */
4482     otype = ots->type;
4483     itype = ts->type;
4484 
4485     if (ts->val_type == TEMP_VAL_CONST) {
4486         /* propagate constant or generate sti */
4487         tcg_target_ulong val = ts->val;
4488         if (IS_DEAD_ARG(1)) {
4489             temp_dead(s, ts);
4490         }
4491         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4492         return;
4493     }
4494 
4495     /* If the source value is in memory we're going to be forced
4496        to have it in a register in order to perform the copy.  Copy
4497        the SOURCE value into its own register first, so that we
4498        don't have to reload SOURCE the next time it is used. */
4499     if (ts->val_type == TEMP_VAL_MEM) {
4500         temp_load(s, ts, tcg_target_available_regs[itype],
4501                   allocated_regs, preferred_regs);
4502     }
4503     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4504     ireg = ts->reg;
4505 
4506     if (IS_DEAD_ARG(0)) {
4507         /* mov to a non-saved dead register makes no sense (even with
4508            liveness analysis disabled). */
4509         tcg_debug_assert(NEED_SYNC_ARG(0));
4510         if (!ots->mem_allocated) {
4511             temp_allocate_frame(s, ots);
4512         }
4513         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4514         if (IS_DEAD_ARG(1)) {
4515             temp_dead(s, ts);
4516         }
4517         temp_dead(s, ots);
4518         return;
4519     }
4520 
4521     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4522         /*
4523          * The mov can be suppressed.  Kill input first, so that it
4524          * is unlinked from reg_to_temp, then set the output to the
4525          * reg that we saved from the input.
4526          */
4527         temp_dead(s, ts);
4528         oreg = ireg;
4529     } else {
4530         if (ots->val_type == TEMP_VAL_REG) {
4531             oreg = ots->reg;
4532         } else {
4533             /* Make sure to not spill the input register during allocation. */
4534             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4535                                  allocated_regs | ((TCGRegSet)1 << ireg),
4536                                  preferred_regs, ots->indirect_base);
4537         }
4538         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4539             /*
4540              * Cross register class move not supported.
4541              * Store the source register into the destination slot
4542              * and leave the destination temp as TEMP_VAL_MEM.
4543              */
4544             assert(!temp_readonly(ots));
4545             if (!ts->mem_allocated) {
4546                 temp_allocate_frame(s, ots);
4547             }
4548             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4549             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4550             ots->mem_coherent = 1;
4551             return;
4552         }
4553     }
4554     set_temp_val_reg(s, ots, oreg);
4555     ots->mem_coherent = 0;
4556 
4557     if (NEED_SYNC_ARG(0)) {
4558         temp_sync(s, ots, allocated_regs, 0, 0);
4559     }
4560 }
4561 
4562 /*
4563  * Specialized code generation for INDEX_op_dup_vec.
4564  */
4565 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4566 {
4567     const TCGLifeData arg_life = op->life;
4568     TCGRegSet dup_out_regs, dup_in_regs;
4569     TCGTemp *its, *ots;
4570     TCGType itype, vtype;
4571     unsigned vece;
4572     int lowpart_ofs;
4573     bool ok;
4574 
4575     ots = arg_temp(op->args[0]);
4576     its = arg_temp(op->args[1]);
4577 
4578     /* ENV should not be modified.  */
4579     tcg_debug_assert(!temp_readonly(ots));
4580 
4581     itype = its->type;
4582     vece = TCGOP_VECE(op);
4583     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4584 
4585     if (its->val_type == TEMP_VAL_CONST) {
4586         /* Propagate constant via movi -> dupi.  */
4587         tcg_target_ulong val = its->val;
4588         if (IS_DEAD_ARG(1)) {
4589             temp_dead(s, its);
4590         }
4591         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4592         return;
4593     }
4594 
4595     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4596     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4597 
4598     /* Allocate the output register now.  */
4599     if (ots->val_type != TEMP_VAL_REG) {
4600         TCGRegSet allocated_regs = s->reserved_regs;
4601         TCGReg oreg;
4602 
4603         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4604             /* Make sure to not spill the input register. */
4605             tcg_regset_set_reg(allocated_regs, its->reg);
4606         }
4607         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4608                              output_pref(op, 0), ots->indirect_base);
4609         set_temp_val_reg(s, ots, oreg);
4610     }
4611 
4612     switch (its->val_type) {
4613     case TEMP_VAL_REG:
4614         /*
4615          * The dup constraints must be broad, covering all possible VECE.
4616          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4617          * to fail, indicating that extra moves are required for that case.
4618          */
4619         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4620             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4621                 goto done;
4622             }
4623             /* Try again from memory or a vector input register.  */
4624         }
4625         if (!its->mem_coherent) {
4626             /*
4627              * The input register is not synced, and so an extra store
4628              * would be required to use memory.  Attempt an integer-vector
4629              * register move first.  We do not have a TCGRegSet for this.
4630              */
4631             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4632                 break;
4633             }
4634             /* Sync the temp back to its slot and load from there.  */
4635             temp_sync(s, its, s->reserved_regs, 0, 0);
4636         }
4637         /* fall through */
4638 
4639     case TEMP_VAL_MEM:
4640         lowpart_ofs = 0;
4641         if (HOST_BIG_ENDIAN) {
4642             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4643         }
4644         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4645                              its->mem_offset + lowpart_ofs)) {
4646             goto done;
4647         }
4648         /* Load the input into the destination vector register. */
4649         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4650         break;
4651 
4652     default:
4653         g_assert_not_reached();
4654     }
4655 
4656     /* We now have a vector input register, so dup must succeed. */
4657     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4658     tcg_debug_assert(ok);
4659 
4660  done:
4661     ots->mem_coherent = 0;
4662     if (IS_DEAD_ARG(1)) {
4663         temp_dead(s, its);
4664     }
4665     if (NEED_SYNC_ARG(0)) {
4666         temp_sync(s, ots, s->reserved_regs, 0, 0);
4667     }
4668     if (IS_DEAD_ARG(0)) {
4669         temp_dead(s, ots);
4670     }
4671 }
4672 
4673 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4674 {
4675     const TCGLifeData arg_life = op->life;
4676     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4677     TCGRegSet i_allocated_regs;
4678     TCGRegSet o_allocated_regs;
4679     int i, k, nb_iargs, nb_oargs;
4680     TCGReg reg;
4681     TCGArg arg;
4682     const TCGArgConstraint *arg_ct;
4683     TCGTemp *ts;
4684     TCGArg new_args[TCG_MAX_OP_ARGS];
4685     int const_args[TCG_MAX_OP_ARGS];
4686 
4687     nb_oargs = def->nb_oargs;
4688     nb_iargs = def->nb_iargs;
4689 
4690     /* copy constants */
4691     memcpy(new_args + nb_oargs + nb_iargs,
4692            op->args + nb_oargs + nb_iargs,
4693            sizeof(TCGArg) * def->nb_cargs);
4694 
4695     i_allocated_regs = s->reserved_regs;
4696     o_allocated_regs = s->reserved_regs;
4697 
4698     /* satisfy input constraints */
4699     for (k = 0; k < nb_iargs; k++) {
4700         TCGRegSet i_preferred_regs, i_required_regs;
4701         bool allocate_new_reg, copyto_new_reg;
4702         TCGTemp *ts2;
4703         int i1, i2;
4704 
4705         i = def->args_ct[nb_oargs + k].sort_index;
4706         arg = op->args[i];
4707         arg_ct = &def->args_ct[i];
4708         ts = arg_temp(arg);
4709 
4710         if (ts->val_type == TEMP_VAL_CONST
4711             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4712             /* constant is OK for instruction */
4713             const_args[i] = 1;
4714             new_args[i] = ts->val;
4715             continue;
4716         }
4717 
4718         reg = ts->reg;
4719         i_preferred_regs = 0;
4720         i_required_regs = arg_ct->regs;
4721         allocate_new_reg = false;
4722         copyto_new_reg = false;
4723 
4724         switch (arg_ct->pair) {
4725         case 0: /* not paired */
4726             if (arg_ct->ialias) {
4727                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4728 
4729                 /*
4730                  * If the input is readonly, then it cannot also be an
4731                  * output and aliased to itself.  If the input is not
4732                  * dead after the instruction, we must allocate a new
4733                  * register and move it.
4734                  */
4735                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4736                     || def->args_ct[arg_ct->alias_index].newreg) {
4737                     allocate_new_reg = true;
4738                 } else if (ts->val_type == TEMP_VAL_REG) {
4739                     /*
4740                      * Check if the current register has already been
4741                      * allocated for another input.
4742                      */
4743                     allocate_new_reg =
4744                         tcg_regset_test_reg(i_allocated_regs, reg);
4745                 }
4746             }
4747             if (!allocate_new_reg) {
4748                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4749                           i_preferred_regs);
4750                 reg = ts->reg;
4751                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4752             }
4753             if (allocate_new_reg) {
4754                 /*
4755                  * Allocate a new register matching the constraint
4756                  * and move the temporary register into it.
4757                  */
4758                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4759                           i_allocated_regs, 0);
4760                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4761                                     i_preferred_regs, ts->indirect_base);
4762                 copyto_new_reg = true;
4763             }
4764             break;
4765 
4766         case 1:
4767             /* First of an input pair; if i1 == i2, the second is an output. */
4768             i1 = i;
4769             i2 = arg_ct->pair_index;
4770             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4771 
4772             /*
4773              * It is easier to default to allocating a new pair
4774              * and to identify a few cases where it's not required.
4775              */
4776             if (arg_ct->ialias) {
4777                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
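                /*
                 * Reuse in place is possible only when the inputs die
                 * here, ts already sits in a register satisfying the
                 * constraint, and reg + 1 holds the second input or is
                 * otherwise free.
                 */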
4778                 if (IS_DEAD_ARG(i1) &&
4779                     IS_DEAD_ARG(i2) &&
4780                     !temp_readonly(ts) &&
4781                     ts->val_type == TEMP_VAL_REG &&
4782                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4783                     tcg_regset_test_reg(i_required_regs, reg) &&
4784                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4785                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4786                     (ts2
4787                      ? ts2->val_type == TEMP_VAL_REG &&
4788                        ts2->reg == reg + 1 &&
4789                        !temp_readonly(ts2)
4790                      : s->reg_to_temp[reg + 1] == NULL)) {
4791                     break;
4792                 }
4793             } else {
4794                 /* Without aliasing, the pair must also be an input. */
4795                 tcg_debug_assert(ts2);
4796                 if (ts->val_type == TEMP_VAL_REG &&
4797                     ts2->val_type == TEMP_VAL_REG &&
4798                     ts2->reg == reg + 1 &&
4799                     tcg_regset_test_reg(i_required_regs, reg)) {
4800                     break;
4801                 }
4802             }
4803             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4804                                      0, ts->indirect_base);
4805             goto do_pair;
4806 
4807         case 2: /* pair second */
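            /* The first of the pair, at pair_index, was allocated above. */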
4808             reg = new_args[arg_ct->pair_index] + 1;
4809             goto do_pair;
4810 
4811         case 3: /* ialias with second output, no first input */
4812             tcg_debug_assert(arg_ct->ialias);
4813             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4814 
4815             if (IS_DEAD_ARG(i) &&
4816                 !temp_readonly(ts) &&
4817                 ts->val_type == TEMP_VAL_REG &&
4818                 reg > 0 &&
4819                 s->reg_to_temp[reg - 1] == NULL &&
4820                 tcg_regset_test_reg(i_required_regs, reg) &&
4821                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4822                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4823                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4824                 break;
4825             }
4826             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4827                                      i_allocated_regs, 0,
4828                                      ts->indirect_base);
4829             tcg_regset_set_reg(i_allocated_regs, reg);
4830             reg += 1;
4831             goto do_pair;
4832 
4833         do_pair:
4834             /*
4835              * If an aliased input is not dead after the instruction,
4836              * we must allocate a new register and move it.
4837              */
4838             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4839                 TCGRegSet t_allocated_regs = i_allocated_regs;
4840 
4841                 /*
4842                  * Because of the alias, and the continued life, make sure
4843                  * that the temp is somewhere *other* than the reg pair,
4844                  * and we get a copy in reg.
4845                  */
4846                 tcg_regset_set_reg(t_allocated_regs, reg);
4847                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4848                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4849                     /* If ts was already in reg, copy it somewhere else. */
4850                     TCGReg nr;
4851                     bool ok;
4852 
4853                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4854                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4855                                        t_allocated_regs, 0, ts->indirect_base);
4856                     ok = tcg_out_mov(s, ts->type, nr, reg);
4857                     tcg_debug_assert(ok);
4858 
4859                     set_temp_val_reg(s, ts, nr);
4860                 } else {
4861                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4862                               t_allocated_regs, 0);
4863                     copyto_new_reg = true;
4864                 }
4865             } else {
4866                 /* Preferably allocate to reg, otherwise copy. */
4867                 i_required_regs = (TCGRegSet)1 << reg;
4868                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4869                           i_preferred_regs);
4870                 copyto_new_reg = ts->reg != reg;
4871             }
4872             break;
4873 
4874         default:
4875             g_assert_not_reached();
4876         }
4877 
4878         if (copyto_new_reg) {
4879             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4880                 /*
4881                  * Cross register class move not supported.  Sync the
4882                  * temp back to its slot and load from there.
4883                  */
4884                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4885                 tcg_out_ld(s, ts->type, reg,
4886                            ts->mem_base->reg, ts->mem_offset);
4887             }
4888         }
4889         new_args[i] = reg;
4890         const_args[i] = 0;
4891         tcg_regset_set_reg(i_allocated_regs, reg);
4892     }
4893 
4894     /* mark dead temporaries and free the associated registers */
4895     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4896         if (IS_DEAD_ARG(i)) {
4897             temp_dead(s, arg_temp(op->args[i]));
4898         }
4899     }
4900 
4901     if (def->flags & TCG_OPF_COND_BRANCH) {
4902         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4903     } else if (def->flags & TCG_OPF_BB_END) {
4904         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4905     } else {
4906         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4907             /* XXX: permit generic clobber register list ? */
4908             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4909                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4910                     tcg_reg_free(s, i, i_allocated_regs);
4911                 }
4912             }
4913         }
4914         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4915             /* sync globals if the op has side effects and might trigger
4916                an exception. */
4917             sync_globals(s, i_allocated_regs);
4918         }
4919 
4920         /* satisfy the output constraints */
4921         for (k = 0; k < nb_oargs; k++) {
4922             i = def->args_ct[k].sort_index;
4923             arg = op->args[i];
4924             arg_ct = &def->args_ct[i];
4925             ts = arg_temp(arg);
4926 
4927             /* ENV should not be modified.  */
4928             tcg_debug_assert(!temp_readonly(ts));
4929 
4930             switch (arg_ct->pair) {
4931             case 0: /* not paired */
4932                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4933                     reg = new_args[arg_ct->alias_index];
4934                 } else if (arg_ct->newreg) {
4935                     reg = tcg_reg_alloc(s, arg_ct->regs,
4936                                         i_allocated_regs | o_allocated_regs,
4937                                         output_pref(op, k), ts->indirect_base);
4938                 } else {
4939                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4940                                         output_pref(op, k), ts->indirect_base);
4941                 }
4942                 break;
4943 
4944             case 1: /* first of pair */
4945                 tcg_debug_assert(!arg_ct->newreg);
4946                 if (arg_ct->oalias) {
4947                     reg = new_args[arg_ct->alias_index];
4948                     break;
4949                 }
4950                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4951                                          output_pref(op, k), ts->indirect_base);
4952                 break;
4953 
4954             case 2: /* second of pair */
4955                 tcg_debug_assert(!arg_ct->newreg);
4956                 if (arg_ct->oalias) {
4957                     reg = new_args[arg_ct->alias_index];
4958                 } else {
4959                     reg = new_args[arg_ct->pair_index] + 1;
4960                 }
4961                 break;
4962 
4963             case 3: /* first of pair, aliasing with a second input */
4964                 tcg_debug_assert(!arg_ct->newreg);
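                /* The paired second output aliases an input placed at reg + 1. */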
4965                 reg = new_args[arg_ct->pair_index] - 1;
4966                 break;
4967 
4968             default:
4969                 g_assert_not_reached();
4970             }
4971             tcg_regset_set_reg(o_allocated_regs, reg);
4972             set_temp_val_reg(s, ts, reg);
4973             ts->mem_coherent = 0;
4974             new_args[i] = reg;
4975         }
4976     }
4977 
4978     /* emit instruction */
4979     switch (op->opc) {
4980     case INDEX_op_ext8s_i32:
4981         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4982         break;
4983     case INDEX_op_ext8s_i64:
4984         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4985         break;
4986     case INDEX_op_ext8u_i32:
4987     case INDEX_op_ext8u_i64:
4988         tcg_out_ext8u(s, new_args[0], new_args[1]);
4989         break;
4990     case INDEX_op_ext16s_i32:
4991         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4992         break;
4993     case INDEX_op_ext16s_i64:
4994         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4995         break;
4996     case INDEX_op_ext16u_i32:
4997     case INDEX_op_ext16u_i64:
4998         tcg_out_ext16u(s, new_args[0], new_args[1]);
4999         break;
5000     case INDEX_op_ext32s_i64:
5001         tcg_out_ext32s(s, new_args[0], new_args[1]);
5002         break;
5003     case INDEX_op_ext32u_i64:
5004         tcg_out_ext32u(s, new_args[0], new_args[1]);
5005         break;
5006     case INDEX_op_ext_i32_i64:
5007         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5008         break;
5009     case INDEX_op_extu_i32_i64:
5010         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5011         break;
5012     case INDEX_op_extrl_i64_i32:
5013         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5014         break;
5015     default:
5016         if (def->flags & TCG_OPF_VECTOR) {
5017             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5018                            new_args, const_args);
5019         } else {
5020             tcg_out_op(s, op->opc, new_args, const_args);
5021         }
5022         break;
5023     }
5024 
5025     /* move the outputs in the correct register if needed */
5026     for (i = 0; i < nb_oargs; i++) {
5027         ts = arg_temp(op->args[i]);
5028 
5029         /* ENV should not be modified.  */
5030         tcg_debug_assert(!temp_readonly(ts));
5031 
5032         if (NEED_SYNC_ARG(i)) {
5033             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5034         } else if (IS_DEAD_ARG(i)) {
5035             temp_dead(s, ts);
5036         }
5037     }
5038 }
5039 
5040 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5041 {
5042     const TCGLifeData arg_life = op->life;
5043     TCGTemp *ots, *itsl, *itsh;
5044     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5045 
5046     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5047     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5048     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5049 
5050     ots = arg_temp(op->args[0]);
5051     itsl = arg_temp(op->args[1]);
5052     itsh = arg_temp(op->args[2]);
5053 
5054     /* ENV should not be modified.  */
5055     tcg_debug_assert(!temp_readonly(ots));
5056 
5057     /* Allocate the output register now.  */
5058     if (ots->val_type != TEMP_VAL_REG) {
5059         TCGRegSet allocated_regs = s->reserved_regs;
5060         TCGRegSet dup_out_regs =
5061             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5062         TCGReg oreg;
5063 
5064         /* Make sure to not spill the input registers. */
5065         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5066             tcg_regset_set_reg(allocated_regs, itsl->reg);
5067         }
5068         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5069             tcg_regset_set_reg(allocated_regs, itsh->reg);
5070         }
5071 
5072         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5073                              output_pref(op, 0), ots->indirect_base);
5074         set_temp_val_reg(s, ots, oreg);
5075     }
5076 
5077     /* Promote dup2 of immediates to dupi_vec. */
5078     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5079         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5080         MemOp vece = MO_64;
5081 
5082         if (val == dup_const(MO_8, val)) {
5083             vece = MO_8;
5084         } else if (val == dup_const(MO_16, val)) {
5085             vece = MO_16;
5086         } else if (val == dup_const(MO_32, val)) {
5087             vece = MO_32;
5088         }
5089 
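        /*
         * Illustrative: val == 0x4242424242424242 already matches at MO_8,
         * while 0x1234123412341234 first matches at MO_16, so the
         * narrowest possible dup element is chosen.
         */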
5090         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5091         goto done;
5092     }
5093 
5094     /* If the two inputs form one 64-bit value, try dupm_vec. */
5095     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5096         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5097         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
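        /* its is the subindex-0 half, whose slot is the base of the value. */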
5098         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5099 
5100         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5101         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5102 
5103         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5104                              its->mem_base->reg, its->mem_offset)) {
5105             goto done;
5106         }
5107     }
5108 
5109     /* Fall back to generic expansion. */
5110     return false;
5111 
5112  done:
5113     ots->mem_coherent = 0;
5114     if (IS_DEAD_ARG(1)) {
5115         temp_dead(s, itsl);
5116     }
5117     if (IS_DEAD_ARG(2)) {
5118         temp_dead(s, itsh);
5119     }
5120     if (NEED_SYNC_ARG(0)) {
5121         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5122     } else if (IS_DEAD_ARG(0)) {
5123         temp_dead(s, ots);
5124     }
5125     return true;
5126 }
5127 
5128 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5129                          TCGRegSet allocated_regs)
5130 {
5131     if (ts->val_type == TEMP_VAL_REG) {
5132         if (ts->reg != reg) {
5133             tcg_reg_free(s, reg, allocated_regs);
5134             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5135                 /*
5136                  * Cross register class move not supported.  Sync the
5137                  * temp back to its slot and load from there.
5138                  */
5139                 temp_sync(s, ts, allocated_regs, 0, 0);
5140                 tcg_out_ld(s, ts->type, reg,
5141                            ts->mem_base->reg, ts->mem_offset);
5142             }
5143         }
5144     } else {
5145         TCGRegSet arg_set = 0;
5146 
5147         tcg_reg_free(s, reg, allocated_regs);
5148         tcg_regset_set_reg(arg_set, reg);
5149         temp_load(s, ts, arg_set, allocated_regs, 0);
5150     }
5151 }
5152 
5153 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5154                          TCGRegSet allocated_regs)
5155 {
5156     /*
5157      * When the destination is on the stack, load up the temp and store.
5158      * If there are many call-saved registers, the temp might live to
5159      * see another use; otherwise it'll be discarded.
5160      */
5161     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5162     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5163                arg_slot_stk_ofs(arg_slot));
5164 }
5165 
5166 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5167                             TCGTemp *ts, TCGRegSet *allocated_regs)
5168 {
5169     if (arg_slot_reg_p(l->arg_slot)) {
5170         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5171         load_arg_reg(s, reg, ts, *allocated_regs);
5172         tcg_regset_set_reg(*allocated_regs, reg);
5173     } else {
5174         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5175     }
5176 }
5177 
5178 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5179                          intptr_t ref_off, TCGRegSet *allocated_regs)
5180 {
5181     TCGReg reg;
5182 
5183     if (arg_slot_reg_p(arg_slot)) {
5184         reg = tcg_target_call_iarg_regs[arg_slot];
5185         tcg_reg_free(s, reg, *allocated_regs);
5186         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5187         tcg_regset_set_reg(*allocated_regs, reg);
5188     } else {
5189         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5190                             *allocated_regs, 0, false);
5191         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5192         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5193                    arg_slot_stk_ofs(arg_slot));
5194     }
5195 }
5196 
5197 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5198 {
5199     const int nb_oargs = TCGOP_CALLO(op);
5200     const int nb_iargs = TCGOP_CALLI(op);
5201     const TCGLifeData arg_life = op->life;
5202     const TCGHelperInfo *info = tcg_call_info(op);
5203     TCGRegSet allocated_regs = s->reserved_regs;
5204     int i;
5205 
5206     /*
5207      * Move inputs into place in reverse order,
5208      * so that we place stacked arguments first.
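     * Stores to stack slots may load temps into scratch registers,
     * which could otherwise clobber already-placed register arguments.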
5209      */
5210     for (i = nb_iargs - 1; i >= 0; --i) {
5211         const TCGCallArgumentLoc *loc = &info->in[i];
5212         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5213 
5214         switch (loc->kind) {
5215         case TCG_CALL_ARG_NORMAL:
5216         case TCG_CALL_ARG_EXTEND_U:
5217         case TCG_CALL_ARG_EXTEND_S:
5218             load_arg_normal(s, loc, ts, &allocated_regs);
5219             break;
5220         case TCG_CALL_ARG_BY_REF:
5221             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5222             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5223                          arg_slot_stk_ofs(loc->ref_slot),
5224                          &allocated_regs);
5225             break;
5226         case TCG_CALL_ARG_BY_REF_N:
5227             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5228             break;
5229         default:
5230             g_assert_not_reached();
5231         }
5232     }
5233 
5234     /* Mark dead temporaries and free the associated registers.  */
5235     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5236         if (IS_DEAD_ARG(i)) {
5237             temp_dead(s, arg_temp(op->args[i]));
5238         }
5239     }
5240 
5241     /* Clobber call registers.  */
5242     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5243         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5244             tcg_reg_free(s, i, allocated_regs);
5245         }
5246     }
5247 
5248     /*
5249      * Save globals if they might be written by the helper,
5250      * sync them if they might be read.
5251      */
5252     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5253         /* Nothing to do */
5254     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5255         sync_globals(s, allocated_regs);
5256     } else {
5257         save_globals(s, allocated_regs);
5258     }
5259 
5260     /*
5261      * If the ABI passes a pointer to the returned struct as the first
5262      * argument, load that now.  Pass a pointer to the output home slot.
5263      */
5264     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5265         TCGTemp *ts = arg_temp(op->args[0]);
5266 
5267         if (!ts->mem_allocated) {
5268             temp_allocate_frame(s, ts);
5269         }
5270         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5271     }
5272 
5273     tcg_out_call(s, tcg_call_func(op), info);
5274 
5275     /* Assign output registers and emit moves if needed.  */
5276     switch (info->out_kind) {
5277     case TCG_CALL_RET_NORMAL:
5278         for (i = 0; i < nb_oargs; i++) {
5279             TCGTemp *ts = arg_temp(op->args[i]);
5280             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5281 
5282             /* ENV should not be modified.  */
5283             tcg_debug_assert(!temp_readonly(ts));
5284 
5285             set_temp_val_reg(s, ts, reg);
5286             ts->mem_coherent = 0;
5287         }
5288         break;
5289 
5290     case TCG_CALL_RET_BY_VEC:
5291         {
5292             TCGTemp *ts = arg_temp(op->args[0]);
5293 
5294             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5295             tcg_debug_assert(ts->temp_subindex == 0);
5296             if (!ts->mem_allocated) {
5297                 temp_allocate_frame(s, ts);
5298             }
5299             tcg_out_st(s, TCG_TYPE_V128,
5300                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5301                        ts->mem_base->reg, ts->mem_offset);
5302         }
5303         /* fall through to mark all parts in memory */
5304 
5305     case TCG_CALL_RET_BY_REF:
5306         /* The callee has performed a write through the reference. */
5307         for (i = 0; i < nb_oargs; i++) {
5308             TCGTemp *ts = arg_temp(op->args[i]);
5309             ts->val_type = TEMP_VAL_MEM;
5310         }
5311         break;
5312 
5313     default:
5314         g_assert_not_reached();
5315     }
5316 
5317     /* Flush or discard output registers as needed. */
5318     for (i = 0; i < nb_oargs; i++) {
5319         TCGTemp *ts = arg_temp(op->args[i]);
5320         if (NEED_SYNC_ARG(i)) {
5321             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5322         } else if (IS_DEAD_ARG(i)) {
5323             temp_dead(s, ts);
5324         }
5325     }
5326 }
5327 
5328 /**
5329  * atom_and_align_for_opc:
5330  * @s: tcg context
5331  * @opc: memory operation code
5332  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5333  * @allow_two_ops: true if we are prepared to issue two operations
5334  *
5335  * Return the alignment and atomicity to use for the inline fast path
5336  * for the given memory operation.  The alignment may be larger than
5337  * that specified in @opc, and the correct alignment will be diagnosed
5338  * by the slow path helper.
5339  *
5340  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5341  * and issue two loads or stores for subalignment.
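 *
 * For example (illustrative): an MO_64 operation under
 * MO_ATOM_IFALIGN_PAIR requires only MO_32 atomicity per half.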
5342  */
5343 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5344                                            MemOp host_atom, bool allow_two_ops)
5345 {
5346     MemOp align = get_alignment_bits(opc);
5347     MemOp size = opc & MO_SIZE;
5348     MemOp half = size ? size - 1 : 0;
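    /* e.g. half of MO_128 is MO_64, the size of each of two sub-ops. */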
5349     MemOp atmax;
5350     MemOp atom;
5351 
5352     /* When serialized, no further atomicity required.  */
5353     if (s->gen_tb->cflags & CF_PARALLEL) {
5354         atom = opc & MO_ATOM_MASK;
5355     } else {
5356         atom = MO_ATOM_NONE;
5357     }
5358 
5359     switch (atom) {
5360     case MO_ATOM_NONE:
5361         /* The operation requires no specific atomicity. */
5362         atmax = MO_8;
5363         break;
5364 
5365     case MO_ATOM_IFALIGN:
5366         atmax = size;
5367         break;
5368 
5369     case MO_ATOM_IFALIGN_PAIR:
5370         atmax = half;
5371         break;
5372 
5373     case MO_ATOM_WITHIN16:
5374         atmax = size;
5375         if (size == MO_128) {
5376             /* Misalignment implies !within16, and therefore no atomicity. */
5377         } else if (host_atom != MO_ATOM_WITHIN16) {
5378             /* The host does not implement within16, so require alignment. */
5379             align = MAX(align, size);
5380         }
5381         break;
5382 
5383     case MO_ATOM_WITHIN16_PAIR:
5384         atmax = size;
5385         /*
5386          * Misalignment implies !within16, and therefore half atomicity.
5387          * Any host prepared for two operations can implement this with
5388          * half alignment.
5389          */
5390         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5391             align = MAX(align, half);
5392         }
5393         break;
5394 
5395     case MO_ATOM_SUBALIGN:
5396         atmax = size;
5397         if (host_atom != MO_ATOM_SUBALIGN) {
5398             /* If unaligned but not odd, there are subobjects up to half. */
5399             if (allow_two_ops) {
5400                 align = MAX(align, half);
5401             } else {
5402                 align = MAX(align, size);
5403             }
5404         }
5405         break;
5406 
5407     default:
5408         g_assert_not_reached();
5409     }
5410 
5411     return (TCGAtomAlign){ .atom = atmax, .align = align };
5412 }
5413 
5414 /*
5415  * Similarly for qemu_ld/st slow path helpers.
5416  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5417  * using only the provided backend tcg_out_* functions.
5418  */
5419 
5420 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5421 {
5422     int ofs = arg_slot_stk_ofs(slot);
5423 
5424     /*
5425      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5426      * require extension to uint64_t, adjust the address for uint32_t.
5427      */
5428     if (HOST_BIG_ENDIAN &&
5429         TCG_TARGET_REG_BITS == 64 &&
5430         type == TCG_TYPE_I32) {
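        /* Big-endian: the 32-bit value occupies the high-addressed half. */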
5431         ofs += 4;
5432     }
5433     return ofs;
5434 }
5435 
5436 static void tcg_out_helper_load_slots(TCGContext *s,
5437                                       unsigned nmov, TCGMovExtend *mov,
5438                                       const TCGLdstHelperParam *parm)
5439 {
5440     unsigned i;
5441     TCGReg dst3;
5442 
5443     /*
5444      * Start from the end, storing to the stack first.
5445      * This frees those registers, so we need not consider overlap.
5446      */
5447     for (i = nmov; i-- > 0; ) {
5448         unsigned slot = mov[i].dst;
5449 
5450         if (arg_slot_reg_p(slot)) {
5451             goto found_reg;
5452         }
5453 
5454         TCGReg src = mov[i].src;
5455         TCGType dst_type = mov[i].dst_type;
5456         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5457 
5458         /* The argument is going onto the stack; extend into scratch. */
5459         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5460             tcg_debug_assert(parm->ntmp != 0);
5461             mov[i].dst = src = parm->tmp[0];
5462             tcg_out_movext1(s, &mov[i]);
5463         }
5464 
5465         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5466                    tcg_out_helper_stk_ofs(dst_type, slot));
5467     }
5468     return;
5469 
5470  found_reg:
5471     /*
5472      * The remaining arguments are in registers.
5473      * Convert slot numbers to argument registers.
5474      */
5475     nmov = i + 1;
5476     for (i = 0; i < nmov; ++i) {
5477         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5478     }
5479 
5480     switch (nmov) {
5481     case 4:
5482         /* The backend must have provided enough temps for the worst case. */
5483         tcg_debug_assert(parm->ntmp >= 2);
5484 
5485         dst3 = mov[3].dst;
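        /* mov[3] is emitted first; its dst must not clobber a later src. */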
5486         for (unsigned j = 0; j < 3; ++j) {
5487             if (dst3 == mov[j].src) {
5488                 /*
5489                  * Conflict. Copy the source to a temporary, perform the
5490                  * remaining moves, then the extension from our scratch
5491                  * on the way out.
5492                  */
5493                 TCGReg scratch = parm->tmp[1];
5494 
5495                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5496                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5497                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5498                 return;
5499             }
5500         }
5501 
5502         /* No conflicts: perform this move and continue. */
5503         tcg_out_movext1(s, &mov[3]);
5504         /* fall through */
5505 
5506     case 3:
5507         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5508                         parm->ntmp ? parm->tmp[0] : -1);
5509         break;
5510     case 2:
5511         tcg_out_movext2(s, mov, mov + 1,
5512                         parm->ntmp ? parm->tmp[0] : -1);
5513         break;
5514     case 1:
5515         tcg_out_movext1(s, mov);
5516         break;
5517     default:
5518         g_assert_not_reached();
5519     }
5520 }
5521 
5522 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5523                                     TCGType type, tcg_target_long imm,
5524                                     const TCGLdstHelperParam *parm)
5525 {
5526     if (arg_slot_reg_p(slot)) {
5527         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5528     } else {
5529         int ofs = tcg_out_helper_stk_ofs(type, slot);
5530         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5531             tcg_debug_assert(parm->ntmp != 0);
5532             tcg_out_movi(s, type, parm->tmp[0], imm);
5533             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5534         }
5535     }
5536 }
5537 
5538 static void tcg_out_helper_load_common_args(TCGContext *s,
5539                                             const TCGLabelQemuLdst *ldst,
5540                                             const TCGLdstHelperParam *parm,
5541                                             const TCGHelperInfo *info,
5542                                             unsigned next_arg)
5543 {
5544     TCGMovExtend ptr_mov = {
5545         .dst_type = TCG_TYPE_PTR,
5546         .src_type = TCG_TYPE_PTR,
5547         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5548     };
5549     const TCGCallArgumentLoc *loc = &info->in[0];
5550     TCGType type;
5551     unsigned slot;
5552     tcg_target_ulong imm;
5553 
5554     /*
5555      * Handle env, which is always first.
5556      */
5557     ptr_mov.dst = loc->arg_slot;
5558     ptr_mov.src = TCG_AREG0;
5559     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5560 
5561     /*
5562      * Handle oi.
5563      */
5564     imm = ldst->oi;
5565     loc = &info->in[next_arg];
5566     type = TCG_TYPE_I32;
5567     switch (loc->kind) {
5568     case TCG_CALL_ARG_NORMAL:
5569         break;
5570     case TCG_CALL_ARG_EXTEND_U:
5571     case TCG_CALL_ARG_EXTEND_S:
5572         /* No extension required for MemOpIdx. */
5573         tcg_debug_assert(imm <= INT32_MAX);
5574         type = TCG_TYPE_REG;
5575         break;
5576     default:
5577         g_assert_not_reached();
5578     }
5579     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5580     next_arg++;
5581 
5582     /*
5583      * Handle ra.
5584      */
5585     loc = &info->in[next_arg];
5586     slot = loc->arg_slot;
5587     if (parm->ra_gen) {
5588         int arg_reg = -1;
5589         TCGReg ra_reg;
5590 
5591         if (arg_slot_reg_p(slot)) {
5592             arg_reg = tcg_target_call_iarg_regs[slot];
5593         }
5594         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5595 
5596         ptr_mov.dst = slot;
5597         ptr_mov.src = ra_reg;
5598         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5599     } else {
5600         imm = (uintptr_t)ldst->raddr;
5601         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5602     }
5603 }
5604 
5605 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5606                                        const TCGCallArgumentLoc *loc,
5607                                        TCGType dst_type, TCGType src_type,
5608                                        TCGReg lo, TCGReg hi)
5609 {
5610     MemOp reg_mo;
5611 
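    /* A value that fits in one host register needs a single move/extend. */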
5612     if (dst_type <= TCG_TYPE_REG) {
5613         MemOp src_ext;
5614 
5615         switch (loc->kind) {
5616         case TCG_CALL_ARG_NORMAL:
5617             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5618             break;
5619         case TCG_CALL_ARG_EXTEND_U:
5620             dst_type = TCG_TYPE_REG;
5621             src_ext = MO_UL;
5622             break;
5623         case TCG_CALL_ARG_EXTEND_S:
5624             dst_type = TCG_TYPE_REG;
5625             src_ext = MO_SL;
5626             break;
5627         default:
5628             g_assert_not_reached();
5629         }
5630 
5631         mov[0].dst = loc->arg_slot;
5632         mov[0].dst_type = dst_type;
5633         mov[0].src = lo;
5634         mov[0].src_type = src_type;
5635         mov[0].src_ext = src_ext;
5636         return 1;
5637     }
5638 
5639     if (TCG_TARGET_REG_BITS == 32) {
5640         assert(dst_type == TCG_TYPE_I64);
5641         reg_mo = MO_32;
5642     } else {
5643         assert(dst_type == TCG_TYPE_I128);
5644         reg_mo = MO_64;
5645     }
5646 
5647     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5648     mov[0].src = lo;
5649     mov[0].dst_type = TCG_TYPE_REG;
5650     mov[0].src_type = TCG_TYPE_REG;
5651     mov[0].src_ext = reg_mo;
5652 
5653     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5654     mov[1].src = hi;
5655     mov[1].dst_type = TCG_TYPE_REG;
5656     mov[1].src_type = TCG_TYPE_REG;
5657     mov[1].src_ext = reg_mo;
5658 
5659     return 2;
5660 }
5661 
5662 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5663                                    const TCGLdstHelperParam *parm)
5664 {
5665     const TCGHelperInfo *info;
5666     const TCGCallArgumentLoc *loc;
5667     TCGMovExtend mov[2];
5668     unsigned next_arg, nmov;
5669     MemOp mop = get_memop(ldst->oi);
5670 
5671     switch (mop & MO_SIZE) {
5672     case MO_8:
5673     case MO_16:
5674     case MO_32:
5675         info = &info_helper_ld32_mmu;
5676         break;
5677     case MO_64:
5678         info = &info_helper_ld64_mmu;
5679         break;
5680     case MO_128:
5681         info = &info_helper_ld128_mmu;
5682         break;
5683     default:
5684         g_assert_not_reached();
5685     }
5686 
5687     /* Defer env argument. */
5688     next_arg = 1;
5689 
5690     loc = &info->in[next_arg];
5691     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5692         /*
5693          * 32-bit host with 32-bit guest: zero-extend the guest address
5694          * to 64-bits for the helper by storing the low part, then
5695          * load a zero for the high part.
5696          */
5697         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5698                                TCG_TYPE_I32, TCG_TYPE_I32,
5699                                ldst->addrlo_reg, -1);
5700         tcg_out_helper_load_slots(s, 1, mov, parm);
5701 
5702         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5703                                 TCG_TYPE_I32, 0, parm);
5704         next_arg += 2;
5705     } else {
5706         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5707                                       ldst->addrlo_reg, ldst->addrhi_reg);
5708         tcg_out_helper_load_slots(s, nmov, mov, parm);
5709         next_arg += nmov;
5710     }
5711 
5712     switch (info->out_kind) {
5713     case TCG_CALL_RET_NORMAL:
5714     case TCG_CALL_RET_BY_VEC:
5715         break;
5716     case TCG_CALL_RET_BY_REF:
5717         /*
5718          * The return reference is in the first argument slot.
5719          * We need memory in which to return: re-use the top of stack.
5720          */
5721         {
5722             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5723 
5724             if (arg_slot_reg_p(0)) {
5725                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5726                                  TCG_REG_CALL_STACK, ofs_slot0);
5727             } else {
5728                 tcg_debug_assert(parm->ntmp != 0);
5729                 tcg_out_addi_ptr(s, parm->tmp[0],
5730                                  TCG_REG_CALL_STACK, ofs_slot0);
5731                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5732                            TCG_REG_CALL_STACK, ofs_slot0);
5733             }
5734         }
5735         break;
5736     default:
5737         g_assert_not_reached();
5738     }
5739 
5740     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5741 }
5742 
5743 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5744                                   bool load_sign,
5745                                   const TCGLdstHelperParam *parm)
5746 {
5747     MemOp mop = get_memop(ldst->oi);
5748     TCGMovExtend mov[2];
5749     int ofs_slot0;
5750 
5751     switch (ldst->type) {
5752     case TCG_TYPE_I64:
5753         if (TCG_TARGET_REG_BITS == 32) {
5754             break;
5755         }
5756         /* fall through */
5757 
5758     case TCG_TYPE_I32:
5759         mov[0].dst = ldst->datalo_reg;
5760         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5761         mov[0].dst_type = ldst->type;
5762         mov[0].src_type = TCG_TYPE_REG;
5763 
5764         /*
5765          * If load_sign, then we allowed the helper to perform the
5766          * appropriate sign extension to tcg_target_ulong, and all
5767          * we need now is a plain move.
5768          *
5769          * If not, then we expect the relevant extension
5770          * instruction to be no more expensive than a move, and
5771          * we thus save the icache etc by only using one of two
5772          * helper functions.
5773          */
5774         if (load_sign || !(mop & MO_SIGN)) {
5775             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5776                 mov[0].src_ext = MO_32;
5777             } else {
5778                 mov[0].src_ext = MO_64;
5779             }
5780         } else {
5781             mov[0].src_ext = mop & MO_SSIZE;
5782         }
5783         tcg_out_movext1(s, mov);
5784         return;
5785 
5786     case TCG_TYPE_I128:
5787         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5788         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5789         switch (TCG_TARGET_CALL_RET_I128) {
5790         case TCG_CALL_RET_NORMAL:
5791             break;
5792         case TCG_CALL_RET_BY_VEC:
5793             tcg_out_st(s, TCG_TYPE_V128,
5794                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5795                        TCG_REG_CALL_STACK, ofs_slot0);
5796             /* fall through */
5797         case TCG_CALL_RET_BY_REF:
5798             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5799                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5800             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5801                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5802             return;
5803         default:
5804             g_assert_not_reached();
5805         }
5806         break;
5807 
5808     default:
5809         g_assert_not_reached();
5810     }
5811 
5812     mov[0].dst = ldst->datalo_reg;
5813     mov[0].src =
5814         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5815     mov[0].dst_type = TCG_TYPE_REG;
5816     mov[0].src_type = TCG_TYPE_REG;
5817     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5818 
5819     mov[1].dst = ldst->datahi_reg;
5820     mov[1].src =
5821         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5822     mov[1].dst_type = TCG_TYPE_REG;
5823     mov[1].src_type = TCG_TYPE_REG;
5824     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5825 
5826     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5827 }
5828 
5829 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5830                                    const TCGLdstHelperParam *parm)
5831 {
5832     const TCGHelperInfo *info;
5833     const TCGCallArgumentLoc *loc;
5834     TCGMovExtend mov[4];
5835     TCGType data_type;
5836     unsigned next_arg, nmov, n;
5837     MemOp mop = get_memop(ldst->oi);
5838 
5839     switch (mop & MO_SIZE) {
5840     case MO_8:
5841     case MO_16:
5842     case MO_32:
5843         info = &info_helper_st32_mmu;
5844         data_type = TCG_TYPE_I32;
5845         break;
5846     case MO_64:
5847         info = &info_helper_st64_mmu;
5848         data_type = TCG_TYPE_I64;
5849         break;
5850     case MO_128:
5851         info = &info_helper_st128_mmu;
5852         data_type = TCG_TYPE_I128;
5853         break;
5854     default:
5855         g_assert_not_reached();
5856     }
5857 
5858     /* Defer env argument. */
5859     next_arg = 1;
5860     nmov = 0;
5861 
5862     /* Handle addr argument. */
5863     loc = &info->in[next_arg];
5864     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5865         /*
5866          * 32-bit host with 32-bit guest: zero-extend the guest address
5867          * to 64-bits for the helper by storing the low part.  Later,
5868          * after we have processed the register inputs, we will load a
5869          * zero for the high part.
5870          */
5871         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5872                                TCG_TYPE_I32, TCG_TYPE_I32,
5873                                ldst->addrlo_reg, -1);
5874         next_arg += 2;
5875         nmov += 1;
5876     } else {
5877         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5878                                    ldst->addrlo_reg, ldst->addrhi_reg);
5879         next_arg += n;
5880         nmov += n;
5881     }
5882 
5883     /* Handle data argument. */
5884     loc = &info->in[next_arg];
5885     switch (loc->kind) {
5886     case TCG_CALL_ARG_NORMAL:
5887     case TCG_CALL_ARG_EXTEND_U:
5888     case TCG_CALL_ARG_EXTEND_S:
5889         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5890                                    ldst->datalo_reg, ldst->datahi_reg);
5891         next_arg += n;
5892         nmov += n;
5893         tcg_out_helper_load_slots(s, nmov, mov, parm);
5894         break;
5895 
5896     case TCG_CALL_ARG_BY_REF:
5897         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5898         tcg_debug_assert(data_type == TCG_TYPE_I128);
5899         tcg_out_st(s, TCG_TYPE_I64,
5900                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5901                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5902         tcg_out_st(s, TCG_TYPE_I64,
5903                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5904                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5905 
5906         tcg_out_helper_load_slots(s, nmov, mov, parm);
5907 
5908         if (arg_slot_reg_p(loc->arg_slot)) {
5909             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5910                              TCG_REG_CALL_STACK,
5911                              arg_slot_stk_ofs(loc->ref_slot));
5912         } else {
5913             tcg_debug_assert(parm->ntmp != 0);
5914             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5915                              arg_slot_stk_ofs(loc->ref_slot));
5916             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5917                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5918         }
5919         next_arg += 2;
5920         break;
5921 
5922     default:
5923         g_assert_not_reached();
5924     }
5925 
5926     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5927         /* Zero extend the address by loading a zero for the high part. */
5928         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5929         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5930     }
5931 
5932     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5933 }
5934 
5935 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5936 {
5937     int i, start_words, num_insns;
5938     TCGOp *op;
5939 
5940     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5941                  && qemu_log_in_addr_range(pc_start))) {
5942         FILE *logfile = qemu_log_trylock();
5943         if (logfile) {
5944             fprintf(logfile, "OP:\n");
5945             tcg_dump_ops(s, logfile, false);
5946             fprintf(logfile, "\n");
5947             qemu_log_unlock(logfile);
5948         }
5949     }
5950 
5951 #ifdef CONFIG_DEBUG_TCG
5952     /* Ensure all labels referenced have been emitted.  */
5953     {
5954         TCGLabel *l;
5955         bool error = false;
5956 
5957         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5958             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5959                 qemu_log_mask(CPU_LOG_TB_OP,
5960                               "$L%d referenced but not present.\n", l->id);
5961                 error = true;
5962             }
5963         }
5964         assert(!error);
5965     }
5966 #endif
5967 
5968     tcg_optimize(s);
5969 
5970     reachable_code_pass(s);
5971     liveness_pass_0(s);
5972     liveness_pass_1(s);
5973 
5974     if (s->nb_indirects > 0) {
5975         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5976                      && qemu_log_in_addr_range(pc_start))) {
5977             FILE *logfile = qemu_log_trylock();
5978             if (logfile) {
5979                 fprintf(logfile, "OP before indirect lowering:\n");
5980                 tcg_dump_ops(s, logfile, false);
5981                 fprintf(logfile, "\n");
5982                 qemu_log_unlock(logfile);
5983             }
5984         }
5985 
5986         /* Replace indirect temps with direct temps.  */
5987         if (liveness_pass_2(s)) {
5988             /* If changes were made, re-run liveness.  */
5989             liveness_pass_1(s);
5990         }
5991     }
5992 
5993     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5994                  && qemu_log_in_addr_range(pc_start))) {
5995         FILE *logfile = qemu_log_trylock();
5996         if (logfile) {
5997             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5998             tcg_dump_ops(s, logfile, true);
5999             fprintf(logfile, "\n");
6000             qemu_log_unlock(logfile);
6001         }
6002     }
6003 
6004     /* Initialize goto_tb jump offsets. */
6005     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6006     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6007     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6008     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6009 
6010     tcg_reg_alloc_start(s);
6011 
6012     /*
6013      * Reset the buffer pointers when restarting after overflow.
6014      * TODO: Move this into translate-all.c with the rest of the
6015      * buffer management.  Having only this done here is confusing.
6016      */
6017     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6018     s->code_ptr = s->code_buf;
6019 
6020 #ifdef TCG_TARGET_NEED_LDST_LABELS
6021     QSIMPLEQ_INIT(&s->ldst_labels);
6022 #endif
6023 #ifdef TCG_TARGET_NEED_POOL_LABELS
6024     s->pool_labels = NULL;
6025 #endif
6026 
6027     start_words = s->insn_start_words;
6028     s->gen_insn_data =
6029         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6030 
6031     tcg_out_tb_start(s);
6032 
6033     num_insns = -1;
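    /* Incremented to 0 at the first INDEX_op_insn_start. */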
6034     QTAILQ_FOREACH(op, &s->ops, link) {
6035         TCGOpcode opc = op->opc;
6036 
6037         switch (opc) {
6038         case INDEX_op_mov_i32:
6039         case INDEX_op_mov_i64:
6040         case INDEX_op_mov_vec:
6041             tcg_reg_alloc_mov(s, op);
6042             break;
6043         case INDEX_op_dup_vec:
6044             tcg_reg_alloc_dup(s, op);
6045             break;
6046         case INDEX_op_insn_start:
6047             if (num_insns >= 0) {
6048                 size_t off = tcg_current_code_size(s);
6049                 s->gen_insn_end_off[num_insns] = off;
6050                 /* Assert that we do not overflow our stored offset.  */
6051                 assert(s->gen_insn_end_off[num_insns] == off);
6052             }
6053             num_insns++;
6054             for (i = 0; i < start_words; ++i) {
6055                 s->gen_insn_data[num_insns * start_words + i] =
6056                     tcg_get_insn_start_param(op, i);
6057             }
6058             break;
6059         case INDEX_op_discard:
6060             temp_dead(s, arg_temp(op->args[0]));
6061             break;
6062         case INDEX_op_set_label:
6063             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6064             tcg_out_label(s, arg_label(op->args[0]));
6065             break;
6066         case INDEX_op_call:
6067             tcg_reg_alloc_call(s, op);
6068             break;
6069         case INDEX_op_exit_tb:
6070             tcg_out_exit_tb(s, op->args[0]);
6071             break;
6072         case INDEX_op_goto_tb:
6073             tcg_out_goto_tb(s, op->args[0]);
6074             break;
6075         case INDEX_op_dup2_vec:
6076             if (tcg_reg_alloc_dup2(s, op)) {
6077                 break;
6078             }
6079             /* fall through */
6080         default:
6081             /* Sanity check that we've not introduced any unhandled opcodes. */
6082             tcg_debug_assert(tcg_op_supported(opc));
6083             /* Note: in order to speed up the code, it would be much
6084                faster to have specialized register allocator functions for
6085            some common argument patterns. */
6086             tcg_reg_alloc_op(s, op);
6087             break;
6088         }
6089         /* Test for (pending) buffer overflow.  The assumption is that any
6090            one operation beginning below the high water mark cannot overrun
6091            the buffer completely.  Thus we can test for overflow after
6092            generating code without having to check during generation.  */
6093         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6094             return -1;
6095         }
6096         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6097         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6098             return -2;
6099         }
6100     }
6101     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6102     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6103 
    /* Generate TB finalization at the end of the block. */
6105 #ifdef TCG_TARGET_NEED_LDST_LABELS
6106     i = tcg_out_ldst_finalize(s);
6107     if (i < 0) {
6108         return i;
6109     }
6110 #endif
6111 #ifdef TCG_TARGET_NEED_POOL_LABELS
6112     i = tcg_out_pool_finalize(s);
6113     if (i < 0) {
6114         return i;
6115     }
6116 #endif
6117     if (!tcg_resolve_relocs(s)) {
6118         return -2;
6119     }
6120 
6121 #ifndef CONFIG_TCG_INTERPRETER
6122     /* flush instruction cache */
6123     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6124                         (uintptr_t)s->code_buf,
6125                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6126 #endif
6127 
6128     return tcg_current_code_size(s);
6129 }
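
/*
 * A hedged sketch of how a caller reacts to the negative returns above
 * (modelled on tb_gen_code() in accel/tcg/translate-all.c; the control
 * flow shown is illustrative, not the exact upstream code):
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
 *     if (unlikely(gen_code_size < 0)) {
 *         if (gen_code_size == -1) {
 *             // Ran past code_gen_highwater: flush the whole
 *             // code generation buffer and restart the translation.
 *         } else {
 *             // A 16-bit insn offset (or a relocation) overflowed:
 *             // retry with a smaller max_insns so the TB shrinks.
 *         }
 *         goto restart;
 *     }
 */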
6130 
6131 #ifdef ELF_HOST_MACHINE
6132 /* In order to use this feature, the backend needs to do three things:
6133 
   (1) Define ELF_HOST_MACHINE, both to supply the e_machine value
       placed in the ELF image and to signal that the feature is
       supported at all.
6136 
6137    (2) Define tcg_register_jit.  This should create a buffer containing
6138        the contents of a .debug_frame section that describes the post-
6139        prologue unwind info for the tcg machine.
6140 
6141    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6142 */
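
/*
 * A hedged sketch of steps (2) and (3) for a hypothetical backend; real
 * backends build the equivalent in their tcg-target.c.inc, and the
 * shape of the unwind opcodes below is illustrative only.  Note that
 * fde.func_start and fde.func_len may be left zero, because
 * tcg_register_jit_int() fills them in:
 *
 *     typedef struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_insns[6];    // hypothetical CFA/unwind opcodes
 *     } DebugFrame;
 *
 *     static const DebugFrame debug_frame = {
 *         // CIE and FDE header fields describing the frame layout
 *         // that tcg_target_qemu_prologue() establishes...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */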
6143 
6144 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6145 typedef enum {
6146     JIT_NOACTION = 0,
6147     JIT_REGISTER_FN,
6148     JIT_UNREGISTER_FN
6149 } jit_actions_t;
6150 
6151 struct jit_code_entry {
6152     struct jit_code_entry *next_entry;
6153     struct jit_code_entry *prev_entry;
6154     const void *symfile_addr;
6155     uint64_t symfile_size;
6156 };
6157 
6158 struct jit_descriptor {
6159     uint32_t version;
6160     uint32_t action_flag;
6161     struct jit_code_entry *relevant_entry;
6162     struct jit_code_entry *first_entry;
6163 };
6164 
6165 void __jit_debug_register_code(void) __attribute__((noinline));
6166 void __jit_debug_register_code(void)
6167 {
6168     asm("");
6169 }
6170 
6171 /* Must statically initialize the version, because GDB may check
6172    the version before we can set it.  */
6173 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
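
/*
 * GDB sets a breakpoint in __jit_debug_register_code() and, whenever it
 * fires, reads __jit_debug_descriptor to find the entry being added or
 * removed.  QEMU registers exactly one entry (see below) and never
 * removes it, but unregistering an entry would look roughly like this
 * sketch:
 *
 *     // unlink 'entry' from the next_entry/prev_entry list, then:
 *     __jit_debug_descriptor.relevant_entry = entry;
 *     __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
 *     __jit_debug_register_code();
 */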
6174 
6175 /* End GDB interface.  */
6176 
/* Return the offset of STR within the string table STRTAB.  The caller
   must guarantee that STR is present; there is no failure check.  */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;   /* skip the empty string at offset 0 */
6180 
6181     while (1) {
6182         if (strcmp(p, str) == 0) {
6183             return p - strtab;
6184         }
6185         p += strlen(p) + 1;
6186     }
6187 }
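
/*
 * Worked example against the .str table used below: the table begins
 * "\0" ".text\0" ".debug_info\0" ..., so find_string(img->str, ".text")
 * returns 1 and find_string(img->str, ".debug_info") returns
 * 1 + strlen(".text") + 1 == 7.
 */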
6188 
6189 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6190                                  const void *debug_frame,
6191                                  size_t debug_frame_size)
6192 {
6193     struct __attribute__((packed)) DebugInfo {
6194         uint32_t  len;
6195         uint16_t  version;
6196         uint32_t  abbrev;
6197         uint8_t   ptr_size;
6198         uint8_t   cu_die;
6199         uint16_t  cu_lang;
6200         uintptr_t cu_low_pc;
6201         uintptr_t cu_high_pc;
6202         uint8_t   fn_die;
6203         char      fn_name[16];
6204         uintptr_t fn_low_pc;
6205         uintptr_t fn_high_pc;
6206         uint8_t   cu_eoc;
6207     };
6208 
6209     struct ElfImage {
6210         ElfW(Ehdr) ehdr;
6211         ElfW(Phdr) phdr;
6212         ElfW(Shdr) shdr[7];
6213         ElfW(Sym)  sym[2];
6214         struct DebugInfo di;
6215         uint8_t    da[24];
6216         char       str[80];
6217     };
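
    /*
     * The whole fake ELF file lives in a single allocation: this
     * ElfImage block followed immediately by the backend's .debug_frame
     * contents, which is why shdr[4].sh_offset below is
     * sizeof(struct ElfImage) and the frame data is copied to img + 1.
     */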
6218 
6219     struct ElfImage *img;
6220 
6221     static const struct ElfImage img_template = {
6222         .ehdr = {
6223             .e_ident[EI_MAG0] = ELFMAG0,
6224             .e_ident[EI_MAG1] = ELFMAG1,
6225             .e_ident[EI_MAG2] = ELFMAG2,
6226             .e_ident[EI_MAG3] = ELFMAG3,
6227             .e_ident[EI_CLASS] = ELF_CLASS,
6228             .e_ident[EI_DATA] = ELF_DATA,
6229             .e_ident[EI_VERSION] = EV_CURRENT,
6230             .e_type = ET_EXEC,
6231             .e_machine = ELF_HOST_MACHINE,
6232             .e_version = EV_CURRENT,
6233             .e_phoff = offsetof(struct ElfImage, phdr),
6234             .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),  /* the ELF header's own size */
6236             .e_phentsize = sizeof(ElfW(Phdr)),
6237             .e_phnum = 1,
6238             .e_shentsize = sizeof(ElfW(Shdr)),
6239             .e_shnum = ARRAY_SIZE(img->shdr),
6240             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6241 #ifdef ELF_HOST_FLAGS
6242             .e_flags = ELF_HOST_FLAGS,
6243 #endif
6244 #ifdef ELF_OSABI
6245             .e_ident[EI_OSABI] = ELF_OSABI,
6246 #endif
6247         },
6248         .phdr = {
6249             .p_type = PT_LOAD,
6250             .p_flags = PF_X,
6251         },
6252         .shdr = {
6253             [0] = { .sh_type = SHT_NULL },
6254             /* Trick: The contents of code_gen_buffer are not present in
6255                this fake ELF file; that got allocated elsewhere.  Therefore
6256                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6257                will not look for contents.  We can record any address.  */
6258             [1] = { /* .text */
6259                 .sh_type = SHT_NOBITS,
6260                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6261             },
6262             [2] = { /* .debug_info */
6263                 .sh_type = SHT_PROGBITS,
6264                 .sh_offset = offsetof(struct ElfImage, di),
6265                 .sh_size = sizeof(struct DebugInfo),
6266             },
6267             [3] = { /* .debug_abbrev */
6268                 .sh_type = SHT_PROGBITS,
6269                 .sh_offset = offsetof(struct ElfImage, da),
6270                 .sh_size = sizeof(img->da),
6271             },
6272             [4] = { /* .debug_frame */
6273                 .sh_type = SHT_PROGBITS,
6274                 .sh_offset = sizeof(struct ElfImage),
6275             },
6276             [5] = { /* .symtab */
6277                 .sh_type = SHT_SYMTAB,
6278                 .sh_offset = offsetof(struct ElfImage, sym),
6279                 .sh_size = sizeof(img->sym),
6280                 .sh_info = 1,
6281                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6282                 .sh_entsize = sizeof(ElfW(Sym)),
6283             },
6284             [6] = { /* .strtab */
6285                 .sh_type = SHT_STRTAB,
6286                 .sh_offset = offsetof(struct ElfImage, str),
6287                 .sh_size = sizeof(img->str),
6288             }
6289         },
6290         .sym = {
6291             [1] = { /* code_gen_buffer */
6292                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6293                 .st_shndx = 1,
6294             }
6295         },
6296         .di = {
            .len = sizeof(struct DebugInfo) - 4, /* DWARF initial length
                                                    excludes the len field */
            .version = 2,                        /* DWARF version 2 */
6299             .ptr_size = sizeof(void *),
6300             .cu_die = 1,
6301             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6302             .fn_die = 2,
6303             .fn_name = "code_gen_buffer"
6304         },
6305         .da = {
6306             1,          /* abbrev number (the cu) */
6307             0x11, 1,    /* DW_TAG_compile_unit, has children */
6308             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6309             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6310             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6311             0, 0,       /* end of abbrev */
6312             2,          /* abbrev number (the fn) */
6313             0x2e, 0,    /* DW_TAG_subprogram, no children */
6314             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6315             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6316             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6317             0, 0,       /* end of abbrev */
6318             0           /* no more abbrev */
6319         },
6320         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6321                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6322     };
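
    /*
     * How a DWARF consumer pairs the two tables above: each DIE in
     * .debug_info names an abbrev number and then carries only the
     * attribute values, while .debug_abbrev supplies the forms.  Here
     * cu_die == 1 selects abbrev 1, so cu_lang/cu_low_pc/cu_high_pc are
     * decoded as DW_FORM_data2/addr/addr; fn_die == 2 selects abbrev 2,
     * so fn_name/fn_low_pc/fn_high_pc are decoded as
     * DW_FORM_string/addr/addr; cu_eoc == 0 closes the children of the
     * compile unit.
     */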
6323 
6324     /* We only need a single jit entry; statically allocate it.  */
6325     static struct jit_code_entry one_entry;
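    /* (tcg_register_jit_int is reached once per process, from the
       backend's tcg_register_jit() call at prologue-initialization
       time, so a single static entry and a never-freed image are
       fine.) */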
6326 
6327     uintptr_t buf = (uintptr_t)buf_ptr;
6328     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6329     DebugFrameHeader *dfh;
6330 
6331     img = g_malloc(img_size);
6332     *img = img_template;
6333 
6334     img->phdr.p_vaddr = buf;
6335     img->phdr.p_paddr = buf;
6336     img->phdr.p_memsz = buf_size;
6337 
6338     img->shdr[1].sh_name = find_string(img->str, ".text");
6339     img->shdr[1].sh_addr = buf;
6340     img->shdr[1].sh_size = buf_size;
6341 
6342     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6343     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6344 
6345     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6346     img->shdr[4].sh_size = debug_frame_size;
6347 
6348     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6349     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6350 
6351     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6352     img->sym[1].st_value = buf;
6353     img->sym[1].st_size = buf_size;
6354 
6355     img->di.cu_low_pc = buf;
6356     img->di.cu_high_pc = buf + buf_size;
6357     img->di.fn_low_pc = buf;
6358     img->di.fn_high_pc = buf + buf_size;
6359 
6360     dfh = (DebugFrameHeader *)(img + 1);
6361     memcpy(dfh, debug_frame, debug_frame_size);
6362     dfh->fde.func_start = buf;
6363     dfh->fde.func_len = buf_size;
6364 
6365 #ifdef DEBUG_JIT
    /* Define DEBUG_JIT (at the top of this file) to dump the generated
       ELF image to a file for inspection with readelf, objdump, or
       other such utilities.  */
6368     {
6369         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6370         FILE *f = fopen(jit, "w+b");
6371         if (f) {
            /* One item of img_size bytes is written, so fwrite returns
               1 on success, not img_size; the comparison exists only to
               consume the warn_unused_result value.  */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* A short write of this debug dump is not fatal.  */
            }
6375             fclose(f);
6376         }
6377     }
6378 #endif
6379 
6380     one_entry.symfile_addr = img;
6381     one_entry.symfile_size = img_size;
6382 
6383     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6384     __jit_debug_descriptor.relevant_entry = &one_entry;
6385     __jit_debug_descriptor.first_entry = &one_entry;
6386     __jit_debug_register_code();
6387 }
6388 #else
6389 /* No support for the feature.  Provide the entry point expected by exec.c,
6390    and implement the internal function we declared earlier.  */
6391 
6392 static void tcg_register_jit_int(const void *buf, size_t size,
6393                                  const void *debug_frame,
6394                                  size_t debug_frame_size)
6395 {
6396 }
6397 
6398 void tcg_register_jit(const void *buf, size_t buf_size)
6399 {
6400 }
6401 #endif /* ELF_HOST_MACHINE */
6402 
6403 #if !TCG_TARGET_MAYBE_vec
6404 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6405 {
6406     g_assert_not_reached();
6407 }
6408 #endif
6409