/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/target_page.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
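
/*
 * One TCGLabelQemuLdst is queued per qemu_ld/st fast path (see
 * new_ldst_label() below); tcg_out_ldst_finalize() walks the queue at
 * the end of the TB and emits the corresponding out-of-line slow paths.
 */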

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest);
static void tcg_out_mb(TCGContext *s, unsigned bar);
static void tcg_out_br(TCGContext *s, TCGLabel *l);
static void tcg_out_set_carry(TCGContext *s);
static void tcg_out_set_borrow(TCGContext *s);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
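
/*
 * For example, on a host where TCG_TARGET_INSN_UNIT_SIZE == 1 (such as
 * x86), tcg_out32() takes the memcpy path and advances code_ptr by four
 * one-byte units, while on a fixed-width 4-byte host it stores a single
 * insn unit directly.
 */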

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    int fi = mmuidx_to_fast_index(which);
    return (offsetof(CPUNegativeOffsetState, tlb.f[fi]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
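
/*
 * For example (illustrative only):
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SB, src)
 * sign-extends the low 8 bits of @src into the 64-bit @dst, while
 * MO_UL in place of MO_SB would zero-extend the low 32 bits.
 */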

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
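
/*
 * For example, if @i1 moves R0 -> R1 while @i2 moves R1 -> R0, the
 * destinations form a swap: either the backend provides tcg_out_xchg(),
 * or the @scratch register is used to break the cycle before extending.
 */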

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
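
/*
 * Note: keeping the pool sorted (by size, then by contents) places
 * identical constants next to each other, which lets
 * tcg_out_pool_finalize() below emit each distinct value only once.
 */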

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}
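
/*
 * Typical (backend-specific) usage: emit a pc-relative load whose
 * displacement gets patched later, then record the constant, e.g.
 *     new_pool_label(s, value, R_SOME_RELOC, s->code_ptr, 0);
 * where R_SOME_RELOC stands in for a relocation type understood by
 * that backend's patch_reloc().
 */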

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
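
/*
 * For example, an entry C_O1_I2(r, r, ri) in tcg-target-con-set.h expands
 * three times above: first to the enumerator c_o1_i2_r_r_ri, then to the
 * constraint-set initializer { 1, 2, { "r", "r", "ri" } }, and finally
 * (for use in tcg_target_op_def) back to the enumerator c_o1_i2_r_r_ri.
 */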

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpAddSubCarry {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
    void (*out_rii)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, tcg_target_long a2);
} TCGOutOpAddSubCarry;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

typedef struct TCGOutOpLoad {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg base, intptr_t offset);
} TCGOutOpLoad;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpQemuLdSt {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg addr, MemOpIdx oi);
} TCGOutOpQemuLdSt;

typedef struct TCGOutOpQemuLdSt2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi,
                TCGReg addr, MemOpIdx oi);
} TCGOutOpQemuLdSt2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpStore {
    TCGOutOp base;
    void (*out_r)(TCGContext *s, TCGType type, TCGReg data,
                  TCGReg base, intptr_t offset);
    void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data,
                  TCGReg base, intptr_t offset);
} TCGOutOpStore;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif

static const TCGOutOp outop_goto_ptr = {
    .static_constraint = C_O0_I1(r),
};

static const TCGOutOpLoad outop_ld = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tcg_out_ld,
};

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
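
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) compiles
 * only while outop_add has type TCGOutOpBinary; had a backend declared it
 * with a different TCGOutOp subclass, _Generic would find no matching
 * association and the build would fail at this table.
 */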

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
    OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
    /* addc1o is implemented with set_carry + addcio */
    OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u),
    OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s),
    OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u),
    OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s),
    OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld),
    OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2),
    OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st),
    OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_st, TCGOutOpStore, outop_st),
    OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8),
    OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
    OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
    /* subb1o is implemented with set_borrow + subbio */
    OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

    [INDEX_op_goto_ptr] = &outop_goto_ptr,

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
    OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u),
    OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s),
    OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = (uintptr_t)p->data + size;
    s->pool_end = (uintptr_t)p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = 0;
    s->pool_current = NULL;
}
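
/*
 * Note: tcg_malloc() allocations (labels, relocations, ldst entries above)
 * all come from these per-context pools; tcg_pool_reset() frees only the
 * oversized chunks and rewinds the chunk list, so the normal-sized pools
 * are reused for the next translation.
 */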

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1515 
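/*
 * A worked example of the nargs computation above: each field of
 * typemask is 3 bits wide, with the return type in bits [2:0] and
 * argument N in bits [3N+2:3N].  For a helper with four arguments,
 * the highest nonzero field of typemask >> 3 ends no later than
 * bit 11, so 32 - clz32(...) is at most 12 and DIV_ROUND_UP(12, 3)
 * recovers nargs == 4.
 */
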
1516 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1517 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1518 #else
1519 #define HELPER_INFO_INIT(I)      (&(I)->init)
1520 #define HELPER_INFO_INIT_VAL(I)  1
1521 #endif /* CONFIG_TCG_INTERPRETER */
1522 
1523 static inline bool arg_slot_reg_p(unsigned arg_slot)
1524 {
1525     /*
1526      * Split the sizeof away from the comparison to avoid a -Werror
1527      * diagnostic, "unsigned < 0 is always false", when iarg_regs is empty.
1528      */
1529     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1530     return arg_slot < nreg;
1531 }
1532 
1533 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1534 {
1535     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1536     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1537 
1538     tcg_debug_assert(stk_slot < max);
1539     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1540 }
1541 
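/*
 * A minimal sketch of the slot numbering used by the two functions
 * above, assuming a host ABI with 6 integer argument registers (as
 * on x86_64): slots 0..5 are passed in registers, and slot 6 is the
 * first stack slot, at offset
 * TCG_TARGET_CALL_STACK_OFFSET + 0 * sizeof(tcg_target_long).
 */
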
1542 typedef struct TCGCumulativeArgs {
1543     int arg_idx;                /* tcg_gen_callN args[] */
1544     int info_in_idx;            /* TCGHelperInfo in[] */
1545     int arg_slot;               /* regs+stack slot */
1546     int ref_slot;               /* stack slots for references */
1547 } TCGCumulativeArgs;
1548 
1549 static void layout_arg_even(TCGCumulativeArgs *cum)
1550 {
1551     cum->arg_slot += cum->arg_slot & 1;
1552 }
1553 
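/*
 * For example, on a target using TCG_CALL_ARG_EVEN, a 64-bit argument
 * arriving at arg_slot == 3 is bumped to slot 4 so that its two-slot
 * register or stack pair starts on an even boundary.
 */
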
1554 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1555                          TCGCallArgumentKind kind)
1556 {
1557     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1558 
1559     *loc = (TCGCallArgumentLoc){
1560         .kind = kind,
1561         .arg_idx = cum->arg_idx,
1562         .arg_slot = cum->arg_slot,
1563     };
1564     cum->info_in_idx++;
1565     cum->arg_slot++;
1566 }
1567 
1568 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1569                                 TCGHelperInfo *info, int n)
1570 {
1571     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1572 
1573     for (int i = 0; i < n; ++i) {
1574         /* Lay out all parts using the same arg_idx, adjusting the subindex. */
1575         loc[i] = (TCGCallArgumentLoc){
1576             .kind = TCG_CALL_ARG_NORMAL,
1577             .arg_idx = cum->arg_idx,
1578             .tmp_subindex = i,
1579             .arg_slot = cum->arg_slot + i,
1580         };
1581     }
1582     cum->info_in_idx += n;
1583     cum->arg_slot += n;
1584 }
1585 
1586 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1587 {
1588     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1589     int n = 128 / TCG_TARGET_REG_BITS;
1590 
1591     /* The first subindex carries the pointer. */
1592     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1593 
1594     /*
1595      * The callee is allowed to clobber memory associated with a
1596      * structure passed by reference.  Therefore we must make copies.
1597      * Allocate space from "ref_slot", which will be adjusted to
1598      * follow the parameters on the stack.
1599      */
1600     loc[0].ref_slot = cum->ref_slot;
1601 
1602     /*
1603      * Subsequent words also go into the reference slot, but
1604      * do not accumulate into the regular arguments.
1605      */
1606     for (int i = 1; i < n; ++i) {
1607         loc[i] = (TCGCallArgumentLoc){
1608             .kind = TCG_CALL_ARG_BY_REF_N,
1609             .arg_idx = cum->arg_idx,
1610             .tmp_subindex = i,
1611             .ref_slot = cum->ref_slot + i,
1612         };
1613     }
1614     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1615     cum->ref_slot += n;
1616 }
1617 
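/*
 * A worked example, assuming a 64-bit host using TCG_CALL_ARG_BY_REF
 * for I128: n == 2, so loc[0] (TCG_CALL_ARG_BY_REF) consumes one
 * regular argument slot for the pointer and copy slot ref_slot + 0,
 * while loc[1] (TCG_CALL_ARG_BY_REF_N) covers copy slot ref_slot + 1
 * without consuming any further argument slots.
 */
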
1618 static void init_call_layout(TCGHelperInfo *info)
1619 {
1620     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1621     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1622     unsigned typemask = info->typemask;
1623     unsigned typecode;
1624     TCGCumulativeArgs cum = { };
1625 
1626     /*
1627      * Parse and place any function return value.
1628      */
1629     typecode = typemask & 7;
1630     switch (typecode) {
1631     case dh_typecode_void:
1632         info->nr_out = 0;
1633         break;
1634     case dh_typecode_i32:
1635     case dh_typecode_s32:
1636     case dh_typecode_ptr:
1637         info->nr_out = 1;
1638         info->out_kind = TCG_CALL_RET_NORMAL;
1639         break;
1640     case dh_typecode_i64:
1641     case dh_typecode_s64:
1642         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1643         info->out_kind = TCG_CALL_RET_NORMAL;
1644         /* Query the last register now to trigger any assert early. */
1645         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1646         break;
1647     case dh_typecode_i128:
1648         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1649         info->out_kind = TCG_TARGET_CALL_RET_I128;
1650         switch (TCG_TARGET_CALL_RET_I128) {
1651         case TCG_CALL_RET_NORMAL:
1652             /* Query the last register now to trigger any assert early. */
1653             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1654             break;
1655         case TCG_CALL_RET_BY_VEC:
1656             /* Query the single register now to trigger any assert early. */
1657             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1658             break;
1659         case TCG_CALL_RET_BY_REF:
1660             /*
1661              * Allocate the first argument to the output.
1662              * We don't need to store this anywhere, just make it
1663              * unavailable for use in the input loop below.
1664              */
1665             cum.arg_slot = 1;
1666             break;
1667         default:
1668             qemu_build_not_reached();
1669         }
1670         break;
1671     default:
1672         g_assert_not_reached();
1673     }
1674 
1675     /*
1676      * Parse and place function arguments.
1677      */
1678     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1679         TCGCallArgumentKind kind;
1680         TCGType type;
1681 
1682         typecode = typemask & 7;
1683         switch (typecode) {
1684         case dh_typecode_i32:
1685         case dh_typecode_s32:
1686             type = TCG_TYPE_I32;
1687             break;
1688         case dh_typecode_i64:
1689         case dh_typecode_s64:
1690             type = TCG_TYPE_I64;
1691             break;
1692         case dh_typecode_ptr:
1693             type = TCG_TYPE_PTR;
1694             break;
1695         case dh_typecode_i128:
1696             type = TCG_TYPE_I128;
1697             break;
1698         default:
1699             g_assert_not_reached();
1700         }
1701 
1702         switch (type) {
1703         case TCG_TYPE_I32:
1704             switch (TCG_TARGET_CALL_ARG_I32) {
1705             case TCG_CALL_ARG_EVEN:
1706                 layout_arg_even(&cum);
1707                 /* fall through */
1708             case TCG_CALL_ARG_NORMAL:
1709                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1710                 break;
1711             case TCG_CALL_ARG_EXTEND:
1712                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1713                 layout_arg_1(&cum, info, kind);
1714                 break;
1715             default:
1716                 qemu_build_not_reached();
1717             }
1718             break;
1719 
1720         case TCG_TYPE_I64:
1721             switch (TCG_TARGET_CALL_ARG_I64) {
1722             case TCG_CALL_ARG_EVEN:
1723                 layout_arg_even(&cum);
1724                 /* fall through */
1725             case TCG_CALL_ARG_NORMAL:
1726                 if (TCG_TARGET_REG_BITS == 32) {
1727                     layout_arg_normal_n(&cum, info, 2);
1728                 } else {
1729                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1730                 }
1731                 break;
1732             default:
1733                 qemu_build_not_reached();
1734             }
1735             break;
1736 
1737         case TCG_TYPE_I128:
1738             switch (TCG_TARGET_CALL_ARG_I128) {
1739             case TCG_CALL_ARG_EVEN:
1740                 layout_arg_even(&cum);
1741                 /* fall through */
1742             case TCG_CALL_ARG_NORMAL:
1743                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1744                 break;
1745             case TCG_CALL_ARG_BY_REF:
1746                 layout_arg_by_ref(&cum, info);
1747                 break;
1748             default:
1749                 qemu_build_not_reached();
1750             }
1751             break;
1752 
1753         default:
1754             g_assert_not_reached();
1755         }
1756     }
1757     info->nr_in = cum.info_in_idx;
1758 
1759     /* Validate that we didn't overrun the input array. */
1760     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1761     /* Validate the backend has enough argument space. */
1762     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1763 
1764     /*
1765      * Relocate the "ref_slot" area to the end of the parameters.
1766      * Minimizing this stack offset helps code size for x86,
1767      * which has a signed 8-bit offset encoding.
1768      */
1769     if (cum.ref_slot != 0) {
1770         int ref_base = 0;
1771 
1772         if (cum.arg_slot > max_reg_slots) {
1773             int align = __alignof(Int128) / sizeof(tcg_target_long);
1774 
1775             ref_base = cum.arg_slot - max_reg_slots;
1776             if (align > 1) {
1777                 ref_base = ROUND_UP(ref_base, align);
1778             }
1779         }
1780         assert(ref_base + cum.ref_slot <= max_stk_slots);
1781         ref_base += max_reg_slots;
1782 
1783         if (ref_base != 0) {
1784             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1785                 TCGCallArgumentLoc *loc = &info->in[i];
1786                 switch (loc->kind) {
1787                 case TCG_CALL_ARG_BY_REF:
1788                 case TCG_CALL_ARG_BY_REF_N:
1789                     loc->ref_slot += ref_base;
1790                     break;
1791                 default:
1792                     break;
1793                 }
1794             }
1795         }
1796     }
1797 }
1798 
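/*
 * A worked example of the ref_slot relocation above, assuming
 * max_reg_slots == 6: if cum.arg_slot ended at 8, two stack slots
 * already hold parameters, so ref_base = ROUND_UP(8 - 6, align) == 2,
 * rebased past the register slots to 8.  Adding that to each
 * loc->ref_slot places the by-reference copies immediately after the
 * stack parameters.
 */
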
1799 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1800 static void process_constraint_sets(void);
1801 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1802                                             TCGReg reg, const char *name);
1803 
1804 static void tcg_context_init(unsigned max_threads)
1805 {
1806     TCGContext *s = &tcg_init_ctx;
1807     int n, i;
1808     TCGTemp *ts;
1809 
1810     memset(s, 0, sizeof(*s));
1811     s->nb_globals = 0;
1812 
1813     init_call_layout(&info_helper_ld32_mmu);
1814     init_call_layout(&info_helper_ld64_mmu);
1815     init_call_layout(&info_helper_ld128_mmu);
1816     init_call_layout(&info_helper_st32_mmu);
1817     init_call_layout(&info_helper_st64_mmu);
1818     init_call_layout(&info_helper_st128_mmu);
1819 
1820     tcg_target_init(s);
1821     process_constraint_sets();
1822 
1823     /* Reverse the order of the saved registers, assuming they're all at
1824        the start of tcg_target_reg_alloc_order.  */
1825     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1826         int r = tcg_target_reg_alloc_order[n];
1827         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1828             break;
1829         }
1830     }
1831     for (i = 0; i < n; ++i) {
1832         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1833     }
1834     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1835         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1836     }
1837 
1838     tcg_ctx = s;
1839     /*
1840      * In user-mode we simply share the init context among threads, since we
1841      * use a single region. See the documentation of tcg_region_init() for the
1842      * reasoning behind this.
1843      * In system-mode we will have at most max_threads TCG threads.
1844      */
1845 #ifdef CONFIG_USER_ONLY
1846     tcg_ctxs = &tcg_ctx;
1847     tcg_cur_ctxs = 1;
1848     tcg_max_ctxs = 1;
1849 #else
1850     tcg_max_ctxs = max_threads;
1851     tcg_ctxs = g_new0(TCGContext *, max_threads);
1852 #endif
1853 
1854     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1855     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1856     tcg_env = temp_tcgv_ptr(ts);
1857 }
1858 
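/*
 * As an illustration of the reversal in tcg_context_init: if
 * tcg_target_reg_alloc_order begins with three call-saved registers
 * {A, B, C} followed by call-clobbered ones, indirect_reg_alloc_order
 * begins {C, B, A} and then continues with the clobbered registers in
 * their original order.
 */
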
1859 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1860 {
1861     tcg_context_init(max_threads);
1862     tcg_region_init(tb_size, splitwx, max_threads);
1863 }
1864 
1865 /*
1866  * Allocate TBs right before their corresponding translated code, making
1867  * sure that TBs and code are on different cache lines.
1868  */
1869 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1870 {
1871     uintptr_t align = qemu_icache_linesize;
1872     TranslationBlock *tb;
1873     void *next;
1874 
1875  retry:
1876     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1877     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1878 
1879     if (unlikely(next > s->code_gen_highwater)) {
1880         if (tcg_region_alloc(s)) {
1881             return NULL;
1882         }
1883         goto retry;
1884     }
1885     qatomic_set(&s->code_gen_ptr, next);
1886     return tb;
1887 }
1888 
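/*
 * For example, with a 64-byte instruction cache line and
 * code_gen_ptr == base + 0x90, the TranslationBlock is placed at
 * base + 0xc0 and code_gen_ptr advances to the next 64-byte boundary
 * past the TB, so the translated code that follows never shares a
 * cache line with the TB itself.
 */
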
1889 void tcg_prologue_init(void)
1890 {
1891     TCGContext *s = tcg_ctx;
1892     size_t prologue_size;
1893 
1894     s->code_ptr = s->code_gen_ptr;
1895     s->code_buf = s->code_gen_ptr;
1896     s->data_gen_ptr = NULL;
1897 
1898 #ifndef CONFIG_TCG_INTERPRETER
1899     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1900 #endif
1901 
1902     s->pool_labels = NULL;
1903 
1904     qemu_thread_jit_write();
1905     /* Generate the prologue.  */
1906     tcg_target_qemu_prologue(s);
1907 
1908     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1909     {
1910         int result = tcg_out_pool_finalize(s);
1911         tcg_debug_assert(result == 0);
1912     }
1913 
1914     prologue_size = tcg_current_code_size(s);
1915     perf_report_prologue(s->code_gen_ptr, prologue_size);
1916 
1917 #ifndef CONFIG_TCG_INTERPRETER
1918     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1919                         (uintptr_t)s->code_buf, prologue_size);
1920 #endif
1921 
1922     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1923         FILE *logfile = qemu_log_trylock();
1924         if (logfile) {
1925             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1926             if (s->data_gen_ptr) {
1927                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1928                 size_t data_size = prologue_size - code_size;
1929                 size_t i;
1930 
1931                 disas(logfile, s->code_gen_ptr, code_size);
1932 
1933                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1934                     if (sizeof(tcg_target_ulong) == 8) {
1935                         fprintf(logfile,
1936                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1937                                 (uintptr_t)s->data_gen_ptr + i,
1938                                 *(uint64_t *)(s->data_gen_ptr + i));
1939                     } else {
1940                         fprintf(logfile,
1941                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1942                                 (uintptr_t)s->data_gen_ptr + i,
1943                                 *(uint32_t *)(s->data_gen_ptr + i));
1944                     }
1945                 }
1946             } else {
1947                 disas(logfile, s->code_gen_ptr, prologue_size);
1948             }
1949             fprintf(logfile, "\n");
1950             qemu_log_unlock(logfile);
1951         }
1952     }
1953 
1954 #ifndef CONFIG_TCG_INTERPRETER
1955     /*
1956      * Assert that goto_ptr is implemented completely, setting an epilogue.
1957      * For tci, we use NULL as the signal to return from the interpreter,
1958      * so skip this check.
1959      */
1960     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1961 #endif
1962 
1963     tcg_region_prologue_set(s);
1964 }
1965 
1966 void tcg_func_start(TCGContext *s)
1967 {
1968     tcg_pool_reset(s);
1969     s->nb_temps = s->nb_globals;
1970 
1971     /* No temps have been previously allocated for size or locality.  */
1972     tcg_temp_ebb_reset_freed(s);
1973 
1974     /* No constant temps have been previously allocated. */
1975     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1976         if (s->const_table[i]) {
1977             g_hash_table_remove_all(s->const_table[i]);
1978         }
1979     }
1980 
1981     s->nb_ops = 0;
1982     s->nb_labels = 0;
1983     s->current_frame_offset = s->frame_start;
1984 
1985 #ifdef CONFIG_DEBUG_TCG
1986     s->goto_tb_issue_mask = 0;
1987 #endif
1988 
1989     QTAILQ_INIT(&s->ops);
1990     QTAILQ_INIT(&s->free_ops);
1991     s->emit_before_op = NULL;
1992     QSIMPLEQ_INIT(&s->labels);
1993 
1994     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1995 }
1996 
1997 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1998 {
1999     int n = s->nb_temps++;
2000 
2001     if (n >= TCG_MAX_TEMPS) {
2002         tcg_raise_tb_overflow(s);
2003     }
2004     return memset(&s->temps[n], 0, sizeof(TCGTemp));
2005 }
2006 
2007 static TCGTemp *tcg_global_alloc(TCGContext *s)
2008 {
2009     TCGTemp *ts;
2010 
2011     tcg_debug_assert(s->nb_globals == s->nb_temps);
2012     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
2013     s->nb_globals++;
2014     ts = tcg_temp_alloc(s);
2015     ts->kind = TEMP_GLOBAL;
2016 
2017     return ts;
2018 }
2019 
2020 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
2021                                             TCGReg reg, const char *name)
2022 {
2023     TCGTemp *ts;
2024 
2025     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
2026 
2027     ts = tcg_global_alloc(s);
2028     ts->base_type = type;
2029     ts->type = type;
2030     ts->kind = TEMP_FIXED;
2031     ts->reg = reg;
2032     ts->name = name;
2033     tcg_regset_set_reg(s->reserved_regs, reg);
2034 
2035     return ts;
2036 }
2037 
2038 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
2039 {
2040     s->frame_start = start;
2041     s->frame_end = start + size;
2042     s->frame_temp
2043         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
2044 }
2045 
2046 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
2047                                             const char *name, TCGType type)
2048 {
2049     TCGContext *s = tcg_ctx;
2050     TCGTemp *base_ts = tcgv_ptr_temp(base);
2051     TCGTemp *ts = tcg_global_alloc(s);
2052     int indirect_reg = 0;
2053 
2054     switch (base_ts->kind) {
2055     case TEMP_FIXED:
2056         break;
2057     case TEMP_GLOBAL:
2058         /* We do not support double-indirect registers.  */
2059         tcg_debug_assert(!base_ts->indirect_reg);
2060         base_ts->indirect_base = 1;
2061         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
2062                             ? 2 : 1);
2063         indirect_reg = 1;
2064         break;
2065     default:
2066         g_assert_not_reached();
2067     }
2068 
2069     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2070         TCGTemp *ts2 = tcg_global_alloc(s);
2071         char buf[64];
2072 
2073         ts->base_type = TCG_TYPE_I64;
2074         ts->type = TCG_TYPE_I32;
2075         ts->indirect_reg = indirect_reg;
2076         ts->mem_allocated = 1;
2077         ts->mem_base = base_ts;
2078         ts->mem_offset = offset;
2079         pstrcpy(buf, sizeof(buf), name);
2080         pstrcat(buf, sizeof(buf), "_0");
2081         ts->name = strdup(buf);
2082 
2083         tcg_debug_assert(ts2 == ts + 1);
2084         ts2->base_type = TCG_TYPE_I64;
2085         ts2->type = TCG_TYPE_I32;
2086         ts2->indirect_reg = indirect_reg;
2087         ts2->mem_allocated = 1;
2088         ts2->mem_base = base_ts;
2089         ts2->mem_offset = offset + 4;
2090         ts2->temp_subindex = 1;
2091         pstrcpy(buf, sizeof(buf), name);
2092         pstrcat(buf, sizeof(buf), "_1");
2093         ts2->name = strdup(buf);
2094     } else {
2095         ts->base_type = type;
2096         ts->type = type;
2097         ts->indirect_reg = indirect_reg;
2098         ts->mem_allocated = 1;
2099         ts->mem_base = base_ts;
2100         ts->mem_offset = offset;
2101         ts->name = name;
2102     }
2103     return ts;
2104 }
2105 
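/*
 * For example, on a 32-bit host a 64-bit global named "x" is split by
 * tcg_global_mem_new_internal into two consecutive I32 temps: "x_0"
 * at mem_offset and "x_1" at mem_offset + 4, with temp_subindex == 1
 * marking the second half.
 */
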
2106 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
2107 {
2108     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
2109     return temp_tcgv_i32(ts);
2110 }
2111 
2112 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
2113 {
2114     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
2115     return temp_tcgv_i64(ts);
2116 }
2117 
2118 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
2119 {
2120     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
2121     return temp_tcgv_ptr(ts);
2122 }
2123 
2124 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
2125 {
2126     TCGContext *s = tcg_ctx;
2127     TCGTemp *ts;
2128     int n;
2129 
2130     if (kind == TEMP_EBB) {
2131         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
2132 
2133         if (idx < TCG_MAX_TEMPS) {
2134             /* There is already an available temp with the right type.  */
2135             clear_bit(idx, s->free_temps[type].l);
2136 
2137             ts = &s->temps[idx];
2138             ts->temp_allocated = 1;
2139             tcg_debug_assert(ts->base_type == type);
2140             tcg_debug_assert(ts->kind == kind);
2141             return ts;
2142         }
2143     } else {
2144         tcg_debug_assert(kind == TEMP_TB);
2145     }
2146 
2147     switch (type) {
2148     case TCG_TYPE_I32:
2149     case TCG_TYPE_V64:
2150     case TCG_TYPE_V128:
2151     case TCG_TYPE_V256:
2152         n = 1;
2153         break;
2154     case TCG_TYPE_I64:
2155         n = 64 / TCG_TARGET_REG_BITS;
2156         break;
2157     case TCG_TYPE_I128:
2158         n = 128 / TCG_TARGET_REG_BITS;
2159         break;
2160     default:
2161         g_assert_not_reached();
2162     }
2163 
2164     ts = tcg_temp_alloc(s);
2165     ts->base_type = type;
2166     ts->temp_allocated = 1;
2167     ts->kind = kind;
2168 
2169     if (n == 1) {
2170         ts->type = type;
2171     } else {
2172         ts->type = TCG_TYPE_REG;
2173 
2174         for (int i = 1; i < n; ++i) {
2175             TCGTemp *ts2 = tcg_temp_alloc(s);
2176 
2177             tcg_debug_assert(ts2 == ts + i);
2178             ts2->base_type = type;
2179             ts2->type = TCG_TYPE_REG;
2180             ts2->temp_allocated = 1;
2181             ts2->temp_subindex = i;
2182             ts2->kind = kind;
2183         }
2184     }
2185     return ts;
2186 }
2187 
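/*
 * A sketch of the multi-word case above, assuming a 64-bit host:
 * tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB) allocates two
 * consecutive TCGTemps, each with type TCG_TYPE_REG, sharing
 * base_type TCG_TYPE_I128 and distinguished by temp_subindex 0 and 1.
 */
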
2188 TCGv_i32 tcg_temp_new_i32(void)
2189 {
2190     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2191 }
2192 
2193 TCGv_i32 tcg_temp_ebb_new_i32(void)
2194 {
2195     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2196 }
2197 
2198 TCGv_i64 tcg_temp_new_i64(void)
2199 {
2200     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2201 }
2202 
2203 TCGv_i64 tcg_temp_ebb_new_i64(void)
2204 {
2205     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2206 }
2207 
2208 TCGv_ptr tcg_temp_new_ptr(void)
2209 {
2210     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2211 }
2212 
2213 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2214 {
2215     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2216 }
2217 
2218 TCGv_i128 tcg_temp_new_i128(void)
2219 {
2220     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2221 }
2222 
2223 TCGv_i128 tcg_temp_ebb_new_i128(void)
2224 {
2225     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2226 }
2227 
2228 TCGv_vec tcg_temp_new_vec(TCGType type)
2229 {
2230     TCGTemp *t;
2231 
2232 #ifdef CONFIG_DEBUG_TCG
2233     switch (type) {
2234     case TCG_TYPE_V64:
2235         assert(TCG_TARGET_HAS_v64);
2236         break;
2237     case TCG_TYPE_V128:
2238         assert(TCG_TARGET_HAS_v128);
2239         break;
2240     case TCG_TYPE_V256:
2241         assert(TCG_TARGET_HAS_v256);
2242         break;
2243     default:
2244         g_assert_not_reached();
2245     }
2246 #endif
2247 
2248     t = tcg_temp_new_internal(type, TEMP_EBB);
2249     return temp_tcgv_vec(t);
2250 }
2251 
2252 /* Create a new temp of the same type as an existing temp.  */
2253 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2254 {
2255     TCGTemp *t = tcgv_vec_temp(match);
2256 
2257     tcg_debug_assert(t->temp_allocated != 0);
2258 
2259     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2260     return temp_tcgv_vec(t);
2261 }
2262 
2263 void tcg_temp_free_internal(TCGTemp *ts)
2264 {
2265     TCGContext *s = tcg_ctx;
2266 
2267     switch (ts->kind) {
2268     case TEMP_CONST:
2269     case TEMP_TB:
2270         /* Silently ignore free. */
2271         break;
2272     case TEMP_EBB:
2273         tcg_debug_assert(ts->temp_allocated != 0);
2274         ts->temp_allocated = 0;
2275         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2276         break;
2277     default:
2278         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2279         g_assert_not_reached();
2280     }
2281 }
2282 
2283 void tcg_temp_free_i32(TCGv_i32 arg)
2284 {
2285     tcg_temp_free_internal(tcgv_i32_temp(arg));
2286 }
2287 
2288 void tcg_temp_free_i64(TCGv_i64 arg)
2289 {
2290     tcg_temp_free_internal(tcgv_i64_temp(arg));
2291 }
2292 
2293 void tcg_temp_free_i128(TCGv_i128 arg)
2294 {
2295     tcg_temp_free_internal(tcgv_i128_temp(arg));
2296 }
2297 
2298 void tcg_temp_free_ptr(TCGv_ptr arg)
2299 {
2300     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2301 }
2302 
2303 void tcg_temp_free_vec(TCGv_vec arg)
2304 {
2305     tcg_temp_free_internal(tcgv_vec_temp(arg));
2306 }
2307 
2308 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2309 {
2310     TCGContext *s = tcg_ctx;
2311     GHashTable *h = s->const_table[type];
2312     TCGTemp *ts;
2313 
2314     if (h == NULL) {
2315         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2316         s->const_table[type] = h;
2317     }
2318 
2319     ts = g_hash_table_lookup(h, &val);
2320     if (ts == NULL) {
2321         int64_t *val_ptr;
2322 
2323         ts = tcg_temp_alloc(s);
2324 
2325         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2326             TCGTemp *ts2 = tcg_temp_alloc(s);
2327 
2328             tcg_debug_assert(ts2 == ts + 1);
2329 
2330             ts->base_type = TCG_TYPE_I64;
2331             ts->type = TCG_TYPE_I32;
2332             ts->kind = TEMP_CONST;
2333             ts->temp_allocated = 1;
2334 
2335             ts2->base_type = TCG_TYPE_I64;
2336             ts2->type = TCG_TYPE_I32;
2337             ts2->kind = TEMP_CONST;
2338             ts2->temp_allocated = 1;
2339             ts2->temp_subindex = 1;
2340 
2341             /*
2342              * Retain the full 64-bit value in the temp for the low part,
2343              * so that the hash table, which compares 64-bit keys, works.
2344              * Actual uses will truncate the value to the low 32 bits.
2345              */
2346             ts[HOST_BIG_ENDIAN].val = val;
2347             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2348             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2349         } else {
2350             ts->base_type = type;
2351             ts->type = type;
2352             ts->kind = TEMP_CONST;
2353             ts->temp_allocated = 1;
2354             ts->val = val;
2355             val_ptr = &ts->val;
2356         }
2357         g_hash_table_insert(h, val_ptr, ts);
2358     }
2359 
2360     return ts;
2361 }
2362 
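/*
 * Constants are interned: for a given type and value,
 * tcg_constant_internal returns the same TEMP_CONST temp on every
 * call within a context.  For example:
 *
 *     TCGv_i32 a = tcg_constant_i32(5);
 *     TCGv_i32 b = tcg_constant_i32(5);
 *     // a == b: both name the same hash table entry
 */
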
2363 TCGv_i32 tcg_constant_i32(int32_t val)
2364 {
2365     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2366 }
2367 
2368 TCGv_i64 tcg_constant_i64(int64_t val)
2369 {
2370     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2371 }
2372 
2373 TCGv_vaddr tcg_constant_vaddr(uintptr_t val)
2374 {
2375     return temp_tcgv_vaddr(tcg_constant_internal(TCG_TYPE_PTR, val));
2376 }
2377 
2378 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2379 {
2380     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2381 }
2382 
2383 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2384 {
2385     val = dup_const(vece, val);
2386     return temp_tcgv_vec(tcg_constant_internal(type, val));
2387 }
2388 
2389 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2390 {
2391     TCGTemp *t = tcgv_vec_temp(match);
2392 
2393     tcg_debug_assert(t->temp_allocated != 0);
2394     return tcg_constant_vec(t->base_type, vece, val);
2395 }
2396 
2397 #ifdef CONFIG_DEBUG_TCG
2398 size_t temp_idx(TCGTemp *ts)
2399 {
2400     ptrdiff_t n = ts - tcg_ctx->temps;
2401     assert(n >= 0 && n < tcg_ctx->nb_temps);
2402     return n;
2403 }
2404 
2405 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2406 {
2407     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2408 
2409     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2410     assert(o % sizeof(TCGTemp) == 0);
2411 
2412     return (void *)tcg_ctx + (uintptr_t)v;
2413 }
2414 #endif /* CONFIG_DEBUG_TCG */
2415 
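/*
 * The debug accessors above document the handle encoding: a TCGv_i32
 * is not a pointer but the byte offset of its TCGTemp within
 * TCGContext, so the conversion back is just an addition:
 *
 *     TCGTemp *ts = tcgv_i32_temp(v);  // (void *)tcg_ctx + (uintptr_t)v
 */
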
2416 /*
2417  * Return true if OP may appear in the opcode stream with TYPE.
2418  * Test the runtime variable that controls each opcode.
2419  */
2420 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2421 {
2422     bool has_type;
2423 
2424     switch (type) {
2425     case TCG_TYPE_I32:
2426         has_type = true;
2427         break;
2428     case TCG_TYPE_I64:
2429         has_type = TCG_TARGET_REG_BITS == 64;
2430         break;
2431     case TCG_TYPE_V64:
2432         has_type = TCG_TARGET_HAS_v64;
2433         break;
2434     case TCG_TYPE_V128:
2435         has_type = TCG_TARGET_HAS_v128;
2436         break;
2437     case TCG_TYPE_V256:
2438         has_type = TCG_TARGET_HAS_v256;
2439         break;
2440     default:
2441         has_type = false;
2442         break;
2443     }
2444 
2445     switch (op) {
2446     case INDEX_op_discard:
2447     case INDEX_op_set_label:
2448     case INDEX_op_call:
2449     case INDEX_op_br:
2450     case INDEX_op_mb:
2451     case INDEX_op_insn_start:
2452     case INDEX_op_exit_tb:
2453     case INDEX_op_goto_tb:
2454     case INDEX_op_goto_ptr:
2455         return true;
2456 
2457     case INDEX_op_qemu_ld:
2458     case INDEX_op_qemu_st:
2459         tcg_debug_assert(type <= TCG_TYPE_REG);
2460         return true;
2461 
2462     case INDEX_op_qemu_ld2:
2463     case INDEX_op_qemu_st2:
2464         if (TCG_TARGET_REG_BITS == 32) {
2465             tcg_debug_assert(type == TCG_TYPE_I64);
2466             return true;
2467         }
2468         tcg_debug_assert(type == TCG_TYPE_I128);
2469         goto do_lookup;
2470 
2471     case INDEX_op_add:
2472     case INDEX_op_and:
2473     case INDEX_op_brcond:
2474     case INDEX_op_deposit:
2475     case INDEX_op_extract:
2476     case INDEX_op_ld8u:
2477     case INDEX_op_ld8s:
2478     case INDEX_op_ld16u:
2479     case INDEX_op_ld16s:
2480     case INDEX_op_ld:
2481     case INDEX_op_mov:
2482     case INDEX_op_movcond:
2483     case INDEX_op_negsetcond:
2484     case INDEX_op_or:
2485     case INDEX_op_setcond:
2486     case INDEX_op_sextract:
2487     case INDEX_op_st8:
2488     case INDEX_op_st16:
2489     case INDEX_op_st:
2490     case INDEX_op_xor:
2491         return has_type;
2492 
2493     case INDEX_op_brcond2_i32:
2494     case INDEX_op_setcond2_i32:
2495         return TCG_TARGET_REG_BITS == 32;
2496 
2497     case INDEX_op_ld32u:
2498     case INDEX_op_ld32s:
2499     case INDEX_op_st32:
2500     case INDEX_op_ext_i32_i64:
2501     case INDEX_op_extu_i32_i64:
2502     case INDEX_op_extrl_i64_i32:
2503     case INDEX_op_extrh_i64_i32:
2504         return TCG_TARGET_REG_BITS == 64;
2505 
2506     case INDEX_op_mov_vec:
2507     case INDEX_op_dup_vec:
2508     case INDEX_op_dupm_vec:
2509     case INDEX_op_ld_vec:
2510     case INDEX_op_st_vec:
2511     case INDEX_op_add_vec:
2512     case INDEX_op_sub_vec:
2513     case INDEX_op_and_vec:
2514     case INDEX_op_or_vec:
2515     case INDEX_op_xor_vec:
2516     case INDEX_op_cmp_vec:
2517         return has_type;
2518     case INDEX_op_dup2_vec:
2519         return has_type && TCG_TARGET_REG_BITS == 32;
2520     case INDEX_op_not_vec:
2521         return has_type && TCG_TARGET_HAS_not_vec;
2522     case INDEX_op_neg_vec:
2523         return has_type && TCG_TARGET_HAS_neg_vec;
2524     case INDEX_op_abs_vec:
2525         return has_type && TCG_TARGET_HAS_abs_vec;
2526     case INDEX_op_andc_vec:
2527         return has_type && TCG_TARGET_HAS_andc_vec;
2528     case INDEX_op_orc_vec:
2529         return has_type && TCG_TARGET_HAS_orc_vec;
2530     case INDEX_op_nand_vec:
2531         return has_type && TCG_TARGET_HAS_nand_vec;
2532     case INDEX_op_nor_vec:
2533         return has_type && TCG_TARGET_HAS_nor_vec;
2534     case INDEX_op_eqv_vec:
2535         return has_type && TCG_TARGET_HAS_eqv_vec;
2536     case INDEX_op_mul_vec:
2537         return has_type && TCG_TARGET_HAS_mul_vec;
2538     case INDEX_op_shli_vec:
2539     case INDEX_op_shri_vec:
2540     case INDEX_op_sari_vec:
2541         return has_type && TCG_TARGET_HAS_shi_vec;
2542     case INDEX_op_shls_vec:
2543     case INDEX_op_shrs_vec:
2544     case INDEX_op_sars_vec:
2545         return has_type && TCG_TARGET_HAS_shs_vec;
2546     case INDEX_op_shlv_vec:
2547     case INDEX_op_shrv_vec:
2548     case INDEX_op_sarv_vec:
2549         return has_type && TCG_TARGET_HAS_shv_vec;
2550     case INDEX_op_rotli_vec:
2551         return has_type && TCG_TARGET_HAS_roti_vec;
2552     case INDEX_op_rotls_vec:
2553         return has_type && TCG_TARGET_HAS_rots_vec;
2554     case INDEX_op_rotlv_vec:
2555     case INDEX_op_rotrv_vec:
2556         return has_type && TCG_TARGET_HAS_rotv_vec;
2557     case INDEX_op_ssadd_vec:
2558     case INDEX_op_usadd_vec:
2559     case INDEX_op_sssub_vec:
2560     case INDEX_op_ussub_vec:
2561         return has_type && TCG_TARGET_HAS_sat_vec;
2562     case INDEX_op_smin_vec:
2563     case INDEX_op_umin_vec:
2564     case INDEX_op_smax_vec:
2565     case INDEX_op_umax_vec:
2566         return has_type && TCG_TARGET_HAS_minmax_vec;
2567     case INDEX_op_bitsel_vec:
2568         return has_type && TCG_TARGET_HAS_bitsel_vec;
2569     case INDEX_op_cmpsel_vec:
2570         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2571 
2572     default:
2573         if (op < INDEX_op_last_generic) {
2574             const TCGOutOp *outop;
2575             TCGConstraintSetIndex con_set;
2576 
2577             if (!has_type) {
2578                 return false;
2579             }
2580 
2581     do_lookup:
2582             outop = all_outop[op];
2583             tcg_debug_assert(outop != NULL);
2584 
2585             con_set = outop->static_constraint;
2586             if (con_set == C_Dynamic) {
2587                 con_set = outop->dynamic_constraint(type, flags);
2588             }
2589             if (con_set >= 0) {
2590                 return true;
2591             }
2592             tcg_debug_assert(con_set == C_NotImplemented);
2593             return false;
2594         }
2595         tcg_debug_assert(op < NB_OPS);
2596         return true;
2597 
2598     case INDEX_op_last_generic:
2599         g_assert_not_reached();
2600     }
2601 }
2602 
2603 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2604 {
2605     unsigned width;
2606 
2607     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2608     width = (type == TCG_TYPE_I32 ? 32 : 64);
2609 
2610     tcg_debug_assert(ofs < width);
2611     tcg_debug_assert(len > 0);
2612     tcg_debug_assert(len <= width - ofs);
2613 
2614     return TCG_TARGET_deposit_valid(type, ofs, len);
2615 }
2616 
2617 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2618 
2619 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2620                           TCGTemp *ret, TCGTemp **args)
2621 {
2622     TCGv_i64 extend_free[MAX_CALL_IARGS];
2623     int n_extend = 0;
2624     TCGOp *op;
2625     int i, n, pi = 0, total_args;
2626 
2627     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2628         init_call_layout(info);
2629         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2630     }
2631 
2632     total_args = info->nr_out + info->nr_in + 2;
2633     op = tcg_op_alloc(INDEX_op_call, total_args);
2634 
2635 #ifdef CONFIG_PLUGIN
2636     /* Flag helpers that may affect guest state */
2637     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2638         tcg_ctx->plugin_insn->calls_helpers = true;
2639     }
2640 #endif
2641 
2642     TCGOP_CALLO(op) = n = info->nr_out;
2643     switch (n) {
2644     case 0:
2645         tcg_debug_assert(ret == NULL);
2646         break;
2647     case 1:
2648         tcg_debug_assert(ret != NULL);
2649         op->args[pi++] = temp_arg(ret);
2650         break;
2651     case 2:
2652     case 4:
2653         tcg_debug_assert(ret != NULL);
2654         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2655         tcg_debug_assert(ret->temp_subindex == 0);
2656         for (i = 0; i < n; ++i) {
2657             op->args[pi++] = temp_arg(ret + i);
2658         }
2659         break;
2660     default:
2661         g_assert_not_reached();
2662     }
2663 
2664     TCGOP_CALLI(op) = n = info->nr_in;
2665     for (i = 0; i < n; i++) {
2666         const TCGCallArgumentLoc *loc = &info->in[i];
2667         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2668 
2669         switch (loc->kind) {
2670         case TCG_CALL_ARG_NORMAL:
2671         case TCG_CALL_ARG_BY_REF:
2672         case TCG_CALL_ARG_BY_REF_N:
2673             op->args[pi++] = temp_arg(ts);
2674             break;
2675 
2676         case TCG_CALL_ARG_EXTEND_U:
2677         case TCG_CALL_ARG_EXTEND_S:
2678             {
2679                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2680                 TCGv_i32 orig = temp_tcgv_i32(ts);
2681 
2682                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2683                     tcg_gen_ext_i32_i64(temp, orig);
2684                 } else {
2685                     tcg_gen_extu_i32_i64(temp, orig);
2686                 }
2687                 op->args[pi++] = tcgv_i64_arg(temp);
2688                 extend_free[n_extend++] = temp;
2689             }
2690             break;
2691 
2692         default:
2693             g_assert_not_reached();
2694         }
2695     }
2696     op->args[pi++] = (uintptr_t)func;
2697     op->args[pi++] = (uintptr_t)info;
2698     tcg_debug_assert(pi == total_args);
2699 
2700     if (tcg_ctx->emit_before_op) {
2701         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2702     } else {
2703         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2704     }
2705 
2706     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2707     for (i = 0; i < n_extend; ++i) {
2708         tcg_temp_free_i64(extend_free[i]);
2709     }
2710 }
2711 
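/*
 * A sketch of the resulting call op, assuming a helper with one
 * register-sized result and two register-sized inputs on a 64-bit
 * host: TCGOP_CALLO(op) == 1, TCGOP_CALLI(op) == 2, and op->args
 * holds
 *
 *     { ret, in0, in1, (uintptr_t)func, (uintptr_t)info }
 *
 * i.e. outputs first, then inputs, then the function pointer and its
 * TCGHelperInfo.
 */
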
2712 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2713 {
2714     tcg_gen_callN(func, info, ret, NULL);
2715 }
2716 
2717 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2718 {
2719     tcg_gen_callN(func, info, ret, &t1);
2720 }
2721 
2722 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2723                    TCGTemp *t1, TCGTemp *t2)
2724 {
2725     TCGTemp *args[2] = { t1, t2 };
2726     tcg_gen_callN(func, info, ret, args);
2727 }
2728 
2729 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2730                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2731 {
2732     TCGTemp *args[3] = { t1, t2, t3 };
2733     tcg_gen_callN(func, info, ret, args);
2734 }
2735 
2736 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2737                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2738 {
2739     TCGTemp *args[4] = { t1, t2, t3, t4 };
2740     tcg_gen_callN(func, info, ret, args);
2741 }
2742 
2743 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2744                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2745 {
2746     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2747     tcg_gen_callN(func, info, ret, args);
2748 }
2749 
2750 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2751                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2752                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2753 {
2754     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2755     tcg_gen_callN(func, info, ret, args);
2756 }
2757 
2758 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2759                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2760                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2761 {
2762     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2763     tcg_gen_callN(func, info, ret, args);
2764 }
2765 
2766 static void tcg_reg_alloc_start(TCGContext *s)
2767 {
2768     int i, n;
2769 
2770     for (i = 0, n = s->nb_temps; i < n; i++) {
2771         TCGTemp *ts = &s->temps[i];
2772         TCGTempVal val = TEMP_VAL_MEM;
2773 
2774         switch (ts->kind) {
2775         case TEMP_CONST:
2776             val = TEMP_VAL_CONST;
2777             break;
2778         case TEMP_FIXED:
2779             val = TEMP_VAL_REG;
2780             break;
2781         case TEMP_GLOBAL:
2782             break;
2783         case TEMP_EBB:
2784             val = TEMP_VAL_DEAD;
2785             /* fall through */
2786         case TEMP_TB:
2787             ts->mem_allocated = 0;
2788             break;
2789         default:
2790             g_assert_not_reached();
2791         }
2792         ts->val_type = val;
2793     }
2794 
2795     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2796 }
2797 
2798 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2799                                  TCGTemp *ts)
2800 {
2801     int idx = temp_idx(ts);
2802 
2803     switch (ts->kind) {
2804     case TEMP_FIXED:
2805     case TEMP_GLOBAL:
2806         pstrcpy(buf, buf_size, ts->name);
2807         break;
2808     case TEMP_TB:
2809         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2810         break;
2811     case TEMP_EBB:
2812         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2813         break;
2814     case TEMP_CONST:
2815         switch (ts->type) {
2816         case TCG_TYPE_I32:
2817             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2818             break;
2819 #if TCG_TARGET_REG_BITS > 32
2820         case TCG_TYPE_I64:
2821             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2822             break;
2823 #endif
2824         case TCG_TYPE_V64:
2825         case TCG_TYPE_V128:
2826         case TCG_TYPE_V256:
2827             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2828                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2829             break;
2830         default:
2831             g_assert_not_reached();
2832         }
2833         break;
2834     }
2835     return buf;
2836 }
2837 
2838 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2839                              int buf_size, TCGArg arg)
2840 {
2841     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2842 }
2843 
2844 static const char * const cond_name[] =
2845 {
2846     [TCG_COND_NEVER] = "never",
2847     [TCG_COND_ALWAYS] = "always",
2848     [TCG_COND_EQ] = "eq",
2849     [TCG_COND_NE] = "ne",
2850     [TCG_COND_LT] = "lt",
2851     [TCG_COND_GE] = "ge",
2852     [TCG_COND_LE] = "le",
2853     [TCG_COND_GT] = "gt",
2854     [TCG_COND_LTU] = "ltu",
2855     [TCG_COND_GEU] = "geu",
2856     [TCG_COND_LEU] = "leu",
2857     [TCG_COND_GTU] = "gtu",
2858     [TCG_COND_TSTEQ] = "tsteq",
2859     [TCG_COND_TSTNE] = "tstne",
2860 };
2861 
2862 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2863 {
2864     [MO_UB]   = "ub",
2865     [MO_SB]   = "sb",
2866     [MO_LEUW] = "leuw",
2867     [MO_LESW] = "lesw",
2868     [MO_LEUL] = "leul",
2869     [MO_LESL] = "lesl",
2870     [MO_LEUQ] = "leq",
2871     [MO_BEUW] = "beuw",
2872     [MO_BESW] = "besw",
2873     [MO_BEUL] = "beul",
2874     [MO_BESL] = "besl",
2875     [MO_BEUQ] = "beq",
2876     [MO_128 + MO_BE] = "beo",
2877     [MO_128 + MO_LE] = "leo",
2878 };
2879 
2880 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2881     [MO_UNALN >> MO_ASHIFT]    = "un+",
2882     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2883     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2884     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2885     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2886     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2887     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2888     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2889 };
2890 
2891 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2892     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2893     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2894     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2895     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2896     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2897     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2898 };
2899 
2900 static const char bswap_flag_name[][6] = {
2901     [TCG_BSWAP_IZ] = "iz",
2902     [TCG_BSWAP_OZ] = "oz",
2903     [TCG_BSWAP_OS] = "os",
2904     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2905     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2906 };
2907 
2908 #ifdef CONFIG_PLUGIN
2909 static const char * const plugin_from_name[] = {
2910     "from-tb",
2911     "from-insn",
2912     "after-insn",
2913     "after-tb",
2914 };
2915 #endif
2916 
2917 static inline bool tcg_regset_single(TCGRegSet d)
2918 {
2919     return (d & (d - 1)) == 0;
2920 }
2921 
2922 static inline TCGReg tcg_regset_first(TCGRegSet d)
2923 {
2924     if (TCG_TARGET_NB_REGS <= 32) {
2925         return ctz32(d);
2926     } else {
2927         return ctz64(d);
2928     }
2929 }
2930 
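/*
 * tcg_regset_single uses the usual power-of-two test: d & (d - 1)
 * clears the lowest set bit, so the result is zero exactly when d has
 * at most one bit set, e.g. 0b01000 & 0b00111 == 0.
 */
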
2931 /* Return only the number of characters output -- no error return. */
2932 #define ne_fprintf(...) \
2933     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2934 
2935 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2936 {
2937     char buf[128];
2938     TCGOp *op;
2939 
2940     QTAILQ_FOREACH(op, &s->ops, link) {
2941         int i, k, nb_oargs, nb_iargs, nb_cargs;
2942         const TCGOpDef *def;
2943         TCGOpcode c;
2944         int col = 0;
2945 
2946         c = op->opc;
2947         def = &tcg_op_defs[c];
2948 
2949         if (c == INDEX_op_insn_start) {
2950             nb_oargs = 0;
2951             col += ne_fprintf(f, "\n ----");
2952 
2953             for (i = 0, k = INSN_START_WORDS; i < k; ++i) {
2954                 col += ne_fprintf(f, " %016" PRIx64,
2955                                   tcg_get_insn_start_param(op, i));
2956             }
2957         } else if (c == INDEX_op_call) {
2958             const TCGHelperInfo *info = tcg_call_info(op);
2959             void *func = tcg_call_func(op);
2960 
2961             /* variable number of arguments */
2962             nb_oargs = TCGOP_CALLO(op);
2963             nb_iargs = TCGOP_CALLI(op);
2964             nb_cargs = def->nb_cargs;
2965 
2966             col += ne_fprintf(f, " %s ", def->name);
2967 
2968             /*
2969              * Print the function name from TCGHelperInfo, if available.
2970              * Note that plugins have a template function for the info,
2971              * but the actual function pointer comes from the plugin.
2972              */
2973             if (func == info->func) {
2974                 col += ne_fprintf(f, "%s", info->name);
2975             } else {
2976                 col += ne_fprintf(f, "plugin(%p)", func);
2977             }
2978 
2979             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2980             for (i = 0; i < nb_oargs; i++) {
2981                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2982                                                             op->args[i]));
2983             }
2984             for (i = 0; i < nb_iargs; i++) {
2985                 TCGArg arg = op->args[nb_oargs + i];
2986                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2987                 col += ne_fprintf(f, ",%s", t);
2988             }
2989         } else {
2990             if (def->flags & TCG_OPF_INT) {
2991                 col += ne_fprintf(f, " %s_i%d ",
2992                                   def->name,
2993                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2994             } else if (def->flags & TCG_OPF_VECTOR) {
2995                 col += ne_fprintf(f, "%s v%d,e%d,",
2996                                   def->name,
2997                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2998                                   8 << TCGOP_VECE(op));
2999             } else {
3000                 col += ne_fprintf(f, " %s ", def->name);
3001             }
3002 
3003             nb_oargs = def->nb_oargs;
3004             nb_iargs = def->nb_iargs;
3005             nb_cargs = def->nb_cargs;
3006 
3007             k = 0;
3008             for (i = 0; i < nb_oargs; i++) {
3009                 const char *sep = k ? "," : "";
3010                 col += ne_fprintf(f, "%s%s", sep,
3011                                   tcg_get_arg_str(s, buf, sizeof(buf),
3012                                                   op->args[k++]));
3013             }
3014             for (i = 0; i < nb_iargs; i++) {
3015                 const char *sep = k ? "," : "";
3016                 col += ne_fprintf(f, "%s%s", sep,
3017                                   tcg_get_arg_str(s, buf, sizeof(buf),
3018                                                   op->args[k++]));
3019             }
3020             switch (c) {
3021             case INDEX_op_brcond:
3022             case INDEX_op_setcond:
3023             case INDEX_op_negsetcond:
3024             case INDEX_op_movcond:
3025             case INDEX_op_brcond2_i32:
3026             case INDEX_op_setcond2_i32:
3027             case INDEX_op_cmp_vec:
3028             case INDEX_op_cmpsel_vec:
3029                 if (op->args[k] < ARRAY_SIZE(cond_name)
3030                     && cond_name[op->args[k]]) {
3031                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
3032                 } else {
3033                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
3034                 }
3035                 i = 1;
3036                 break;
3037             case INDEX_op_qemu_ld:
3038             case INDEX_op_qemu_st:
3039             case INDEX_op_qemu_ld2:
3040             case INDEX_op_qemu_st2:
3041                 {
3042                     const char *s_al, *s_tlb, *s_op, *s_at;
3043                     MemOpIdx oi = op->args[k++];
3044                     MemOp mop = get_memop(oi);
3045                     unsigned ix = get_mmuidx(oi);
3046 
3047                     s_tlb = mop & MO_ALIGN_TLB_ONLY ? "tlb+" : "";
3048                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
3049                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
3050                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
3051                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE |
3052                              MO_ATOM_MASK | MO_ALIGN_TLB_ONLY);
3053 
3054                     /* If all fields are accounted for, print symbolically. */
3055                     if (!mop && s_al && s_op && s_at) {
3056                         col += ne_fprintf(f, ",%s%s%s%s,%u",
3057                                           s_at, s_al, s_tlb, s_op, ix);
3058                     } else {
3059                         mop = get_memop(oi);
3060                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
3061                     }
3062                     i = 1;
3063                 }
3064                 break;
3065             case INDEX_op_bswap16:
3066             case INDEX_op_bswap32:
3067             case INDEX_op_bswap64:
3068                 {
3069                     TCGArg flags = op->args[k];
3070                     const char *name = NULL;
3071 
3072                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
3073                         name = bswap_flag_name[flags];
3074                     }
3075                     if (name) {
3076                         col += ne_fprintf(f, ",%s", name);
3077                     } else {
3078                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
3079                     }
3080                     i = k = 1;
3081                 }
3082                 break;
3083 #ifdef CONFIG_PLUGIN
3084             case INDEX_op_plugin_cb:
3085                 {
3086                     TCGArg from = op->args[k++];
3087                     const char *name = NULL;
3088 
3089                     if (from < ARRAY_SIZE(plugin_from_name)) {
3090                         name = plugin_from_name[from];
3091                     }
3092                     if (name) {
3093                         col += ne_fprintf(f, "%s", name);
3094                     } else {
3095                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
3096                     }
3097                     i = 1;
3098                 }
3099                 break;
3100 #endif
3101             default:
3102                 i = 0;
3103                 break;
3104             }
3105             switch (c) {
3106             case INDEX_op_set_label:
3107             case INDEX_op_br:
3108             case INDEX_op_brcond:
3109             case INDEX_op_brcond2_i32:
3110                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
3111                                   arg_label(op->args[k])->id);
3112                 i++, k++;
3113                 break;
3114             case INDEX_op_mb:
3115                 {
3116                     TCGBar membar = op->args[k];
3117                     const char *b_op, *m_op;
3118 
3119                     switch (membar & TCG_BAR_SC) {
3120                     case 0:
3121                         b_op = "none";
3122                         break;
3123                     case TCG_BAR_LDAQ:
3124                         b_op = "acq";
3125                         break;
3126                     case TCG_BAR_STRL:
3127                         b_op = "rel";
3128                         break;
3129                     case TCG_BAR_SC:
3130                         b_op = "seq";
3131                         break;
3132                     default:
3133                         g_assert_not_reached();
3134                     }
3135 
3136                     switch (membar & TCG_MO_ALL) {
3137                     case 0:
3138                         m_op = "none";
3139                         break;
3140                     case TCG_MO_LD_LD:
3141                         m_op = "rr";
3142                         break;
3143                     case TCG_MO_LD_ST:
3144                         m_op = "rw";
3145                         break;
3146                     case TCG_MO_ST_LD:
3147                         m_op = "wr";
3148                         break;
3149                     case TCG_MO_ST_ST:
3150                         m_op = "ww";
3151                         break;
3152                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3153                         m_op = "rr+rw";
3154                         break;
3155                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3156                         m_op = "rr+wr";
3157                         break;
3158                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3159                         m_op = "rr+ww";
3160                         break;
3161                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3162                         m_op = "rw+wr";
3163                         break;
3164                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3165                         m_op = "rw+ww";
3166                         break;
3167                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3168                         m_op = "wr+ww";
3169                         break;
3170                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3171                         m_op = "rr+rw+wr";
3172                         break;
3173                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3174                         m_op = "rr+rw+ww";
3175                         break;
3176                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3177                         m_op = "rr+wr+ww";
3178                         break;
3179                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3180                         m_op = "rw+wr+ww";
3181                         break;
3182                     case TCG_MO_ALL:
3183                         m_op = "all";
3184                         break;
3185                     default:
3186                         g_assert_not_reached();
3187                     }
3188 
3189                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3190                     i++, k++;
3191                 }
3192                 break;
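            /*
             * Illustrative example: a barrier emitted as
             * tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC) sets both flag groups
             * fully and is printed by the code above as "seq:all".
             */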
3193             default:
3194                 break;
3195             }
3196             for (; i < nb_cargs; i++, k++) {
3197                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3198                                   op->args[k]);
3199             }
3200         }
3201 
3202         if (have_prefs || op->life) {
3203             for (; col < 40; ++col) {
3204                 putc(' ', f);
3205             }
3206         }
3207 
3208         if (op->life) {
3209             unsigned life = op->life;
3210 
3211             if (life & (SYNC_ARG * 3)) {
3212                 ne_fprintf(f, "  sync:");
3213                 for (i = 0; i < 2; ++i) {
3214                     if (life & (SYNC_ARG << i)) {
3215                         ne_fprintf(f, " %d", i);
3216                     }
3217                 }
3218             }
3219             life /= DEAD_ARG;
3220             if (life) {
3221                 ne_fprintf(f, "  dead:");
3222                 for (i = 0; life; ++i, life >>= 1) {
3223                     if (life & 1) {
3224                         ne_fprintf(f, " %d", i);
3225                     }
3226                 }
3227             }
3228         }
3229 
3230         if (have_prefs) {
3231             for (i = 0; i < nb_oargs; ++i) {
3232                 TCGRegSet set = output_pref(op, i);
3233 
3234                 if (i == 0) {
3235                     ne_fprintf(f, "  pref=");
3236                 } else {
3237                     ne_fprintf(f, ",");
3238                 }
3239                 if (set == 0) {
3240                     ne_fprintf(f, "none");
3241                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3242                     ne_fprintf(f, "all");
3243 #ifdef CONFIG_DEBUG_TCG
3244                 } else if (tcg_regset_single(set)) {
3245                     TCGReg reg = tcg_regset_first(set);
3246                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3247 #endif
3248                 } else if (TCG_TARGET_NB_REGS <= 32) {
3249                     ne_fprintf(f, "0x%x", (uint32_t)set);
3250                 } else {
3251                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3252                 }
3253             }
3254         }
3255 
3256         putc('\n', f);
3257     }
3258 }
3259 
3260 /* we give more priority to constraints with fewer registers */
3261 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3262 {
3263     int n;
3264 
3265     arg_ct += k;
3266     n = ctpop64(arg_ct->regs);
3267 
3268     /*
3269      * Sort constraints of a single register first, which includes output
3270      * aliases (which must exactly match the input already allocated).
3271      */
3272     if (n == 1 || arg_ct->oalias) {
3273         return INT_MAX;
3274     }
3275 
3276     /*
3277      * Sort register pairs next, first then second immediately after.
3278      * Arbitrarily sort multiple pairs by the index of the first reg;
3279      * there shouldn't be many pairs.
3280      */
3281     switch (arg_ct->pair) {
3282     case 1:
3283     case 3:
3284         return (k + 1) * 2;
3285     case 2:
3286         return (arg_ct->pair_index + 1) * 2 - 1;
3287     }
3288 
3289     /* Finally, sort by decreasing register count. */
3290     assert(n > 1);
3291     return -n;
3292 }
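
/*
 * Illustrative example (register classes are target-specific): on an
 * i386-like host where "r" allows 8 registers and "q" only the 4
 * byte-addressable ones, an aliased output such as "0" sorts first
 * (INT_MAX), register pairs follow, and then "q" (-4) sorts before
 * "r" (-8): the scarcest constraints are considered earliest.
 */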
3293 
3294 /* sort from highest priority to lowest */
3295 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3296 {
3297     int i, j;
3298 
3299     for (i = 0; i < n; i++) {
3300         a[start + i].sort_index = start + i;
3301     }
3302     if (n <= 1) {
3303         return;
3304     }
3305     for (i = 0; i < n - 1; i++) {
3306         for (j = i + 1; j < n; j++) {
3307             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3308             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3309             if (p1 < p2) {
3310                 int tmp = a[start + i].sort_index;
3311                 a[start + i].sort_index = a[start + j].sort_index;
3312                 a[start + j].sort_index = tmp;
3313             }
3314         }
3315     }
3316 }
3317 
3318 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3319 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3320 
3321 static void process_constraint_sets(void)
3322 {
3323     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3324         const TCGConstraintSet *tdefs = &constraint_sets[c];
3325         TCGArgConstraint *args_ct = all_cts[c];
3326         int nb_oargs = tdefs->nb_oargs;
3327         int nb_iargs = tdefs->nb_iargs;
3328         int nb_args = nb_oargs + nb_iargs;
3329         bool saw_alias_pair = false;
3330 
3331         for (int i = 0; i < nb_args; i++) {
3332             const char *ct_str = tdefs->args_ct_str[i];
3333             bool input_p = i >= nb_oargs;
3334             int o;
3335 
3336             switch (*ct_str) {
3337             case '0' ... '9':
3338                 o = *ct_str - '0';
3339                 tcg_debug_assert(input_p);
3340                 tcg_debug_assert(o < nb_oargs);
3341                 tcg_debug_assert(args_ct[o].regs != 0);
3342                 tcg_debug_assert(!args_ct[o].oalias);
3343                 args_ct[i] = args_ct[o];
3344                 /* The output sets oalias.  */
3345                 args_ct[o].oalias = 1;
3346                 args_ct[o].alias_index = i;
3347                 /* The input sets ialias. */
3348                 args_ct[i].ialias = 1;
3349                 args_ct[i].alias_index = o;
3350                 if (args_ct[i].pair) {
3351                     saw_alias_pair = true;
3352                 }
3353                 tcg_debug_assert(ct_str[1] == '\0');
3354                 continue;
3355 
3356             case '&':
3357                 tcg_debug_assert(!input_p);
3358                 args_ct[i].newreg = true;
3359                 ct_str++;
3360                 break;
3361 
3362             case 'p': /* plus */
3363                 /* Allocate to the register after the previous. */
3364                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3365                 o = i - 1;
3366                 tcg_debug_assert(!args_ct[o].pair);
3367                 tcg_debug_assert(!args_ct[o].ct);
3368                 args_ct[i] = (TCGArgConstraint){
3369                     .pair = 2,
3370                     .pair_index = o,
3371                     .regs = args_ct[o].regs << 1,
3372                     .newreg = args_ct[o].newreg,
3373                 };
3374                 args_ct[o].pair = 1;
3375                 args_ct[o].pair_index = i;
3376                 tcg_debug_assert(ct_str[1] == '\0');
3377                 continue;
3378 
3379             case 'm': /* minus */
3380                 /* Allocate to the register before the previous. */
3381                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3382                 o = i - 1;
3383                 tcg_debug_assert(!args_ct[o].pair);
3384                 tcg_debug_assert(!args_ct[o].ct);
3385                 args_ct[i] = (TCGArgConstraint){
3386                     .pair = 1,
3387                     .pair_index = o,
3388                     .regs = args_ct[o].regs >> 1,
3389                     .newreg = args_ct[o].newreg,
3390                 };
3391                 args_ct[o].pair = 2;
3392                 args_ct[o].pair_index = i;
3393                 tcg_debug_assert(ct_str[1] == '\0');
3394                 continue;
3395             }
3396 
3397             do {
3398                 switch (*ct_str) {
3399                 case 'i':
3400                     args_ct[i].ct |= TCG_CT_CONST;
3401                     break;
3402 #ifdef TCG_REG_ZERO
3403                 case 'z':
3404                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3405                     break;
3406 #endif
3407 
3408                 /* Include all of the target-specific constraints. */
3409 
3410 #undef CONST
3411 #define CONST(CASE, MASK) \
3412     case CASE: args_ct[i].ct |= MASK; break;
3413 #define REGS(CASE, MASK) \
3414     case CASE: args_ct[i].regs |= MASK; break;
3415 
3416 #include "tcg-target-con-str.h"
3417 
3418 #undef REGS
3419 #undef CONST
3420                 default:
3421                 case '0' ... '9':
3422                 case '&':
3423                 case 'p':
3424                 case 'm':
3425                     /* Typo in TCGConstraintSet constraint. */
3426                     g_assert_not_reached();
3427                 }
3428             } while (*++ct_str != '\0');
3429         }
3430 
3431         /*
3432          * Fix up output pairs that are aliased with inputs.
3433          * When we created the alias, we copied pair from the output.
3434          * There are three cases:
3435          *    (1a) Pairs of inputs alias pairs of outputs.
3436          *    (1b) One input aliases the first of a pair of outputs.
3437          *    (2)  One input aliases the second of a pair of outputs.
3438          *
3439          * Case 1a is handled by making sure that the pair_index'es are
3440          * properly updated so that they appear the same as a pair of inputs.
3441          *
3442          * Case 1b is handled by setting the pair_index of the input to
3443          * itself, simply so it doesn't point to an unrelated argument.
3444          * Since we don't encounter the "second" during the input allocation
3445          * phase, nothing happens with the second half of the input pair.
3446          *
3447          * Case 2 is handled by setting the second input to pair=3, the
3448          * first output to pair=3, and the pair_index'es to match.
3449          */
3450         if (saw_alias_pair) {
3451             for (int i = nb_oargs; i < nb_args; i++) {
3452                 int o, o2, i2;
3453 
3454                 /*
3455                  * Since [0-9pm] must be alone in the constraint string,
3456                  * the only way they can both be set is if the pair comes
3457                  * from the output alias.
3458                  */
3459                 if (!args_ct[i].ialias) {
3460                     continue;
3461                 }
3462                 switch (args_ct[i].pair) {
3463                 case 0:
3464                     break;
3465                 case 1:
3466                     o = args_ct[i].alias_index;
3467                     o2 = args_ct[o].pair_index;
3468                     tcg_debug_assert(args_ct[o].pair == 1);
3469                     tcg_debug_assert(args_ct[o2].pair == 2);
3470                     if (args_ct[o2].oalias) {
3471                         /* Case 1a */
3472                         i2 = args_ct[o2].alias_index;
3473                         tcg_debug_assert(args_ct[i2].pair == 2);
3474                         args_ct[i2].pair_index = i;
3475                         args_ct[i].pair_index = i2;
3476                     } else {
3477                         /* Case 1b */
3478                         args_ct[i].pair_index = i;
3479                     }
3480                     break;
3481                 case 2:
3482                     o = args_ct[i].alias_index;
3483                     o2 = args_ct[o].pair_index;
3484                     tcg_debug_assert(args_ct[o].pair == 2);
3485                     tcg_debug_assert(args_ct[o2].pair == 1);
3486                     if (args_ct[o2].oalias) {
3487                         /* Case 1a */
3488                         i2 = args_ct[o2].alias_index;
3489                         tcg_debug_assert(args_ct[i2].pair == 1);
3490                         args_ct[i2].pair_index = i;
3491                         args_ct[i].pair_index = i2;
3492                     } else {
3493                         /* Case 2 */
3494                         args_ct[i].pair = 3;
3495                         args_ct[o2].pair = 3;
3496                         args_ct[i].pair_index = o2;
3497                         args_ct[o2].pair_index = i;
3498                     }
3499                     break;
3500                 default:
3501                     g_assert_not_reached();
3502                 }
3503             }
3504         }
3505 
3506         /* sort the constraints (XXX: this is just a heuristic) */
3507         sort_constraints(args_ct, 0, nb_oargs);
3508         sort_constraints(args_ct, nb_oargs, nb_iargs);
3509     }
3510 }
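
/*
 * Worked example (letters other than the digits and 'i' are
 * target-defined via tcg-target-con-str.h; "r" here is illustrative):
 * a constraint set { "r", "0", "ri" } for one output and two inputs
 * parses as: output 0 allocated from class 'r'; input 1 aliases
 * output 0 (ialias/oalias with matching alias_index); input 2 accepts
 * class 'r' or a constant (TCG_CT_CONST).
 */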
3511 
3512 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3513 {
3514     TCGOpcode opc = op->opc;
3515     TCGType type = TCGOP_TYPE(op);
3516     unsigned flags = TCGOP_FLAGS(op);
3517     const TCGOpDef *def = &tcg_op_defs[opc];
3518     const TCGOutOp *outop = all_outop[opc];
3519     TCGConstraintSetIndex con_set;
3520 
3521     if (def->flags & TCG_OPF_NOT_PRESENT) {
3522         return empty_cts;
3523     }
3524 
3525     if (outop) {
3526         con_set = outop->static_constraint;
3527         if (con_set == C_Dynamic) {
3528             con_set = outop->dynamic_constraint(type, flags);
3529         }
3530     } else {
3531         con_set = tcg_target_op_def(opc, type, flags);
3532     }
3533     tcg_debug_assert(con_set >= 0);
3534     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3535 
3536     /* The constraint arguments must match TCGOpcode arguments. */
3537     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3538     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3539 
3540     return all_cts[con_set];
3541 }
3542 
3543 static void remove_label_use(TCGOp *op, int idx)
3544 {
3545     TCGLabel *label = arg_label(op->args[idx]);
3546     TCGLabelUse *use;
3547 
3548     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3549         if (use->op == op) {
3550             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3551             return;
3552         }
3553     }
3554     g_assert_not_reached();
3555 }
3556 
3557 void tcg_op_remove(TCGContext *s, TCGOp *op)
3558 {
3559     switch (op->opc) {
3560     case INDEX_op_br:
3561         remove_label_use(op, 0);
3562         break;
3563     case INDEX_op_brcond:
3564         remove_label_use(op, 3);
3565         break;
3566     case INDEX_op_brcond2_i32:
3567         remove_label_use(op, 5);
3568         break;
3569     default:
3570         break;
3571     }
3572 
3573     QTAILQ_REMOVE(&s->ops, op, link);
3574     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3575     s->nb_ops--;
3576 }
3577 
3578 void tcg_remove_ops_after(TCGOp *op)
3579 {
3580     TCGContext *s = tcg_ctx;
3581 
3582     while (true) {
3583         TCGOp *last = tcg_last_op();
3584         if (last == op) {
3585             return;
3586         }
3587         tcg_op_remove(s, last);
3588     }
3589 }
3590 
3591 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3592 {
3593     TCGContext *s = tcg_ctx;
3594     TCGOp *op = NULL;
3595 
3596     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3597         QTAILQ_FOREACH(op, &s->free_ops, link) {
3598             if (nargs <= op->nargs) {
3599                 QTAILQ_REMOVE(&s->free_ops, op, link);
3600                 nargs = op->nargs;
3601                 goto found;
3602             }
3603         }
3604     }
3605 
3606     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3607     nargs = MAX(4, nargs);
3608     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3609 
3610  found:
3611     memset(op, 0, offsetof(TCGOp, link));
3612     op->opc = opc;
3613     op->nargs = nargs;
3614 
3615     /* Check for bitfield overflow. */
3616     tcg_debug_assert(op->nargs == nargs);
3617 
3618     s->nb_ops++;
3619     return op;
3620 }
3621 
3622 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3623 {
3624     TCGOp *op = tcg_op_alloc(opc, nargs);
3625 
3626     if (tcg_ctx->emit_before_op) {
3627         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3628     } else {
3629         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3630     }
3631     return op;
3632 }
3633 
3634 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3635                             TCGOpcode opc, TCGType type, unsigned nargs)
3636 {
3637     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3638 
3639     TCGOP_TYPE(new_op) = type;
3640     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3641     return new_op;
3642 }
3643 
3644 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3645                            TCGOpcode opc, TCGType type, unsigned nargs)
3646 {
3647     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3648 
3649     TCGOP_TYPE(new_op) = type;
3650     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3651     return new_op;
3652 }
3653 
3654 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3655 {
3656     TCGLabelUse *u;
3657 
3658     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3659         TCGOp *op = u->op;
3660         switch (op->opc) {
3661         case INDEX_op_br:
3662             op->args[0] = label_arg(to);
3663             break;
3664         case INDEX_op_brcond:
3665             op->args[3] = label_arg(to);
3666             break;
3667         case INDEX_op_brcond2_i32:
3668             op->args[5] = label_arg(to);
3669             break;
3670         default:
3671             g_assert_not_reached();
3672         }
3673     }
3674 
3675     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3676 }
3677 
3678 /* Reachability analysis: remove unreachable code.  */
3679 static void __attribute__((noinline))
3680 reachable_code_pass(TCGContext *s)
3681 {
3682     TCGOp *op, *op_next, *op_prev;
3683     bool dead = false;
3684 
3685     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3686         bool remove = dead;
3687         TCGLabel *label;
3688 
3689         switch (op->opc) {
3690         case INDEX_op_set_label:
3691             label = arg_label(op->args[0]);
3692 
3693             /*
3694              * Note that the first op in the TB is always a load,
3695              * so there is always something before a label.
3696              */
3697             op_prev = QTAILQ_PREV(op, link);
3698 
3699             /*
3700              * If we find two sequential labels, move all branches to
3701              * reference the second label and remove the first label.
3702              * Do this before branch to next optimization, so that the
3703              * middle label is out of the way.
3704              */
3705             if (op_prev->opc == INDEX_op_set_label) {
3706                 move_label_uses(label, arg_label(op_prev->args[0]));
3707                 tcg_op_remove(s, op_prev);
3708                 op_prev = QTAILQ_PREV(op, link);
3709             }
3710 
3711             /*
3712              * Optimization can fold conditional branches to unconditional.
3713              * If we find a label which is preceded by an unconditional
3714              * branch to next, remove the branch.  We couldn't do this when
3715              * processing the branch because any dead code between the branch
3716              * and label had not yet been removed.
3717              */
3718             if (op_prev->opc == INDEX_op_br &&
3719                 label == arg_label(op_prev->args[0])) {
3720                 tcg_op_remove(s, op_prev);
3721                 /* Fall through means insns become live again.  */
3722                 dead = false;
3723             }
3724 
3725             if (QSIMPLEQ_EMPTY(&label->branches)) {
3726                 /*
3727                  * While there is an occasional backward branch, virtually
3728                  * all branches generated by the translators are forward.
3729                  * This means we will generally have already removed
3730                  * all the references to the label that will ever exist,
3731                  * and there is little to be gained by iterating.
3732                  */
3733                 remove = true;
3734             } else {
3735                 /* Once we see a label, insns become live again.  */
3736                 dead = false;
3737                 remove = false;
3738             }
3739             break;
3740 
3741         case INDEX_op_br:
3742         case INDEX_op_exit_tb:
3743         case INDEX_op_goto_ptr:
3744             /* Unconditional branches; everything following is dead.  */
3745             dead = true;
3746             break;
3747 
3748         case INDEX_op_call:
3749             /* Notice noreturn helper calls, such as those raising exceptions.  */
3750             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3751                 dead = true;
3752             }
3753             break;
3754 
3755         case INDEX_op_insn_start:
3756             /* Never remove -- we need to keep these for unwind.  */
3757             remove = false;
3758             break;
3759 
3760         default:
3761             break;
3762         }
3763 
3764         if (remove) {
3765             tcg_op_remove(s, op);
3766         }
3767     }
3768 }
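
/*
 * Illustrative before/after for this pass, assuming the optimizer has
 * already folded a conditional branch into "br $L1":
 *
 *     br $L1            -- removed: unconditional branch to next
 *     <ops>             -- removed: dead code following the br
 *     set_label $L0     -- removed: merged into $L1, uses moved
 *     set_label $L1     -- kept
 */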
3769 
3770 #define TS_DEAD  1
3771 #define TS_MEM   2
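
/*
 * For the backward liveness walk, a temp state of 0 means the value is
 * live (read by a later op), TS_DEAD means no later op reads it, and
 * TS_MEM means the memory copy must be valid at this point; globals at
 * the end of the function are TS_DEAD | TS_MEM.
 */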
3772 
3773 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3774 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3775 
3776 /* For liveness_pass_1, the register preferences for a given temp.  */
3777 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3778 {
3779     return ts->state_ptr;
3780 }
3781 
3782 /* For liveness_pass_1, reset the preferences for a given temp to the
3783  * maximal regset for its type.
3784  */
3785 static inline void la_reset_pref(TCGTemp *ts)
3786 {
3787     *la_temp_pref(ts)
3788         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3789 }
3790 
3791 /* liveness analysis: end of function: all temps are dead, and globals
3792    should be in memory. */
3793 static void la_func_end(TCGContext *s, int ng, int nt)
3794 {
3795     int i;
3796 
3797     for (i = 0; i < ng; ++i) {
3798         s->temps[i].state = TS_DEAD | TS_MEM;
3799         la_reset_pref(&s->temps[i]);
3800     }
3801     for (i = ng; i < nt; ++i) {
3802         s->temps[i].state = TS_DEAD;
3803         la_reset_pref(&s->temps[i]);
3804     }
3805 }
3806 
3807 /* liveness analysis: end of basic block: all temps are dead, globals
3808    and local temps should be in memory. */
3809 static void la_bb_end(TCGContext *s, int ng, int nt)
3810 {
3811     int i;
3812 
3813     for (i = 0; i < nt; ++i) {
3814         TCGTemp *ts = &s->temps[i];
3815         int state;
3816 
3817         switch (ts->kind) {
3818         case TEMP_FIXED:
3819         case TEMP_GLOBAL:
3820         case TEMP_TB:
3821             state = TS_DEAD | TS_MEM;
3822             break;
3823         case TEMP_EBB:
3824         case TEMP_CONST:
3825             state = TS_DEAD;
3826             break;
3827         default:
3828             g_assert_not_reached();
3829         }
3830         ts->state = state;
3831         la_reset_pref(ts);
3832     }
3833 }
3834 
3835 /* liveness analysis: sync globals back to memory.  */
3836 static void la_global_sync(TCGContext *s, int ng)
3837 {
3838     int i;
3839 
3840     for (i = 0; i < ng; ++i) {
3841         int state = s->temps[i].state;
3842         s->temps[i].state = state | TS_MEM;
3843         if (state == TS_DEAD) {
3844             /* If the global was previously dead, reset prefs.  */
3845             la_reset_pref(&s->temps[i]);
3846         }
3847     }
3848 }
3849 
3850 /*
3851  * liveness analysis: conditional branch: all temps are dead unless
3852  * explicitly live across the conditional branch; globals and local
3853  * temps should be synced.
3854  */
3855 static void la_bb_sync(TCGContext *s, int ng, int nt)
3856 {
3857     la_global_sync(s, ng);
3858 
3859     for (int i = ng; i < nt; ++i) {
3860         TCGTemp *ts = &s->temps[i];
3861         int state;
3862 
3863         switch (ts->kind) {
3864         case TEMP_TB:
3865             state = ts->state;
3866             ts->state = state | TS_MEM;
3867             if (state != TS_DEAD) {
3868                 continue;
3869             }
3870             break;
3871         case TEMP_EBB:
3872         case TEMP_CONST:
3873             continue;
3874         default:
3875             g_assert_not_reached();
3876         }
3877         la_reset_pref(&s->temps[i]);
3878     }
3879 }
3880 
3881 /* liveness analysis: sync globals back to memory and kill.  */
3882 static void la_global_kill(TCGContext *s, int ng)
3883 {
3884     int i;
3885 
3886     for (i = 0; i < ng; i++) {
3887         s->temps[i].state = TS_DEAD | TS_MEM;
3888         la_reset_pref(&s->temps[i]);
3889     }
3890 }
3891 
3892 /* liveness analysis: note live temps crossing calls.  */
3893 static void la_cross_call(TCGContext *s, int nt)
3894 {
3895     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3896     int i;
3897 
3898     for (i = 0; i < nt; i++) {
3899         TCGTemp *ts = &s->temps[i];
3900         if (!(ts->state & TS_DEAD)) {
3901             TCGRegSet *pset = la_temp_pref(ts);
3902             TCGRegSet set = *pset;
3903 
3904             set &= mask;
3905             /* If the combination is not possible, restart.  */
3906             if (set == 0) {
3907                 set = tcg_target_available_regs[ts->type] & mask;
3908             }
3909             *pset = set;
3910         }
3911     }
3912 }
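
/*
 * Host-specific illustration: on an x86-64 host the call-clobbered set
 * includes rax, rcx, rdx, rsi, rdi and r8-r11, so a temp that is live
 * across a call has its preference narrowed here to callee-saved
 * registers such as rbx and r12-r15, avoiding a spill around the call.
 */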
3913 
3914 /*
3915  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3916  * to TEMP_EBB, if possible.
3917  */
3918 static void __attribute__((noinline))
3919 liveness_pass_0(TCGContext *s)
3920 {
3921     void * const multiple_ebb = (void *)(uintptr_t)-1;
3922     int nb_temps = s->nb_temps;
3923     TCGOp *op, *ebb;
3924 
3925     for (int i = s->nb_globals; i < nb_temps; ++i) {
3926         s->temps[i].state_ptr = NULL;
3927     }
3928 
3929     /*
3930      * Represent each EBB by the op at which it begins.  In the case of
3931      * the first EBB, this is the first op, otherwise it is a label.
3932      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3933      * within a single EBB, else MULTIPLE_EBB.
3934      */
3935     ebb = QTAILQ_FIRST(&s->ops);
3936     QTAILQ_FOREACH(op, &s->ops, link) {
3937         const TCGOpDef *def;
3938         int nb_oargs, nb_iargs;
3939 
3940         switch (op->opc) {
3941         case INDEX_op_set_label:
3942             ebb = op;
3943             continue;
3944         case INDEX_op_discard:
3945             continue;
3946         case INDEX_op_call:
3947             nb_oargs = TCGOP_CALLO(op);
3948             nb_iargs = TCGOP_CALLI(op);
3949             break;
3950         default:
3951             def = &tcg_op_defs[op->opc];
3952             nb_oargs = def->nb_oargs;
3953             nb_iargs = def->nb_iargs;
3954             break;
3955         }
3956 
3957         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3958             TCGTemp *ts = arg_temp(op->args[i]);
3959 
3960             if (ts->kind != TEMP_TB) {
3961                 continue;
3962             }
3963             if (ts->state_ptr == NULL) {
3964                 ts->state_ptr = ebb;
3965             } else if (ts->state_ptr != ebb) {
3966                 ts->state_ptr = multiple_ebb;
3967             }
3968         }
3969     }
3970 
3971     /*
3972      * For TEMP_TB that turned out not to be used beyond one EBB,
3973      * reduce the liveness to TEMP_EBB.
3974      */
3975     for (int i = s->nb_globals; i < nb_temps; ++i) {
3976         TCGTemp *ts = &s->temps[i];
3977         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3978             ts->kind = TEMP_EBB;
3979         }
3980     }
3981 }
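
/*
 * Illustrative effect: a TEMP_TB that is written and read only within
 * a single EBB is demoted to TEMP_EBB here, so that liveness pass 1
 * may treat it as simply dead at the EBB boundary instead of forcing
 * a sync to its memory slot.
 */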
3982 
3983 static void assert_carry_dead(TCGContext *s)
3984 {
3985     /*
3986      * Carry operations can be separated by a few insns like mov,
3987      * load or store, but they should always be "close", and
3988      * carry-out operations should always be paired with carry-in.
3989      * At various boundaries, carry must have been consumed.
3990      */
3991     tcg_debug_assert(!s->carry_live);
3992 }
3993 
3994 /* Liveness analysis: update the opc_arg_life array to tell if a
3995    given input argument is dead. Instructions updating dead
3996    temporaries are removed. */
3997 static void __attribute__((noinline))
3998 liveness_pass_1(TCGContext *s)
3999 {
4000     int nb_globals = s->nb_globals;
4001     int nb_temps = s->nb_temps;
4002     TCGOp *op, *op_prev;
4003     TCGRegSet *prefs;
4004 
4005     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
4006     for (int i = 0; i < nb_temps; ++i) {
4007         s->temps[i].state_ptr = prefs + i;
4008     }
4009 
4010     /* ??? Should be redundant with the exit_tb that ends the TB.  */
4011     la_func_end(s, nb_globals, nb_temps);
4012 
4013     s->carry_live = false;
4014     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
4015         int nb_iargs, nb_oargs;
4016         TCGOpcode opc_new, opc_new2;
4017         TCGLifeData arg_life = 0;
4018         TCGTemp *ts;
4019         TCGOpcode opc = op->opc;
4020         const TCGOpDef *def;
4021         const TCGArgConstraint *args_ct;
4022 
4023         switch (opc) {
4024         case INDEX_op_call:
4025             assert_carry_dead(s);
4026             {
4027                 const TCGHelperInfo *info = tcg_call_info(op);
4028                 int call_flags = tcg_call_flags(op);
4029 
4030                 nb_oargs = TCGOP_CALLO(op);
4031                 nb_iargs = TCGOP_CALLI(op);
4032 
4033                 /* pure functions can be removed if their result is unused */
4034                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
4035                     for (int i = 0; i < nb_oargs; i++) {
4036                         ts = arg_temp(op->args[i]);
4037                         if (ts->state != TS_DEAD) {
4038                             goto do_not_remove_call;
4039                         }
4040                     }
4041                     goto do_remove;
4042                 }
4043             do_not_remove_call:
4044 
4045                 /* Output args are dead.  */
4046                 for (int i = 0; i < nb_oargs; i++) {
4047                     ts = arg_temp(op->args[i]);
4048                     if (ts->state & TS_DEAD) {
4049                         arg_life |= DEAD_ARG << i;
4050                     }
4051                     if (ts->state & TS_MEM) {
4052                         arg_life |= SYNC_ARG << i;
4053                     }
4054                     ts->state = TS_DEAD;
4055                     la_reset_pref(ts);
4056                 }
4057 
4058                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
4059                 memset(op->output_pref, 0, sizeof(op->output_pref));
4060 
4061                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
4062                                     TCG_CALL_NO_READ_GLOBALS))) {
4063                     la_global_kill(s, nb_globals);
4064                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
4065                     la_global_sync(s, nb_globals);
4066                 }
4067 
4068                 /* Record arguments that die in this helper.  */
4069                 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4070                     ts = arg_temp(op->args[i]);
4071                     if (ts->state & TS_DEAD) {
4072                         arg_life |= DEAD_ARG << i;
4073                     }
4074                 }
4075 
4076                 /* For all live registers, remove call-clobbered prefs.  */
4077                 la_cross_call(s, nb_temps);
4078 
4079                 /*
4080                  * Input arguments are live for preceding opcodes.
4081                  *
4082                  * For those arguments that die, and will be allocated in
4083                  * registers, clear the register set for that arg, to be
4084                  * filled in below.  For args that will be on the stack,
4085                  * reset to any available reg.  Process arguments in reverse
4086                  * order so that if a temp is used more than once, the stack
4087                  * reset to max happens before the register reset to 0.
4088                  */
4089                 for (int i = nb_iargs - 1; i >= 0; i--) {
4090                     const TCGCallArgumentLoc *loc = &info->in[i];
4091                     ts = arg_temp(op->args[nb_oargs + i]);
4092 
4093                     if (ts->state & TS_DEAD) {
4094                         switch (loc->kind) {
4095                         case TCG_CALL_ARG_NORMAL:
4096                         case TCG_CALL_ARG_EXTEND_U:
4097                         case TCG_CALL_ARG_EXTEND_S:
4098                             if (arg_slot_reg_p(loc->arg_slot)) {
4099                                 *la_temp_pref(ts) = 0;
4100                                 break;
4101                             }
4102                             /* fall through */
4103                         default:
4104                             *la_temp_pref(ts) =
4105                                 tcg_target_available_regs[ts->type];
4106                             break;
4107                         }
4108                         ts->state &= ~TS_DEAD;
4109                     }
4110                 }
4111 
4112                 /*
4113                  * For each input argument, add its input register to prefs.
4114                  * If a temp is used once, this produces a single set bit;
4115                  * if a temp is used multiple times, this produces a set.
4116                  */
4117                 for (int i = 0; i < nb_iargs; i++) {
4118                     const TCGCallArgumentLoc *loc = &info->in[i];
4119                     ts = arg_temp(op->args[nb_oargs + i]);
4120 
4121                     switch (loc->kind) {
4122                     case TCG_CALL_ARG_NORMAL:
4123                     case TCG_CALL_ARG_EXTEND_U:
4124                     case TCG_CALL_ARG_EXTEND_S:
4125                         if (arg_slot_reg_p(loc->arg_slot)) {
4126                             tcg_regset_set_reg(*la_temp_pref(ts),
4127                                 tcg_target_call_iarg_regs[loc->arg_slot]);
4128                         }
4129                         break;
4130                     default:
4131                         break;
4132                     }
4133                 }
4134             }
4135             break;
4136         case INDEX_op_insn_start:
4137             assert_carry_dead(s);
4138             break;
4139         case INDEX_op_discard:
4140             /* mark the temporary as dead */
4141             ts = arg_temp(op->args[0]);
4142             ts->state = TS_DEAD;
4143             la_reset_pref(ts);
4144             break;
4145 
4146         case INDEX_op_muls2:
4147             opc_new = INDEX_op_mul;
4148             opc_new2 = INDEX_op_mulsh;
4149             goto do_mul2;
4150         case INDEX_op_mulu2:
4151             opc_new = INDEX_op_mul;
4152             opc_new2 = INDEX_op_muluh;
4153         do_mul2:
4154             assert_carry_dead(s);
4155             if (arg_temp(op->args[1])->state == TS_DEAD) {
4156                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4157                     /* Both parts of the operation are dead.  */
4158                     goto do_remove;
4159                 }
4160                 /* The high part of the operation is dead; generate the low. */
4161                 op->opc = opc = opc_new;
4162                 op->args[1] = op->args[2];
4163                 op->args[2] = op->args[3];
4164             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4165                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4166                 /* The low part of the operation is dead; generate the high. */
4167                 op->opc = opc = opc_new2;
4168                 op->args[0] = op->args[1];
4169                 op->args[1] = op->args[2];
4170                 op->args[2] = op->args[3];
4171             } else {
4172                 goto do_not_remove;
4173             }
4174             /* Mark the single-word operation live.  */
4175             goto do_not_remove;
4176 
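        /*
         * Illustrative example: for "muls2 lo, hi, a, b" with hi dead,
         * the code above rewrites the op in place to "mul lo, a, b";
         * with only lo dead it becomes "mulsh hi, a, b", provided the
         * host supports that opcode.
         */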
4177         case INDEX_op_addco:
4178             if (s->carry_live) {
4179                 goto do_not_remove;
4180             }
4181             op->opc = opc = INDEX_op_add;
4182             goto do_default;
4183 
4184         case INDEX_op_addcio:
4185             if (s->carry_live) {
4186                 goto do_not_remove;
4187             }
4188             op->opc = opc = INDEX_op_addci;
4189             goto do_default;
4190 
4191         case INDEX_op_subbo:
4192             if (s->carry_live) {
4193                 goto do_not_remove;
4194             }
4195             /* Lower to sub, but this may also require canonicalization. */
4196             op->opc = opc = INDEX_op_sub;
4197             ts = arg_temp(op->args[2]);
4198             if (ts->kind == TEMP_CONST) {
4199                 ts = tcg_constant_internal(ts->type, -ts->val);
4200                 if (ts->state_ptr == NULL) {
4201                     tcg_debug_assert(temp_idx(ts) == nb_temps);
4202                     nb_temps++;
4203                     ts->state_ptr = tcg_malloc(sizeof(TCGRegSet));
4204                     ts->state = TS_DEAD;
4205                     la_reset_pref(ts);
4206                 }
4207                 op->args[2] = temp_arg(ts);
4208                 op->opc = opc = INDEX_op_add;
4209             }
4210             goto do_default;
4211 
4212         case INDEX_op_subbio:
4213             if (s->carry_live) {
4214                 goto do_not_remove;
4215             }
4216             op->opc = opc = INDEX_op_subbi;
4217             goto do_default;
4218 
4219         case INDEX_op_addc1o:
4220             if (s->carry_live) {
4221                 goto do_not_remove;
4222             }
4223             /* Lower to add, add +1. */
4224             op_prev = tcg_op_insert_before(s, op, INDEX_op_add,
4225                                            TCGOP_TYPE(op), 3);
4226             op_prev->args[0] = op->args[0];
4227             op_prev->args[1] = op->args[1];
4228             op_prev->args[2] = op->args[2];
4229             op->opc = opc = INDEX_op_add;
4230             op->args[1] = op->args[0];
4231             ts = arg_temp(op->args[0]);
4232             ts = tcg_constant_internal(ts->type, 1);
4233             op->args[2] = temp_arg(ts);
4234             goto do_default;
4235 
4236         case INDEX_op_subb1o:
4237             if (s->carry_live) {
4238                 goto do_not_remove;
4239             }
4240             /* Lower to sub, add -1. */
4241             op_prev = tcg_op_insert_before(s, op, INDEX_op_sub,
4242                                            TCGOP_TYPE(op), 3);
4243             op_prev->args[0] = op->args[0];
4244             op_prev->args[1] = op->args[1];
4245             op_prev->args[2] = op->args[2];
4246             op->opc = opc = INDEX_op_add;
4247             op->args[1] = op->args[0];
4248             ts = arg_temp(op->args[0]);
4249             ts = tcg_constant_internal(ts->type, -1);
4250             op->args[2] = temp_arg(ts);
4251             goto do_default;
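            /*
             * Illustrative lowering when no carry-out is consumed:
             * "addc1o d, a, b" becomes "add d, a, b" followed by
             * "add d, d, 1", and "subb1o d, a, b" becomes
             * "sub d, a, b" followed by "add d, d, -1".
             */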
4252 
4253         default:
4254         do_default:
4255             /*
4256              * Test if the operation can be removed because all
4257              * its outputs are dead. We assume that nb_oargs == 0
4258              * implies side effects.
4259              */
4260             def = &tcg_op_defs[opc];
4261             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) {
4262                 for (int i = def->nb_oargs - 1; i >= 0; i--) {
4263                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4264                         goto do_not_remove;
4265                     }
4266                 }
4267                 goto do_remove;
4268             }
4269             goto do_not_remove;
4270 
4271         do_remove:
4272             tcg_op_remove(s, op);
4273             break;
4274 
4275         do_not_remove:
4276             def = &tcg_op_defs[opc];
4277             nb_iargs = def->nb_iargs;
4278             nb_oargs = def->nb_oargs;
4279 
4280             for (int i = 0; i < nb_oargs; i++) {
4281                 ts = arg_temp(op->args[i]);
4282 
4283                 /* Remember the preference of the uses that followed.  */
4284                 if (i < ARRAY_SIZE(op->output_pref)) {
4285                     op->output_pref[i] = *la_temp_pref(ts);
4286                 }
4287 
4288                 /* Output args are dead.  */
4289                 if (ts->state & TS_DEAD) {
4290                     arg_life |= DEAD_ARG << i;
4291                 }
4292                 if (ts->state & TS_MEM) {
4293                     arg_life |= SYNC_ARG << i;
4294                 }
4295                 ts->state = TS_DEAD;
4296                 la_reset_pref(ts);
4297             }
4298 
4299             /* If end of basic block, update.  */
4300             if (def->flags & TCG_OPF_BB_EXIT) {
4301                 assert_carry_dead(s);
4302                 la_func_end(s, nb_globals, nb_temps);
4303             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4304                 assert_carry_dead(s);
4305                 la_bb_sync(s, nb_globals, nb_temps);
4306             } else if (def->flags & TCG_OPF_BB_END) {
4307                 assert_carry_dead(s);
4308                 la_bb_end(s, nb_globals, nb_temps);
4309             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4310                 assert_carry_dead(s);
4311                 la_global_sync(s, nb_globals);
4312                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4313                     la_cross_call(s, nb_temps);
4314                 }
4315             }
4316 
4317             /* Record arguments that die in this opcode.  */
4318             for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4319                 ts = arg_temp(op->args[i]);
4320                 if (ts->state & TS_DEAD) {
4321                     arg_life |= DEAD_ARG << i;
4322                 }
4323             }
4324             if (def->flags & TCG_OPF_CARRY_OUT) {
4325                 s->carry_live = false;
4326             }
4327 
4328             /* Input arguments are live for preceding opcodes.  */
4329             for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4330                 ts = arg_temp(op->args[i]);
4331                 if (ts->state & TS_DEAD) {
4332                     /* For operands that were dead, initially allow
4333                        all regs for the type.  */
4334                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4335                     ts->state &= ~TS_DEAD;
4336                 }
4337             }
4338             if (def->flags & TCG_OPF_CARRY_IN) {
4339                 s->carry_live = true;
4340             }
4341 
4342             /* Incorporate constraints for this operand.  */
4343             switch (opc) {
4344             case INDEX_op_mov:
4345                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4346                    have proper constraints.  That said, special case
4347                    moves to propagate preferences backward.  */
4348                 if (IS_DEAD_ARG(1)) {
4349                     *la_temp_pref(arg_temp(op->args[0]))
4350                         = *la_temp_pref(arg_temp(op->args[1]));
4351                 }
4352                 break;
4353 
4354             default:
4355                 args_ct = opcode_args_ct(op);
4356                 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4357                     const TCGArgConstraint *ct = &args_ct[i];
4358                     TCGRegSet set, *pset;
4359 
4360                     ts = arg_temp(op->args[i]);
4361                     pset = la_temp_pref(ts);
4362                     set = *pset;
4363 
4364                     set &= ct->regs;
4365                     if (ct->ialias) {
4366                         set &= output_pref(op, ct->alias_index);
4367                     }
4368                     /* If the combination is not possible, restart.  */
4369                     if (set == 0) {
4370                         set = ct->regs;
4371                     }
4372                     *pset = set;
4373                 }
4374                 break;
4375             }
4376             break;
4377         }
4378         op->life = arg_life;
4379     }
4380     assert_carry_dead(s);
4381 }
4382 
4383 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4384 static bool __attribute__((noinline))
4385 liveness_pass_2(TCGContext *s)
4386 {
4387     int nb_globals = s->nb_globals;
4388     int nb_temps, i;
4389     bool changes = false;
4390     TCGOp *op, *op_next;
4391 
4392     /* Create a temporary for each indirect global.  */
4393     for (i = 0; i < nb_globals; ++i) {
4394         TCGTemp *its = &s->temps[i];
4395         if (its->indirect_reg) {
4396             TCGTemp *dts = tcg_temp_alloc(s);
4397             dts->type = its->type;
4398             dts->base_type = its->base_type;
4399             dts->temp_subindex = its->temp_subindex;
4400             dts->kind = TEMP_EBB;
4401             its->state_ptr = dts;
4402         } else {
4403             its->state_ptr = NULL;
4404         }
4405         /* All globals begin dead.  */
4406         its->state = TS_DEAD;
4407     }
4408     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4409         TCGTemp *its = &s->temps[i];
4410         its->state_ptr = NULL;
4411         its->state = TS_DEAD;
4412     }
4413 
4414     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4415         TCGOpcode opc = op->opc;
4416         const TCGOpDef *def = &tcg_op_defs[opc];
4417         TCGLifeData arg_life = op->life;
4418         int nb_iargs, nb_oargs, call_flags;
4419         TCGTemp *arg_ts, *dir_ts;
4420 
4421         if (opc == INDEX_op_call) {
4422             nb_oargs = TCGOP_CALLO(op);
4423             nb_iargs = TCGOP_CALLI(op);
4424             call_flags = tcg_call_flags(op);
4425         } else {
4426             nb_iargs = def->nb_iargs;
4427             nb_oargs = def->nb_oargs;
4428 
4429             /* Set flags similar to those that calls require.  */
4430             if (def->flags & TCG_OPF_COND_BRANCH) {
4431                 /* Like reading globals: sync_globals */
4432                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4433             } else if (def->flags & TCG_OPF_BB_END) {
4434                 /* Like writing globals: save_globals */
4435                 call_flags = 0;
4436             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4437                 /* Like reading globals: sync_globals */
4438                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4439             } else {
4440                 /* No effect on globals.  */
4441                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4442                               TCG_CALL_NO_WRITE_GLOBALS);
4443             }
4444         }
4445 
4446         /* Make sure that input arguments are available.  */
4447         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4448             arg_ts = arg_temp(op->args[i]);
4449             dir_ts = arg_ts->state_ptr;
4450             if (dir_ts && arg_ts->state == TS_DEAD) {
4451                 TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld,
4452                                                   arg_ts->type, 3);
4453 
4454                 lop->args[0] = temp_arg(dir_ts);
4455                 lop->args[1] = temp_arg(arg_ts->mem_base);
4456                 lop->args[2] = arg_ts->mem_offset;
4457 
4458                 /* Loaded, but synced with memory.  */
4459                 arg_ts->state = TS_MEM;
4460             }
4461         }
4462 
4463         /* Perform input replacement, and mark inputs that became dead.
4464            No action is required except keeping temp_state up to date
4465            so that we reload when needed.  */
4466         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4467             arg_ts = arg_temp(op->args[i]);
4468             dir_ts = arg_ts->state_ptr;
4469             if (dir_ts) {
4470                 op->args[i] = temp_arg(dir_ts);
4471                 changes = true;
4472                 if (IS_DEAD_ARG(i)) {
4473                     arg_ts->state = TS_DEAD;
4474                 }
4475             }
4476         }
4477 
4478         /* Liveness analysis should ensure that the following are
4479            all correct, for call sites and basic block end points.  */
4480         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4481             /* Nothing to do */
4482         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4483             for (i = 0; i < nb_globals; ++i) {
4484                 /* Liveness should see that globals are synced back,
4485                    that is, either TS_DEAD or TS_MEM.  */
4486                 arg_ts = &s->temps[i];
4487                 tcg_debug_assert(arg_ts->state_ptr == 0
4488                                  || arg_ts->state != 0);
4489             }
4490         } else {
4491             for (i = 0; i < nb_globals; ++i) {
4492                 /* Liveness should see that globals are saved back,
4493                    that is, TS_DEAD, waiting to be reloaded.  */
4494                 arg_ts = &s->temps[i];
4495                 tcg_debug_assert(arg_ts->state_ptr == 0
4496                                  || arg_ts->state == TS_DEAD);
4497             }
4498         }
4499 
4500         /* Outputs become available.  */
4501         if (opc == INDEX_op_mov) {
4502             arg_ts = arg_temp(op->args[0]);
4503             dir_ts = arg_ts->state_ptr;
4504             if (dir_ts) {
4505                 op->args[0] = temp_arg(dir_ts);
4506                 changes = true;
4507 
4508                 /* The output is now live and modified.  */
4509                 arg_ts->state = 0;
4510 
4511                 if (NEED_SYNC_ARG(0)) {
4512                     TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
4513                                                      arg_ts->type, 3);
4514                     TCGTemp *out_ts = dir_ts;
4515 
4516                     if (IS_DEAD_ARG(0)) {
4517                         out_ts = arg_temp(op->args[1]);
4518                         arg_ts->state = TS_DEAD;
4519                         tcg_op_remove(s, op);
4520                     } else {
4521                         arg_ts->state = TS_MEM;
4522                     }
4523 
4524                     sop->args[0] = temp_arg(out_ts);
4525                     sop->args[1] = temp_arg(arg_ts->mem_base);
4526                     sop->args[2] = arg_ts->mem_offset;
4527                 } else {
4528                     tcg_debug_assert(!IS_DEAD_ARG(0));
4529                 }
4530             }
4531         } else {
4532             for (i = 0; i < nb_oargs; i++) {
4533                 arg_ts = arg_temp(op->args[i]);
4534                 dir_ts = arg_ts->state_ptr;
4535                 if (!dir_ts) {
4536                     continue;
4537                 }
4538                 op->args[i] = temp_arg(dir_ts);
4539                 changes = true;
4540 
4541                 /* The output is now live and modified.  */
4542                 arg_ts->state = 0;
4543 
4544                 /* Sync outputs upon their last write.  */
4545                 if (NEED_SYNC_ARG(i)) {
4546                     TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
4547                                                      arg_ts->type, 3);
4548 
4549                     sop->args[0] = temp_arg(dir_ts);
4550                     sop->args[1] = temp_arg(arg_ts->mem_base);
4551                     sop->args[2] = arg_ts->mem_offset;
4552 
4553                     arg_ts->state = TS_MEM;
4554                 }
4555                 /* Drop outputs that are dead.  */
4556                 if (IS_DEAD_ARG(i)) {
4557                     arg_ts->state = TS_DEAD;
4558                 }
4559             }
4560         }
4561     }
4562 
4563     return changes;
4564 }
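
/*
 * Illustrative result: for an indirect global G backed by (say) env at
 * offset O, an op "add G, G, t" is rewritten to use the direct temp D:
 *
 *     ld  D, env, O     -- inserted before the first use
 *     add D, D, t
 *     st  D, env, O     -- inserted where liveness requires a sync
 */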
4565 
4566 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4567 {
4568     intptr_t off;
4569     int size, align;
4570 
4571     /* When allocating an object, look at the full type. */
4572     size = tcg_type_size(ts->base_type);
4573     switch (ts->base_type) {
4574     case TCG_TYPE_I32:
4575         align = 4;
4576         break;
4577     case TCG_TYPE_I64:
4578     case TCG_TYPE_V64:
4579         align = 8;
4580         break;
4581     case TCG_TYPE_I128:
4582     case TCG_TYPE_V128:
4583     case TCG_TYPE_V256:
4584         /*
4585          * Note that we do not require aligned storage for V256,
4586          * and that we provide alignment for I128 to match V128,
4587          * even if that's above what the host ABI requires.
4588          */
4589         align = 16;
4590         break;
4591     default:
4592         g_assert_not_reached();
4593     }
4594 
4595     /*
4596      * Assume the stack is sufficiently aligned.
4597      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4598      * and do not require 16 byte vector alignment.  This seems slightly
4599      * easier than fully parameterizing the above switch statement.
4600      */
4601     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4602     off = ROUND_UP(s->current_frame_offset, align);
4603 
4604     /* If we've exhausted the stack frame, restart with a smaller TB. */
4605     if (off + size > s->frame_end) {
4606         tcg_raise_tb_overflow(s);
4607     }
4608     s->current_frame_offset = off + size;
4609 #if defined(__sparc__)
4610     off += TCG_TARGET_STACK_BIAS;
4611 #endif
4612 
4613     /* If the object was subdivided, assign memory to all the parts. */
4614     if (ts->base_type != ts->type) {
4615         int part_size = tcg_type_size(ts->type);
4616         int part_count = size / part_size;
4617 
4618         /*
4619          * Each part is allocated sequentially in tcg_temp_new_internal.
4620          * Jump back to the first part by subtracting the current index.
4621          */
4622         ts -= ts->temp_subindex;
4623         for (int i = 0; i < part_count; ++i) {
4624             ts[i].mem_offset = off + i * part_size;
4625             ts[i].mem_base = s->frame_temp;
4626             ts[i].mem_allocated = 1;
4627         }
4628     } else {
4629         ts->mem_offset = off;
4630         ts->mem_base = s->frame_temp;
4631         ts->mem_allocated = 1;
4632     }
4633 }
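
/*
 * Worked example, assuming TCG_TARGET_STACK_ALIGN >= 8: with
 * current_frame_offset == 20, allocating a TCG_TYPE_I64 temp rounds
 * the offset up to 24, assigns mem_offset = 24, and advances
 * current_frame_offset to 32.
 */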
4634 
4635 /* Assign @reg to @ts, and update reg_to_temp[]. */
4636 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4637 {
4638     if (ts->val_type == TEMP_VAL_REG) {
4639         TCGReg old = ts->reg;
4640         tcg_debug_assert(s->reg_to_temp[old] == ts);
4641         if (old == reg) {
4642             return;
4643         }
4644         s->reg_to_temp[old] = NULL;
4645     }
4646     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4647     s->reg_to_temp[reg] = ts;
4648     ts->val_type = TEMP_VAL_REG;
4649     ts->reg = reg;
4650 }
4651 
4652 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4653 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4654 {
4655     tcg_debug_assert(type != TEMP_VAL_REG);
4656     if (ts->val_type == TEMP_VAL_REG) {
4657         TCGReg reg = ts->reg;
4658         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4659         s->reg_to_temp[reg] = NULL;
4660     }
4661     ts->val_type = type;
4662 }
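
/*
 * Sketch of the invariant the two helpers above maintain: every
 * non-NULL reg_to_temp[] entry points back at a temp whose val_type
 * is TEMP_VAL_REG for that same register.  Hypothetical checker,
 * not part of the build.
 */
#if 0
static void example_check_reg_to_temp(TCGContext *s)
{
    for (int r = 0; r < TCG_TARGET_NB_REGS; r++) {
        TCGTemp *ts = s->reg_to_temp[r];
        if (ts != NULL) {
            tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
            tcg_debug_assert(ts->reg == r);
        }
    }
}
#endif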
4663 
4664 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4665 
4666 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4667    mark it free; otherwise mark it dead.  */
4668 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4669 {
4670     TCGTempVal new_type;
4671 
4672     switch (ts->kind) {
4673     case TEMP_FIXED:
4674         return;
4675     case TEMP_GLOBAL:
4676     case TEMP_TB:
4677         new_type = TEMP_VAL_MEM;
4678         break;
4679     case TEMP_EBB:
4680         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4681         break;
4682     case TEMP_CONST:
4683         new_type = TEMP_VAL_CONST;
4684         break;
4685     default:
4686         g_assert_not_reached();
4687     }
4688     set_temp_val_nonreg(s, ts, new_type);
4689 }
4690 
4691 /* Mark a temporary as dead.  */
4692 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4693 {
4694     temp_free_or_dead(s, ts, 1);
4695 }
4696 
4697 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4698    register needs to be allocated to store a constant.  If 'free_or_dead'
4699    is non-zero, subsequently release the temporary; if it is positive, the
4700    temp is dead; if it is negative, the temp is free.  */
4701 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4702                       TCGRegSet preferred_regs, int free_or_dead)
4703 {
4704     if (!temp_readonly(ts) && !ts->mem_coherent) {
4705         if (!ts->mem_allocated) {
4706             temp_allocate_frame(s, ts);
4707         }
4708         switch (ts->val_type) {
4709         case TEMP_VAL_CONST:
4710             /* If we're going to free the temp immediately, then we won't
4711                require it later in a register, so attempt to store the
4712                constant to memory directly.  */
4713             if (free_or_dead
4714                 && tcg_out_sti(s, ts->type, ts->val,
4715                                ts->mem_base->reg, ts->mem_offset)) {
4716                 break;
4717             }
4718             temp_load(s, ts, tcg_target_available_regs[ts->type],
4719                       allocated_regs, preferred_regs);
4720             /* fallthrough */
4721 
4722         case TEMP_VAL_REG:
4723             tcg_out_st(s, ts->type, ts->reg,
4724                        ts->mem_base->reg, ts->mem_offset);
4725             break;
4726 
4727         case TEMP_VAL_MEM:
4728             break;
4729 
4730         case TEMP_VAL_DEAD:
4731         default:
4732             g_assert_not_reached();
4733         }
4734         ts->mem_coherent = 1;
4735     }
4736     if (free_or_dead) {
4737         temp_free_or_dead(s, ts, free_or_dead);
4738     }
4739 }
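
/*
 * Sketch, not part of the build, of the constant fast path above:
 * when a constant temp is freed, the immediate may be stored straight
 * to its frame slot, avoiding register allocation entirely.  Assumes
 * ts is TEMP_VAL_CONST with a frame slot already allocated.
 */
#if 0
static void example_spill_const(TCGContext *s, TCGTemp *ts)
{
    if (tcg_out_sti(s, ts->type, ts->val, ts->mem_base->reg,
                    ts->mem_offset)) {
        ts->mem_coherent = 1;  /* stored without touching a register */
    }
}
#endif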
4740 
4741 /* free register 'reg' by spilling the corresponding temporary if necessary */
4742 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4743 {
4744     TCGTemp *ts = s->reg_to_temp[reg];
4745     if (ts != NULL) {
4746         temp_sync(s, ts, allocated_regs, 0, -1);
4747     }
4748 }
4749 
4750 /**
4751  * tcg_reg_alloc:
4752  * @required_regs: Set of registers in which we must allocate.
4753  * @allocated_regs: Set of registers which must be avoided.
4754  * @preferred_regs: Set of registers we should prefer.
4755  * @rev: True if we search the registers in "indirect" order.
4756  *
4757  * The allocated register must be in @required_regs & ~@allocated_regs,
4758  * but if we can put it in @preferred_regs we may save a move later.
4759  */
4760 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4761                             TCGRegSet allocated_regs,
4762                             TCGRegSet preferred_regs, bool rev)
4763 {
4764     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4765     TCGRegSet reg_ct[2];
4766     const int *order;
4767 
4768     reg_ct[1] = required_regs & ~allocated_regs;
4769     tcg_debug_assert(reg_ct[1] != 0);
4770     reg_ct[0] = reg_ct[1] & preferred_regs;
4771 
4772     /* Skip the preferred_regs option if it cannot be satisfied,
4773        or if the preference made no difference.  */
4774     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4775 
4776     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4777 
4778     /* Try free registers, preferences first.  */
4779     for (j = f; j < 2; j++) {
4780         TCGRegSet set = reg_ct[j];
4781 
4782         if (tcg_regset_single(set)) {
4783             /* One register in the set.  */
4784             TCGReg reg = tcg_regset_first(set);
4785             if (s->reg_to_temp[reg] == NULL) {
4786                 return reg;
4787             }
4788         } else {
4789             for (i = 0; i < n; i++) {
4790                 TCGReg reg = order[i];
4791                 if (s->reg_to_temp[reg] == NULL &&
4792                     tcg_regset_test_reg(set, reg)) {
4793                     return reg;
4794                 }
4795             }
4796         }
4797     }
4798 
4799     /* We must spill something.  */
4800     for (j = f; j < 2; j++) {
4801         TCGRegSet set = reg_ct[j];
4802 
4803         if (tcg_regset_single(set)) {
4804             /* One register in the set.  */
4805             TCGReg reg = tcg_regset_first(set);
4806             tcg_reg_free(s, reg, allocated_regs);
4807             return reg;
4808         } else {
4809             for (i = 0; i < n; i++) {
4810                 TCGReg reg = order[i];
4811                 if (tcg_regset_test_reg(set, reg)) {
4812                     tcg_reg_free(s, reg, allocated_regs);
4813                     return reg;
4814                 }
4815             }
4816         }
4817     }
4818 
4819     g_assert_not_reached();
4820 }
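
/*
 * Worked example, not part of the build, of the preference logic
 * above.  The register sets are hypothetical bitmasks (bit n == reg n).
 */
#if 0
static void example_reg_alloc_preference(void)
{
    TCGRegSet required  = 0x0f;  /* regs 0-3 acceptable */
    TCGRegSet allocated = 0x01;  /* reg 0 taken earlier in this op */
    TCGRegSet preferred = 0x04;  /* the output would like reg 2 */

    TCGRegSet ct1 = required & ~allocated;  /* 0x0e */
    TCGRegSet ct0 = ct1 & preferred;        /* 0x04 */
    int f = (ct0 == 0 || ct0 == ct1);       /* 0: preference is live */

    /* With f == 0, the free-register scan tries reg 2 first; only if
       it is occupied does pass j == 1 widen the search to regs 1-3,
       and only after both passes fail is anything spilled. */
    (void)f;
}
#endif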
4821 
4822 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4823                                  TCGRegSet allocated_regs,
4824                                  TCGRegSet preferred_regs, bool rev)
4825 {
4826     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4827     TCGRegSet reg_ct[2];
4828     const int *order;
4829 
4830     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4831     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4832     tcg_debug_assert(reg_ct[1] != 0);
4833     reg_ct[0] = reg_ct[1] & preferred_regs;
4834 
4835     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4836 
4837     /*
4838      * Skip the preferred_regs option if it cannot be satisfied,
4839      * or if the preference made no difference.
4840      */
4841     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4842 
4843     /*
4844      * Minimize the number of flushes by looking for 2 free registers first,
4845      * then a single flush, then two flushes.
4846      */
4847     for (fmin = 2; fmin >= 0; fmin--) {
4848         for (j = k; j < 2; j++) {
4849             TCGRegSet set = reg_ct[j];
4850 
4851             for (i = 0; i < n; i++) {
4852                 TCGReg reg = order[i];
4853 
4854                 if (tcg_regset_test_reg(set, reg)) {
4855                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4856                     if (f >= fmin) {
4857                         tcg_reg_free(s, reg, allocated_regs);
4858                         tcg_reg_free(s, reg + 1, allocated_regs);
4859                         return reg;
4860                     }
4861                 }
4862             }
4863         }
4864     }
4865     g_assert_not_reached();
4866 }
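
/*
 * Sketch, not part of the build, of the flush count driving the fmin
 * loop above: a candidate pair scores 2 when both halves are free,
 * 1 when one spill would be needed, 0 when both must be spilled.
 */
#if 0
static int example_pair_flush_score(TCGContext *s, TCGReg reg)
{
    return !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
}
#endif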
4867 
4868 /* Make sure the temporary is in a register.  If needed, allocate the register
4869    from DESIRED while avoiding ALLOCATED.  */
4870 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4871                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4872 {
4873     TCGReg reg;
4874 
4875     switch (ts->val_type) {
4876     case TEMP_VAL_REG:
4877         return;
4878     case TEMP_VAL_CONST:
4879         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4880                             preferred_regs, ts->indirect_base);
4881         if (ts->type <= TCG_TYPE_I64) {
4882             tcg_out_movi(s, ts->type, reg, ts->val);
4883         } else {
4884             uint64_t val = ts->val;
4885             MemOp vece = MO_64;
4886 
4887             /*
4888              * Find the minimal vector element that matches the constant.
4889              * The targets will, in general, have to do this search anyway,
4890              * do this generically.
4891              */
4892             if (val == dup_const(MO_8, val)) {
4893                 vece = MO_8;
4894             } else if (val == dup_const(MO_16, val)) {
4895                 vece = MO_16;
4896             } else if (val == dup_const(MO_32, val)) {
4897                 vece = MO_32;
4898             }
4899 
4900             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4901         }
4902         ts->mem_coherent = 0;
4903         break;
4904     case TEMP_VAL_MEM:
4905         if (!ts->mem_allocated) {
4906             temp_allocate_frame(s, ts);
4907         }
4908         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4909                             preferred_regs, ts->indirect_base);
4910         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4911         ts->mem_coherent = 1;
4912         break;
4913     case TEMP_VAL_DEAD:
4914     default:
4915         g_assert_not_reached();
4916     }
4917     set_temp_val_reg(s, ts, reg);
4918 }
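
/*
 * Worked example, not part of the build, of the minimal-element
 * search above.
 */
#if 0
static void example_minimal_vece(void)
{
    /* A byte pattern repeated eight times matches MO_8. */
    tcg_debug_assert(dup_const(MO_8, 0x7f7f7f7f7f7f7f7full)
                     == 0x7f7f7f7f7f7f7f7full);
    /* 0x00ff00ff00ff00ff is not a repeated byte, but is a repeated
       16-bit element, so the search settles on MO_16. */
    tcg_debug_assert(dup_const(MO_16, 0x00ff00ff00ff00ffull)
                     == 0x00ff00ff00ff00ffull);
}
#endif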
4919 
4920 /* Save a temporary to memory. 'allocated_regs' is used in case a
4921    temporary register needs to be allocated to store a constant.  */
4922 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4923 {
4924     /* The liveness analysis already ensures that globals are back
4925        in memory. Keep a tcg_debug_assert for safety. */
4926     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4927 }
4928 
4929 /* save globals to their canonical location and assume they can be
4930    modified by the following code. 'allocated_regs' is used in case a
4931    temporary register needs to be allocated to store a constant. */
4932 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4933 {
4934     int i, n;
4935 
4936     for (i = 0, n = s->nb_globals; i < n; i++) {
4937         temp_save(s, &s->temps[i], allocated_regs);
4938     }
4939 }
4940 
4941 /* sync globals to their canonical location and assume they can be
4942    read by the following code. 'allocated_regs' is used in case a
4943    temporary register needs to be allocated to store a constant. */
4944 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4945 {
4946     int i, n;
4947 
4948     for (i = 0, n = s->nb_globals; i < n; i++) {
4949         TCGTemp *ts = &s->temps[i];
4950         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4951                          || ts->kind == TEMP_FIXED
4952                          || ts->mem_coherent);
4953     }
4954 }
4955 
4956 /* at the end of a basic block, we assume all temporaries are dead and
4957    all globals are stored at their canonical location. */
4958 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4959 {
4960     assert_carry_dead(s);
4961     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4962         TCGTemp *ts = &s->temps[i];
4963 
4964         switch (ts->kind) {
4965         case TEMP_TB:
4966             temp_save(s, ts, allocated_regs);
4967             break;
4968         case TEMP_EBB:
4969             /* The liveness analysis already ensures that temps are dead.
4970                Keep a tcg_debug_assert for safety. */
4971             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4972             break;
4973         case TEMP_CONST:
4974             /* Similarly, we should have freed any allocated register. */
4975             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4976             break;
4977         default:
4978             g_assert_not_reached();
4979         }
4980     }
4981 
4982     save_globals(s, allocated_regs);
4983 }
4984 
4985 /*
4986  * At a conditional branch, we assume all temporaries are dead unless
4987  * explicitly live-across-conditional-branch; all globals and local
4988  * temps are synced to their location.
4989  */
4990 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4991 {
4992     assert_carry_dead(s);
4993     sync_globals(s, allocated_regs);
4994 
4995     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4996         TCGTemp *ts = &s->temps[i];
4997         /*
4998          * The liveness analysis already ensures that temps are dead.
4999          * Keep tcg_debug_asserts for safety.
5000          */
5001         switch (ts->kind) {
5002         case TEMP_TB:
5003             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
5004             break;
5005         case TEMP_EBB:
5006         case TEMP_CONST:
5007             break;
5008         default:
5009             g_assert_not_reached();
5010         }
5011     }
5012 }
5013 
5014 /*
5015  * Specialized code generation for INDEX_op_mov_* with a constant.
5016  */
5017 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
5018                                   tcg_target_ulong val, TCGLifeData arg_life,
5019                                   TCGRegSet preferred_regs)
5020 {
5021     /* ENV should not be modified.  */
5022     tcg_debug_assert(!temp_readonly(ots));
5023 
5024     /* The movi is not explicitly generated here.  */
5025     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
5026     ots->val = val;
5027     ots->mem_coherent = 0;
5028     if (NEED_SYNC_ARG(0)) {
5029         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
5030     } else if (IS_DEAD_ARG(0)) {
5031         temp_dead(s, ots);
5032     }
5033 }
5034 
5035 /*
5036  * Specialized code generation for INDEX_op_mov_*.
5037  */
5038 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
5039 {
5040     const TCGLifeData arg_life = op->life;
5041     TCGRegSet allocated_regs, preferred_regs;
5042     TCGTemp *ts, *ots;
5043     TCGType otype, itype;
5044     TCGReg oreg, ireg;
5045 
5046     allocated_regs = s->reserved_regs;
5047     preferred_regs = output_pref(op, 0);
5048     ots = arg_temp(op->args[0]);
5049     ts = arg_temp(op->args[1]);
5050 
5051     /* ENV should not be modified.  */
5052     tcg_debug_assert(!temp_readonly(ots));
5053 
5054     /* Note that otype != itype for no-op truncation.  */
5055     otype = ots->type;
5056     itype = ts->type;
5057 
5058     if (ts->val_type == TEMP_VAL_CONST) {
5059         /* propagate constant or generate sti */
5060         tcg_target_ulong val = ts->val;
5061         if (IS_DEAD_ARG(1)) {
5062             temp_dead(s, ts);
5063         }
5064         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
5065         return;
5066     }
5067 
5068     /* If the source value is in memory, we're going to be forced
5069        to have it in a register in order to perform the copy.  Copy
5070        the SOURCE value into its own register first, so that we
5071        don't have to reload SOURCE the next time it is used. */
5072     if (ts->val_type == TEMP_VAL_MEM) {
5073         temp_load(s, ts, tcg_target_available_regs[itype],
5074                   allocated_regs, preferred_regs);
5075     }
5076     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
5077     ireg = ts->reg;
5078 
5079     if (IS_DEAD_ARG(0)) {
5080         /* mov to a non-saved dead register makes no sense (even with
5081            liveness analysis disabled). */
5082         tcg_debug_assert(NEED_SYNC_ARG(0));
5083         if (!ots->mem_allocated) {
5084             temp_allocate_frame(s, ots);
5085         }
5086         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
5087         if (IS_DEAD_ARG(1)) {
5088             temp_dead(s, ts);
5089         }
5090         temp_dead(s, ots);
5091         return;
5092     }
5093 
5094     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
5095         /*
5096          * The mov can be suppressed.  Kill input first, so that it
5097          * is unlinked from reg_to_temp, then set the output to the
5098          * reg that we saved from the input.
5099          */
5100         temp_dead(s, ts);
5101         oreg = ireg;
5102     } else {
5103         if (ots->val_type == TEMP_VAL_REG) {
5104             oreg = ots->reg;
5105         } else {
5106             /* Make sure to not spill the input register during allocation. */
5107             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
5108                                  allocated_regs | ((TCGRegSet)1 << ireg),
5109                                  preferred_regs, ots->indirect_base);
5110         }
5111         if (!tcg_out_mov(s, otype, oreg, ireg)) {
5112             /*
5113              * Cross register class move not supported.
5114              * Store the source register into the destination slot
5115              * and leave the destination temp as TEMP_VAL_MEM.
5116              */
5117             assert(!temp_readonly(ots));
5118             if (!ts->mem_allocated) {
5119                 temp_allocate_frame(s, ots);
5120             }
5121             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
5122             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
5123             ots->mem_coherent = 1;
5124             return;
5125         }
5126     }
5127     set_temp_val_reg(s, ots, oreg);
5128     ots->mem_coherent = 0;
5129 
5130     if (NEED_SYNC_ARG(0)) {
5131         temp_sync(s, ots, allocated_regs, 0, 0);
5132     }
5133 }
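
/*
 * Sketch, not part of the build, of the suppressed-mov path above:
 * when the input temp dies at this mov and is not TEMP_FIXED, no host
 * instruction is emitted; the output inherits the input's register.
 */
#if 0
static void example_mov_suppression(TCGContext *s, TCGOp *op)
{
    TCGTemp *ots = arg_temp(op->args[0]);
    TCGTemp *ts = arg_temp(op->args[1]);
    TCGReg ireg = ts->reg;          /* assumes ts is TEMP_VAL_REG */

    temp_dead(s, ts);               /* unlink ireg from the input */
    set_temp_val_reg(s, ots, ireg); /* relink ireg to the output */
}
#endif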
5134 
5135 /*
5136  * Specialized code generation for INDEX_op_dup_vec.
5137  */
5138 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
5139 {
5140     const TCGLifeData arg_life = op->life;
5141     TCGRegSet dup_out_regs, dup_in_regs;
5142     const TCGArgConstraint *dup_args_ct;
5143     TCGTemp *its, *ots;
5144     TCGType itype, vtype;
5145     unsigned vece;
5146     int lowpart_ofs;
5147     bool ok;
5148 
5149     ots = arg_temp(op->args[0]);
5150     its = arg_temp(op->args[1]);
5151 
5152     /* ENV should not be modified.  */
5153     tcg_debug_assert(!temp_readonly(ots));
5154 
5155     itype = its->type;
5156     vece = TCGOP_VECE(op);
5157     vtype = TCGOP_TYPE(op);
5158 
5159     if (its->val_type == TEMP_VAL_CONST) {
5160         /* Propagate constant via movi -> dupi.  */
5161         tcg_target_ulong val = dup_const(vece, its->val);
5162         if (IS_DEAD_ARG(1)) {
5163             temp_dead(s, its);
5164         }
5165         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
5166         return;
5167     }
5168 
5169     dup_args_ct = opcode_args_ct(op);
5170     dup_out_regs = dup_args_ct[0].regs;
5171     dup_in_regs = dup_args_ct[1].regs;
5172 
5173     /* Allocate the output register now.  */
5174     if (ots->val_type != TEMP_VAL_REG) {
5175         TCGRegSet allocated_regs = s->reserved_regs;
5176         TCGReg oreg;
5177 
5178         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5179             /* Make sure to not spill the input register. */
5180             tcg_regset_set_reg(allocated_regs, its->reg);
5181         }
5182         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5183                              output_pref(op, 0), ots->indirect_base);
5184         set_temp_val_reg(s, ots, oreg);
5185     }
5186 
5187     switch (its->val_type) {
5188     case TEMP_VAL_REG:
5189         /*
5190          * The dup constraints must be broad, covering all possible VECE.
5191          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5192          * to fail, indicating that extra moves are required for that case.
5193          */
5194         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5195             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5196                 goto done;
5197             }
5198             /* Try again from memory or a vector input register.  */
5199         }
5200         if (!its->mem_coherent) {
5201             /*
5202              * The input register is not synced, and so an extra store
5203              * would be required to use memory.  Attempt an integer-vector
5204              * register move first.  We do not have a TCGRegSet for this.
5205              */
5206             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5207                 break;
5208             }
5209             /* Sync the temp back to its slot and load from there.  */
5210             temp_sync(s, its, s->reserved_regs, 0, 0);
5211         }
5212         /* fall through */
5213 
5214     case TEMP_VAL_MEM:
5215         lowpart_ofs = 0;
5216         if (HOST_BIG_ENDIAN) {
5217             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5218         }
5219         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5220                              its->mem_offset + lowpart_ofs)) {
5221             goto done;
5222         }
5223         /* Load the input into the destination vector register. */
5224         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5225         break;
5226 
5227     default:
5228         g_assert_not_reached();
5229     }
5230 
5231     /* We now have a vector input register, so dup must succeed. */
5232     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5233     tcg_debug_assert(ok);
5234 
5235  done:
5236     ots->mem_coherent = 0;
5237     if (IS_DEAD_ARG(1)) {
5238         temp_dead(s, its);
5239     }
5240     if (NEED_SYNC_ARG(0)) {
5241         temp_sync(s, ots, s->reserved_regs, 0, 0);
5242     }
5243     if (IS_DEAD_ARG(0)) {
5244         temp_dead(s, ots);
5245     }
5246 }
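
/*
 * Worked example, not part of the build, of the big-endian low-part
 * offset above: for an I64 input and MO_8 elements, the least
 * significant byte sits at the highest address within the slot.
 */
#if 0
static void example_lowpart_ofs(void)
{
    int ofs = tcg_type_size(TCG_TYPE_I64) - (1 << MO_8);  /* 8 - 1 */
    tcg_debug_assert(ofs == 7);
}
#endif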
5247 
5248 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5249 {
5250     const TCGLifeData arg_life = op->life;
5251     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5252     TCGRegSet i_allocated_regs;
5253     TCGRegSet o_allocated_regs;
5254     int i, k, nb_iargs, nb_oargs;
5255     TCGReg reg;
5256     TCGArg arg;
5257     const TCGArgConstraint *args_ct;
5258     const TCGArgConstraint *arg_ct;
5259     TCGTemp *ts;
5260     TCGArg new_args[TCG_MAX_OP_ARGS];
5261     int const_args[TCG_MAX_OP_ARGS];
5262     TCGCond op_cond;
5263 
5264     if (def->flags & TCG_OPF_CARRY_IN) {
5265         tcg_debug_assert(s->carry_live);
5266     }
5267 
5268     nb_oargs = def->nb_oargs;
5269     nb_iargs = def->nb_iargs;
5270 
5271     /* copy constants */
5272     memcpy(new_args + nb_oargs + nb_iargs,
5273            op->args + nb_oargs + nb_iargs,
5274            sizeof(TCGArg) * def->nb_cargs);
5275 
5276     i_allocated_regs = s->reserved_regs;
5277     o_allocated_regs = s->reserved_regs;
5278 
5279     switch (op->opc) {
5280     case INDEX_op_brcond:
5281         op_cond = op->args[2];
5282         break;
5283     case INDEX_op_setcond:
5284     case INDEX_op_negsetcond:
5285     case INDEX_op_cmp_vec:
5286         op_cond = op->args[3];
5287         break;
5288     case INDEX_op_brcond2_i32:
5289         op_cond = op->args[4];
5290         break;
5291     case INDEX_op_movcond:
5292     case INDEX_op_setcond2_i32:
5293     case INDEX_op_cmpsel_vec:
5294         op_cond = op->args[5];
5295         break;
5296     default:
5297         /* No condition within opcode. */
5298         op_cond = TCG_COND_ALWAYS;
5299         break;
5300     }
5301 
5302     args_ct = opcode_args_ct(op);
5303 
5304     /* satisfy input constraints */
5305     for (k = 0; k < nb_iargs; k++) {
5306         TCGRegSet i_preferred_regs, i_required_regs;
5307         bool allocate_new_reg, copyto_new_reg;
5308         TCGTemp *ts2;
5309         int i1, i2;
5310 
5311         i = args_ct[nb_oargs + k].sort_index;
5312         arg = op->args[i];
5313         arg_ct = &args_ct[i];
5314         ts = arg_temp(arg);
5315 
5316         if (ts->val_type == TEMP_VAL_CONST) {
5317 #ifdef TCG_REG_ZERO
5318             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5319                 /* Hardware zero register: indicate register via non-const. */
5320                 const_args[i] = 0;
5321                 new_args[i] = TCG_REG_ZERO;
5322                 continue;
5323             }
5324 #endif
5325 
5326             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5327                                        op_cond, TCGOP_VECE(op))) {
5328                 /* constant is OK for instruction */
5329                 const_args[i] = 1;
5330                 new_args[i] = ts->val;
5331                 continue;
5332             }
5333         }
5334 
5335         reg = ts->reg;
5336         i_preferred_regs = 0;
5337         i_required_regs = arg_ct->regs;
5338         allocate_new_reg = false;
5339         copyto_new_reg = false;
5340 
5341         switch (arg_ct->pair) {
5342         case 0: /* not paired */
5343             if (arg_ct->ialias) {
5344                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5345 
5346                 /*
5347                  * If the input is readonly, then it cannot also be an
5348                  * output and aliased to itself.  If the input is not
5349                  * dead after the instruction, we must allocate a new
5350                  * register and move it.
5351                  */
5352                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5353                     || args_ct[arg_ct->alias_index].newreg) {
5354                     allocate_new_reg = true;
5355                 } else if (ts->val_type == TEMP_VAL_REG) {
5356                     /*
5357                      * Check if the current register has already been
5358                      * allocated for another input.
5359                      */
5360                     allocate_new_reg =
5361                         tcg_regset_test_reg(i_allocated_regs, reg);
5362                 }
5363             }
5364             if (!allocate_new_reg) {
5365                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5366                           i_preferred_regs);
5367                 reg = ts->reg;
5368                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5369             }
5370             if (allocate_new_reg) {
5371                 /*
5372                  * Allocate a new register matching the constraint
5373                  * and move the temporary register into it.
5374                  */
5375                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5376                           i_allocated_regs, 0);
5377                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5378                                     i_preferred_regs, ts->indirect_base);
5379                 copyto_new_reg = true;
5380             }
5381             break;
5382 
5383         case 1:
5384             /* First of an input pair; if i1 == i2, the second is an output. */
5385             i1 = i;
5386             i2 = arg_ct->pair_index;
5387             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5388 
5389             /*
5390              * It is easier to default to allocating a new pair
5391              * and to identify a few cases where it's not required.
5392              */
5393             if (arg_ct->ialias) {
5394                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5395                 if (IS_DEAD_ARG(i1) &&
5396                     IS_DEAD_ARG(i2) &&
5397                     !temp_readonly(ts) &&
5398                     ts->val_type == TEMP_VAL_REG &&
5399                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5400                     tcg_regset_test_reg(i_required_regs, reg) &&
5401                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5402                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5403                     (ts2
5404                      ? ts2->val_type == TEMP_VAL_REG &&
5405                        ts2->reg == reg + 1 &&
5406                        !temp_readonly(ts2)
5407                      : s->reg_to_temp[reg + 1] == NULL)) {
5408                     break;
5409                 }
5410             } else {
5411                 /* Without aliasing, the pair must also be an input. */
5412                 tcg_debug_assert(ts2);
5413                 if (ts->val_type == TEMP_VAL_REG &&
5414                     ts2->val_type == TEMP_VAL_REG &&
5415                     ts2->reg == reg + 1 &&
5416                     tcg_regset_test_reg(i_required_regs, reg)) {
5417                     break;
5418                 }
5419             }
5420             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5421                                      0, ts->indirect_base);
5422             goto do_pair;
5423 
5424         case 2: /* pair second */
5425             reg = new_args[arg_ct->pair_index] + 1;
5426             goto do_pair;
5427 
5428         case 3: /* ialias with second output, no first input */
5429             tcg_debug_assert(arg_ct->ialias);
5430             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5431 
5432             if (IS_DEAD_ARG(i) &&
5433                 !temp_readonly(ts) &&
5434                 ts->val_type == TEMP_VAL_REG &&
5435                 reg > 0 &&
5436                 s->reg_to_temp[reg - 1] == NULL &&
5437                 tcg_regset_test_reg(i_required_regs, reg) &&
5438                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5439                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5440                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5441                 break;
5442             }
5443             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5444                                      i_allocated_regs, 0,
5445                                      ts->indirect_base);
5446             tcg_regset_set_reg(i_allocated_regs, reg);
5447             reg += 1;
5448             goto do_pair;
5449 
5450         do_pair:
5451             /*
5452              * If an aliased input is not dead after the instruction,
5453              * we must allocate a new register and move it.
5454              */
5455             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5456                 TCGRegSet t_allocated_regs = i_allocated_regs;
5457 
5458                 /*
5459                  * Because of the alias, and the continued life, make sure
5460                  * that the temp is somewhere *other* than the reg pair,
5461                  * and we get a copy in reg.
5462                  */
5463                 tcg_regset_set_reg(t_allocated_regs, reg);
5464                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5465                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5466                     /* If ts was already in reg, copy it somewhere else. */
5467                     TCGReg nr;
5468                     bool ok;
5469 
5470                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5471                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5472                                        t_allocated_regs, 0, ts->indirect_base);
5473                     ok = tcg_out_mov(s, ts->type, nr, reg);
5474                     tcg_debug_assert(ok);
5475 
5476                     set_temp_val_reg(s, ts, nr);
5477                 } else {
5478                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5479                               t_allocated_regs, 0);
5480                     copyto_new_reg = true;
5481                 }
5482             } else {
5483                 /* Preferably allocate to reg, otherwise copy. */
5484                 i_required_regs = (TCGRegSet)1 << reg;
5485                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5486                           i_preferred_regs);
5487                 copyto_new_reg = ts->reg != reg;
5488             }
5489             break;
5490 
5491         default:
5492             g_assert_not_reached();
5493         }
5494 
5495         if (copyto_new_reg) {
5496             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5497                 /*
5498                  * Cross register class move not supported.  Sync the
5499                  * temp back to its slot and load from there.
5500                  */
5501                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5502                 tcg_out_ld(s, ts->type, reg,
5503                            ts->mem_base->reg, ts->mem_offset);
5504             }
5505         }
5506         new_args[i] = reg;
5507         const_args[i] = 0;
5508         tcg_regset_set_reg(i_allocated_regs, reg);
5509     }
5510 
5511     /* mark dead temporaries and free the associated registers */
5512     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5513         if (IS_DEAD_ARG(i)) {
5514             temp_dead(s, arg_temp(op->args[i]));
5515         }
5516     }
5517 
5518     if (def->flags & TCG_OPF_COND_BRANCH) {
5519         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5520     } else if (def->flags & TCG_OPF_BB_END) {
5521         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5522     } else {
5523         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5524             assert_carry_dead(s);
5525             /* XXX: permit generic clobber register list? */
5526             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5527                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5528                     tcg_reg_free(s, i, i_allocated_regs);
5529                 }
5530             }
5531         }
5532         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5533             /* sync globals if the op has side effects and might trigger
5534                an exception. */
5535             sync_globals(s, i_allocated_regs);
5536         }
5537 
5538         /* satisfy the output constraints */
5539         for (k = 0; k < nb_oargs; k++) {
5540             i = args_ct[k].sort_index;
5541             arg = op->args[i];
5542             arg_ct = &args_ct[i];
5543             ts = arg_temp(arg);
5544 
5545             /* ENV should not be modified.  */
5546             tcg_debug_assert(!temp_readonly(ts));
5547 
5548             switch (arg_ct->pair) {
5549             case 0: /* not paired */
5550                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5551                     reg = new_args[arg_ct->alias_index];
5552                 } else if (arg_ct->newreg) {
5553                     reg = tcg_reg_alloc(s, arg_ct->regs,
5554                                         i_allocated_regs | o_allocated_regs,
5555                                         output_pref(op, k), ts->indirect_base);
5556                 } else {
5557                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5558                                         output_pref(op, k), ts->indirect_base);
5559                 }
5560                 break;
5561 
5562             case 1: /* first of pair */
5563                 if (arg_ct->oalias) {
5564                     reg = new_args[arg_ct->alias_index];
5565                 } else if (arg_ct->newreg) {
5566                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5567                                              i_allocated_regs | o_allocated_regs,
5568                                              output_pref(op, k),
5569                                              ts->indirect_base);
5570                 } else {
5571                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5572                                              output_pref(op, k),
5573                                              ts->indirect_base);
5574                 }
5575                 break;
5576 
5577             case 2: /* second of pair */
5578                 if (arg_ct->oalias) {
5579                     reg = new_args[arg_ct->alias_index];
5580                 } else {
5581                     reg = new_args[arg_ct->pair_index] + 1;
5582                 }
5583                 break;
5584 
5585             case 3: /* first of pair, aliasing with a second input */
5586                 tcg_debug_assert(!arg_ct->newreg);
5587                 reg = new_args[arg_ct->pair_index] - 1;
5588                 break;
5589 
5590             default:
5591                 g_assert_not_reached();
5592             }
5593             tcg_regset_set_reg(o_allocated_regs, reg);
5594             set_temp_val_reg(s, ts, reg);
5595             ts->mem_coherent = 0;
5596             new_args[i] = reg;
5597         }
5598     }
5599 
5600     /* emit instruction */
5601     TCGType type = TCGOP_TYPE(op);
5602     switch (op->opc) {
5603     case INDEX_op_addc1o:
5604         tcg_out_set_carry(s);
5605         /* fall through */
5606     case INDEX_op_add:
5607     case INDEX_op_addcio:
5608     case INDEX_op_addco:
5609     case INDEX_op_and:
5610     case INDEX_op_andc:
5611     case INDEX_op_clz:
5612     case INDEX_op_ctz:
5613     case INDEX_op_divs:
5614     case INDEX_op_divu:
5615     case INDEX_op_eqv:
5616     case INDEX_op_mul:
5617     case INDEX_op_mulsh:
5618     case INDEX_op_muluh:
5619     case INDEX_op_nand:
5620     case INDEX_op_nor:
5621     case INDEX_op_or:
5622     case INDEX_op_orc:
5623     case INDEX_op_rems:
5624     case INDEX_op_remu:
5625     case INDEX_op_rotl:
5626     case INDEX_op_rotr:
5627     case INDEX_op_sar:
5628     case INDEX_op_shl:
5629     case INDEX_op_shr:
5630     case INDEX_op_xor:
5631         {
5632             const TCGOutOpBinary *out =
5633                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5634 
5635             /* Constants should never appear in the first source operand. */
5636             tcg_debug_assert(!const_args[1]);
5637             if (const_args[2]) {
5638                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5639             } else {
5640                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5641             }
5642         }
5643         break;
5644 
5645     case INDEX_op_sub:
5646         {
5647             const TCGOutOpSubtract *out = &outop_sub;
5648 
5649             /*
5650              * Constants should never appear in the second source operand.
5651              * These are folded into an add with a negated constant.
5652              */
5653             tcg_debug_assert(!const_args[2]);
5654             if (const_args[1]) {
5655                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5656             } else {
5657                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5658             }
5659         }
5660         break;
5661 
5662     case INDEX_op_subb1o:
5663         tcg_out_set_borrow(s);
5664         /* fall through */
5665     case INDEX_op_addci:
5666     case INDEX_op_subbi:
5667     case INDEX_op_subbio:
5668     case INDEX_op_subbo:
5669         {
5670             const TCGOutOpAddSubCarry *out =
5671                 container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base);
5672 
5673             if (const_args[2]) {
5674                 if (const_args[1]) {
5675                     out->out_rii(s, type, new_args[0],
5676                                  new_args[1], new_args[2]);
5677                 } else {
5678                     out->out_rri(s, type, new_args[0],
5679                                  new_args[1], new_args[2]);
5680                 }
5681             } else if (const_args[1]) {
5682                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5683             } else {
5684                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5685             }
5686         }
5687         break;
5688 
5689     case INDEX_op_bswap64:
5690     case INDEX_op_ext_i32_i64:
5691     case INDEX_op_extu_i32_i64:
5692     case INDEX_op_extrl_i64_i32:
5693     case INDEX_op_extrh_i64_i32:
5694         assert(TCG_TARGET_REG_BITS == 64);
5695         /* fall through */
5696     case INDEX_op_ctpop:
5697     case INDEX_op_neg:
5698     case INDEX_op_not:
5699         {
5700             const TCGOutOpUnary *out =
5701                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5702 
5703             /* Constants should have been folded. */
5704             tcg_debug_assert(!const_args[1]);
5705             out->out_rr(s, type, new_args[0], new_args[1]);
5706         }
5707         break;
5708 
5709     case INDEX_op_bswap16:
5710     case INDEX_op_bswap32:
5711         {
5712             const TCGOutOpBswap *out =
5713                 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5714 
5715             tcg_debug_assert(!const_args[1]);
5716             out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5717         }
5718         break;
5719 
5720     case INDEX_op_deposit:
5721         {
5722             const TCGOutOpDeposit *out = &outop_deposit;
5723 
5724             if (const_args[2]) {
5725                 tcg_debug_assert(!const_args[1]);
5726                 out->out_rri(s, type, new_args[0], new_args[1],
5727                              new_args[2], new_args[3], new_args[4]);
5728             } else if (const_args[1]) {
5729                 tcg_debug_assert(new_args[1] == 0);
5730                 tcg_debug_assert(!const_args[2]);
5731                 out->out_rzr(s, type, new_args[0], new_args[2],
5732                              new_args[3], new_args[4]);
5733             } else {
5734                 out->out_rrr(s, type, new_args[0], new_args[1],
5735                              new_args[2], new_args[3], new_args[4]);
5736             }
5737         }
5738         break;
5739 
5740     case INDEX_op_divs2:
5741     case INDEX_op_divu2:
5742         {
5743             const TCGOutOpDivRem *out =
5744                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5745 
5746             /* Only used by x86 and s390x, which use matching constraints. */
5747             tcg_debug_assert(new_args[0] == new_args[2]);
5748             tcg_debug_assert(new_args[1] == new_args[3]);
5749             tcg_debug_assert(!const_args[4]);
5750             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5751         }
5752         break;
5753 
5754     case INDEX_op_extract:
5755     case INDEX_op_sextract:
5756         {
5757             const TCGOutOpExtract *out =
5758                 container_of(all_outop[op->opc], TCGOutOpExtract, base);
5759 
5760             tcg_debug_assert(!const_args[1]);
5761             out->out_rr(s, type, new_args[0], new_args[1],
5762                         new_args[2], new_args[3]);
5763         }
5764         break;
5765 
5766     case INDEX_op_extract2:
5767         {
5768             const TCGOutOpExtract2 *out = &outop_extract2;
5769 
5770             tcg_debug_assert(!const_args[1]);
5771             tcg_debug_assert(!const_args[2]);
5772             out->out_rrr(s, type, new_args[0], new_args[1],
5773                          new_args[2], new_args[3]);
5774         }
5775         break;
5776 
5777     case INDEX_op_ld8u:
5778     case INDEX_op_ld8s:
5779     case INDEX_op_ld16u:
5780     case INDEX_op_ld16s:
5781     case INDEX_op_ld32u:
5782     case INDEX_op_ld32s:
5783     case INDEX_op_ld:
5784         {
5785             const TCGOutOpLoad *out =
5786                 container_of(all_outop[op->opc], TCGOutOpLoad, base);
5787 
5788             tcg_debug_assert(!const_args[1]);
5789             out->out(s, type, new_args[0], new_args[1], new_args[2]);
5790         }
5791         break;
5792 
5793     case INDEX_op_muls2:
5794     case INDEX_op_mulu2:
5795         {
5796             const TCGOutOpMul2 *out =
5797                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5798 
5799             tcg_debug_assert(!const_args[2]);
5800             tcg_debug_assert(!const_args[3]);
5801             out->out_rrrr(s, type, new_args[0], new_args[1],
5802                           new_args[2], new_args[3]);
5803         }
5804         break;
5805 
5806     case INDEX_op_st32:
5807         /* Use the st output op with TCG_TYPE_I32. */
5808         type = TCG_TYPE_I32;
5809         /* fall through */
5810     case INDEX_op_st:
5811     case INDEX_op_st8:
5812     case INDEX_op_st16:
5813         {
5814             const TCGOutOpStore *out =
5815                 container_of(all_outop[op->opc], TCGOutOpStore, base);
5816 
5817             if (const_args[0]) {
5818                 out->out_i(s, type, new_args[0], new_args[1], new_args[2]);
5819             } else {
5820                 out->out_r(s, type, new_args[0], new_args[1], new_args[2]);
5821             }
5822         }
5823         break;
5824 
5825     case INDEX_op_qemu_ld:
5826     case INDEX_op_qemu_st:
5827         {
5828             const TCGOutOpQemuLdSt *out =
5829                 container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base);
5830 
5831             out->out(s, type, new_args[0], new_args[1], new_args[2]);
5832         }
5833         break;
5834 
5835     case INDEX_op_qemu_ld2:
5836     case INDEX_op_qemu_st2:
5837         {
5838             const TCGOutOpQemuLdSt2 *out =
5839                 container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base);
5840 
5841             out->out(s, type, new_args[0], new_args[1],
5842                      new_args[2], new_args[3]);
5843         }
5844         break;
5845 
5846     case INDEX_op_brcond:
5847         {
5848             const TCGOutOpBrcond *out = &outop_brcond;
5849             TCGCond cond = new_args[2];
5850             TCGLabel *label = arg_label(new_args[3]);
5851 
5852             tcg_debug_assert(!const_args[0]);
5853             if (const_args[1]) {
5854                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5855             } else {
5856                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5857             }
5858         }
5859         break;
5860 
5861     case INDEX_op_movcond:
5862         {
5863             const TCGOutOpMovcond *out = &outop_movcond;
5864             TCGCond cond = new_args[5];
5865 
5866             tcg_debug_assert(!const_args[1]);
5867             out->out(s, type, cond, new_args[0],
5868                      new_args[1], new_args[2], const_args[2],
5869                      new_args[3], const_args[3],
5870                      new_args[4], const_args[4]);
5871         }
5872         break;
5873 
5874     case INDEX_op_setcond:
5875     case INDEX_op_negsetcond:
5876         {
5877             const TCGOutOpSetcond *out =
5878                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5879             TCGCond cond = new_args[3];
5880 
5881             tcg_debug_assert(!const_args[1]);
5882             if (const_args[2]) {
5883                 out->out_rri(s, type, cond,
5884                              new_args[0], new_args[1], new_args[2]);
5885             } else {
5886                 out->out_rrr(s, type, cond,
5887                              new_args[0], new_args[1], new_args[2]);
5888             }
5889         }
5890         break;
5891 
5892 #if TCG_TARGET_REG_BITS == 32
5893     case INDEX_op_brcond2_i32:
5894         {
5895             const TCGOutOpBrcond2 *out = &outop_brcond2;
5896             TCGCond cond = new_args[4];
5897             TCGLabel *label = arg_label(new_args[5]);
5898 
5899             tcg_debug_assert(!const_args[0]);
5900             tcg_debug_assert(!const_args[1]);
5901             out->out(s, cond, new_args[0], new_args[1],
5902                      new_args[2], const_args[2],
5903                      new_args[3], const_args[3], label);
5904         }
5905         break;
5906     case INDEX_op_setcond2_i32:
5907         {
5908             const TCGOutOpSetcond2 *out = &outop_setcond2;
5909             TCGCond cond = new_args[5];
5910 
5911             tcg_debug_assert(!const_args[1]);
5912             tcg_debug_assert(!const_args[2]);
5913             out->out(s, cond, new_args[0], new_args[1], new_args[2],
5914                      new_args[3], const_args[3], new_args[4], const_args[4]);
5915         }
5916         break;
5917 #else
5918     case INDEX_op_brcond2_i32:
5919     case INDEX_op_setcond2_i32:
5920         g_assert_not_reached();
5921 #endif
5922 
5923     case INDEX_op_goto_ptr:
5924         tcg_debug_assert(!const_args[0]);
5925         tcg_out_goto_ptr(s, new_args[0]);
5926         break;
5927 
5928     default:
5929         tcg_debug_assert(def->flags & TCG_OPF_VECTOR);
5930         tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5931                        TCGOP_VECE(op), new_args, const_args);
5932         break;
5933     }
5934 
5935     if (def->flags & TCG_OPF_CARRY_IN) {
5936         s->carry_live = false;
5937     }
5938     if (def->flags & TCG_OPF_CARRY_OUT) {
5939         s->carry_live = true;
5940     }
5941 
5942     /* move the outputs into the correct registers if needed */
5943     for (i = 0; i < nb_oargs; i++) {
5944         ts = arg_temp(op->args[i]);
5945 
5946         /* ENV should not be modified.  */
5947         tcg_debug_assert(!temp_readonly(ts));
5948 
5949         if (NEED_SYNC_ARG(i)) {
5950             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5951         } else if (IS_DEAD_ARG(i)) {
5952             temp_dead(s, ts);
5953         }
5954     }
5955 }
5956 
5957 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5958 {
5959     const TCGLifeData arg_life = op->life;
5960     TCGTemp *ots, *itsl, *itsh;
5961     TCGType vtype = TCGOP_TYPE(op);
5962 
5963     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5964     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5965     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5966 
5967     ots = arg_temp(op->args[0]);
5968     itsl = arg_temp(op->args[1]);
5969     itsh = arg_temp(op->args[2]);
5970 
5971     /* ENV should not be modified.  */
5972     tcg_debug_assert(!temp_readonly(ots));
5973 
5974     /* Allocate the output register now.  */
5975     if (ots->val_type != TEMP_VAL_REG) {
5976         TCGRegSet allocated_regs = s->reserved_regs;
5977         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5978         TCGReg oreg;
5979 
5980         /* Make sure to not spill the input registers. */
5981         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5982             tcg_regset_set_reg(allocated_regs, itsl->reg);
5983         }
5984         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5985             tcg_regset_set_reg(allocated_regs, itsh->reg);
5986         }
5987 
5988         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5989                              output_pref(op, 0), ots->indirect_base);
5990         set_temp_val_reg(s, ots, oreg);
5991     }
5992 
5993     /* Promote dup2 of immediates to dupi_vec. */
5994     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5995         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5996         MemOp vece = MO_64;
5997 
5998         if (val == dup_const(MO_8, val)) {
5999             vece = MO_8;
6000         } else if (val == dup_const(MO_16, val)) {
6001             vece = MO_16;
6002         } else if (val == dup_const(MO_32, val)) {
6003             vece = MO_32;
6004         }
6005 
6006         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
6007         goto done;
6008     }
6009 
6010     /* If the two inputs form one 64-bit value, try dupm_vec. */
6011     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
6012         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
6013         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
6014         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
6015 
6016         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
6017         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
6018 
6019         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
6020                              its->mem_base->reg, its->mem_offset)) {
6021             goto done;
6022         }
6023     }
6024 
6025     /* Fall back to generic expansion. */
6026     return false;
6027 
6028  done:
6029     ots->mem_coherent = 0;
6030     if (IS_DEAD_ARG(1)) {
6031         temp_dead(s, itsl);
6032     }
6033     if (IS_DEAD_ARG(2)) {
6034         temp_dead(s, itsh);
6035     }
6036     if (NEED_SYNC_ARG(0)) {
6037         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
6038     } else if (IS_DEAD_ARG(0)) {
6039         temp_dead(s, ots);
6040     }
6041     return true;
6042 }
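
/*
 * Worked example, not part of the build, of the immediate promotion
 * above: two hypothetical 32-bit halves fuse into one 64-bit constant,
 * which is then tested for a smaller repeating element.
 */
#if 0
static void example_dup2_promotion(void)
{
    uint64_t val = deposit64(0x44332211, 32, 32, 0x44332211);

    /* 0x4433221144332211 repeats a 32-bit element, so vece == MO_32;
       it does not repeat at 16 bits, so MO_16 is rejected. */
    tcg_debug_assert(val == dup_const(MO_32, val));
    tcg_debug_assert(val != dup_const(MO_16, val));
}
#endif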
6043 
6044 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
6045                          TCGRegSet allocated_regs)
6046 {
6047     if (ts->val_type == TEMP_VAL_REG) {
6048         if (ts->reg != reg) {
6049             tcg_reg_free(s, reg, allocated_regs);
6050             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
6051                 /*
6052                  * Cross register class move not supported.  Sync the
6053                  * temp back to its slot and load from there.
6054                  */
6055                 temp_sync(s, ts, allocated_regs, 0, 0);
6056                 tcg_out_ld(s, ts->type, reg,
6057                            ts->mem_base->reg, ts->mem_offset);
6058             }
6059         }
6060     } else {
6061         TCGRegSet arg_set = 0;
6062 
6063         tcg_reg_free(s, reg, allocated_regs);
6064         tcg_regset_set_reg(arg_set, reg);
6065         temp_load(s, ts, arg_set, allocated_regs, 0);
6066     }
6067 }
6068 
6069 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
6070                          TCGRegSet allocated_regs)
6071 {
6072     /*
6073      * When the destination is on the stack, load up the temp and store.
6074      * If there are many call-saved registers, the temp might live to
6075      * see another use; otherwise it'll be discarded.
6076      */
6077     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
6078     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
6079                arg_slot_stk_ofs(arg_slot));
6080 }
6081 
6082 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
6083                             TCGTemp *ts, TCGRegSet *allocated_regs)
6084 {
6085     if (arg_slot_reg_p(l->arg_slot)) {
6086         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
6087         load_arg_reg(s, reg, ts, *allocated_regs);
6088         tcg_regset_set_reg(*allocated_regs, reg);
6089     } else {
6090         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
6091     }
6092 }
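
/*
 * Sketch, not part of the build, of the slot mapping used above,
 * assuming a hypothetical target whose first six argument slots are
 * registers; later slots spill to the stack.
 */
#if 0
static void example_arg_slots(void)
{
    for (unsigned slot = 0; slot < 8; slot++) {
        if (arg_slot_reg_p(slot)) {
            /* passed in tcg_target_call_iarg_regs[slot] */
        } else {
            /* stored to TCG_REG_CALL_STACK + arg_slot_stk_ofs(slot) */
        }
    }
}
#endif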
6093 
6094 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
6095                          intptr_t ref_off, TCGRegSet *allocated_regs)
6096 {
6097     TCGReg reg;
6098 
6099     if (arg_slot_reg_p(arg_slot)) {
6100         reg = tcg_target_call_iarg_regs[arg_slot];
6101         tcg_reg_free(s, reg, *allocated_regs);
6102         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6103         tcg_regset_set_reg(*allocated_regs, reg);
6104     } else {
6105         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
6106                             *allocated_regs, 0, false);
6107         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6108         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
6109                    arg_slot_stk_ofs(arg_slot));
6110     }
6111 }
6112 
6113 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
6114 {
6115     const int nb_oargs = TCGOP_CALLO(op);
6116     const int nb_iargs = TCGOP_CALLI(op);
6117     const TCGLifeData arg_life = op->life;
6118     const TCGHelperInfo *info = tcg_call_info(op);
6119     TCGRegSet allocated_regs = s->reserved_regs;
6120     int i;
6121 
6122     /*
6123      * Move inputs into place in reverse order,
6124      * so that we place stacked arguments first.
6125      */
6126     for (i = nb_iargs - 1; i >= 0; --i) {
6127         const TCGCallArgumentLoc *loc = &info->in[i];
6128         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
6129 
6130         switch (loc->kind) {
6131         case TCG_CALL_ARG_NORMAL:
6132         case TCG_CALL_ARG_EXTEND_U:
6133         case TCG_CALL_ARG_EXTEND_S:
6134             load_arg_normal(s, loc, ts, &allocated_regs);
6135             break;
6136         case TCG_CALL_ARG_BY_REF:
6137             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6138             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
6139                          arg_slot_stk_ofs(loc->ref_slot),
6140                          &allocated_regs);
6141             break;
6142         case TCG_CALL_ARG_BY_REF_N:
6143             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6144             break;
6145         default:
6146             g_assert_not_reached();
6147         }
6148     }
6149 
6150     /* Mark dead temporaries and free the associated registers.  */
6151     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
6152         if (IS_DEAD_ARG(i)) {
6153             temp_dead(s, arg_temp(op->args[i]));
6154         }
6155     }
6156 
6157     /* Clobber call registers.  */
6158     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
6159         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
6160             tcg_reg_free(s, i, allocated_regs);
6161         }
6162     }
6163 
6164     /*
6165      * Save globals if they might be written by the helper,
6166      * sync them if they might be read.
6167      */
6168     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
6169         /* Nothing to do */
6170     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
6171         sync_globals(s, allocated_regs);
6172     } else {
6173         save_globals(s, allocated_regs);
6174     }
6175 
6176     /*
6177      * If the ABI passes a pointer to the returned struct as the first
6178      * argument, load that now.  Pass a pointer to the output home slot.
6179      */
6180     if (info->out_kind == TCG_CALL_RET_BY_REF) {
6181         TCGTemp *ts = arg_temp(op->args[0]);
6182 
6183         if (!ts->mem_allocated) {
6184             temp_allocate_frame(s, ts);
6185         }
6186         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
6187     }
6188 
6189     tcg_out_call(s, tcg_call_func(op), info);
6190 
6191     /* Assign output registers and emit moves if needed.  */
6192     switch (info->out_kind) {
6193     case TCG_CALL_RET_NORMAL:
6194         for (i = 0; i < nb_oargs; i++) {
6195             TCGTemp *ts = arg_temp(op->args[i]);
6196             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
6197 
6198             /* ENV should not be modified.  */
6199             tcg_debug_assert(!temp_readonly(ts));
6200 
6201             set_temp_val_reg(s, ts, reg);
6202             ts->mem_coherent = 0;
6203         }
6204         break;
6205 
6206     case TCG_CALL_RET_BY_VEC:
6207         {
6208             TCGTemp *ts = arg_temp(op->args[0]);
6209 
6210             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
6211             tcg_debug_assert(ts->temp_subindex == 0);
6212             if (!ts->mem_allocated) {
6213                 temp_allocate_frame(s, ts);
6214             }
6215             tcg_out_st(s, TCG_TYPE_V128,
6216                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6217                        ts->mem_base->reg, ts->mem_offset);
6218         }
6219         /* fall through to mark all parts in memory */
6220 
6221     case TCG_CALL_RET_BY_REF:
6222         /* The callee has performed a write through the reference. */
6223         for (i = 0; i < nb_oargs; i++) {
6224             TCGTemp *ts = arg_temp(op->args[i]);
6225             ts->val_type = TEMP_VAL_MEM;
6226         }
6227         break;
6228 
6229     default:
6230         g_assert_not_reached();
6231     }
6232 
6233     /* Flush or discard output registers as needed. */
6234     for (i = 0; i < nb_oargs; i++) {
6235         TCGTemp *ts = arg_temp(op->args[i]);
6236         if (NEED_SYNC_ARG(i)) {
6237             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
6238         } else if (IS_DEAD_ARG(i)) {
6239             temp_dead(s, ts);
6240         }
6241     }
6242 }
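
/*
 * Editorial worked example (backend-dependent, shown for a 64-bit
 * SysV-like host): for a call computing `i64 ret = helper(env, i64 x)`,
 * the input loop above places env in slot 0 (e.g. rdi) and x in slot 1
 * (e.g. rsi), call-clobbered registers are freed, globals are saved or
 * synced according to info->flags, the call is emitted, and the
 * TCG_CALL_RET_NORMAL case binds the output temp to the return
 * register (e.g. rax).
 */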
6243 
6244 /**
6245  * atom_and_align_for_opc:
6246  * @s: tcg context
6247  * @opc: memory operation code
6248  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
6249  * @allow_two_ops: true if we are prepared to issue two operations
6250  *
6251  * Return the alignment and atomicity to use for the inline fast path
6252  * for the given memory operation.  The alignment may be larger than
6253  * that specified in @opc, and the correct alignment will be diagnosed
6254  * by the slow path helper.
6255  *
6256  * If @allow_two_ops, the host is prepared to test for 2x alignment,
6257  * and issue two loads or stores for subalignment.
6258  */
6259 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
6260                                            MemOp host_atom, bool allow_two_ops)
6261 {
6262     MemOp align = memop_alignment_bits(opc);
6263     MemOp size = opc & MO_SIZE;
6264     MemOp half = size ? size - 1 : 0;
6265     MemOp atom = opc & MO_ATOM_MASK;
6266     MemOp atmax;
6267 
6268     switch (atom) {
6269     case MO_ATOM_NONE:
6270         /* The operation requires no specific atomicity. */
6271         atmax = MO_8;
6272         break;
6273 
6274     case MO_ATOM_IFALIGN:
6275         atmax = size;
6276         break;
6277 
6278     case MO_ATOM_IFALIGN_PAIR:
6279         atmax = half;
6280         break;
6281 
6282     case MO_ATOM_WITHIN16:
6283         atmax = size;
6284         if (size == MO_128) {
6285             /* Misalignment implies !within16, and therefore no atomicity. */
6286         } else if (host_atom != MO_ATOM_WITHIN16) {
6287             /* The host does not implement within16, so require alignment. */
6288             align = MAX(align, size);
6289         }
6290         break;
6291 
6292     case MO_ATOM_WITHIN16_PAIR:
6293         atmax = size;
6294         /*
6295          * Misalignment implies !within16, and therefore half atomicity.
6296          * Any host prepared for two operations can implement this with
6297          * half alignment.
6298          */
6299         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
6300             align = MAX(align, half);
6301         }
6302         break;
6303 
6304     case MO_ATOM_SUBALIGN:
6305         atmax = size;
6306         if (host_atom != MO_ATOM_SUBALIGN) {
6307             /* If unaligned but not odd, there are subobjects up to half. */
6308             if (allow_two_ops) {
6309                 align = MAX(align, half);
6310             } else {
6311                 align = MAX(align, size);
6312             }
6313         }
6314         break;
6315 
6316     default:
6317         g_assert_not_reached();
6318     }
6319 
6320     return (TCGAtomAlign){ .atom = atmax, .align = align };
6321 }
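
/*
 * Editorial worked examples, following directly from the cases above
 * for an 8-byte (MO_64) operation:
 *
 *   MO_ATOM_IFALIGN_PAIR: atmax = MO_32; only each 4-byte half need
 *   be atomic.
 *
 *   MO_ATOM_WITHIN16 on a host without within16 support: atmax = MO_64
 *   and align is raised to require 8-byte alignment, pushing misaligned
 *   accesses onto the slow path.
 *
 *   MO_ATOM_SUBALIGN on a host without subalign support, with
 *   allow_two_ops: align is raised only to 4-byte alignment, since two
 *   4-byte operations cover the sub-object cases.
 */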
6322 
6323 /*
6324  * Similarly for qemu_ld/st slow path helpers.
6325  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6326  * using only the provided backend tcg_out_* functions.
6327  */
6328 
6329 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6330 {
6331     int ofs = arg_slot_stk_ofs(slot);
6332 
6333     /*
6334      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6335      * require extension to uint64_t, adjust the address for uint32_t.
6336      */
6337     if (HOST_BIG_ENDIAN &&
6338         TCG_TARGET_REG_BITS == 64 &&
6339         type == TCG_TYPE_I32) {
6340         ofs += 4;
6341     }
6342     return ofs;
6343 }
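
/*
 * Editorial illustration, assuming a 64-bit big-endian host such as
 * s390x: a TCG_TYPE_I32 argument occupies the high-addressed half of
 * its 8-byte stack slot, so the +4 above makes the 4-byte store land
 * exactly where the callee reads its uint32_t.
 */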
6344 
6345 static void tcg_out_helper_load_slots(TCGContext *s,
6346                                       unsigned nmov, TCGMovExtend *mov,
6347                                       const TCGLdstHelperParam *parm)
6348 {
6349     unsigned i;
6350     TCGReg dst3;
6351 
6352     /*
6353      * Start from the end, storing to the stack first.
6354      * This frees those registers, so we need not consider overlap.
6355      */
6356     for (i = nmov; i-- > 0; ) {
6357         unsigned slot = mov[i].dst;
6358 
6359         if (arg_slot_reg_p(slot)) {
6360             goto found_reg;
6361         }
6362 
6363         TCGReg src = mov[i].src;
6364         TCGType dst_type = mov[i].dst_type;
6365         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6366 
6367         /* The argument is going onto the stack; extend into scratch. */
6368         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6369             tcg_debug_assert(parm->ntmp != 0);
6370             mov[i].dst = src = parm->tmp[0];
6371             tcg_out_movext1(s, &mov[i]);
6372         }
6373 
6374         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6375                    tcg_out_helper_stk_ofs(dst_type, slot));
6376     }
6377     return;
6378 
6379  found_reg:
6380     /*
6381      * The remaining arguments are in registers.
6382      * Convert slot numbers to argument registers.
6383      */
6384     nmov = i + 1;
6385     for (i = 0; i < nmov; ++i) {
6386         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6387     }
6388 
6389     switch (nmov) {
6390     case 4:
6391         /* The backend must have provided enough temps for the worst case. */
6392         tcg_debug_assert(parm->ntmp >= 2);
6393 
6394         dst3 = mov[3].dst;
6395         for (unsigned j = 0; j < 3; ++j) {
6396             if (dst3 == mov[j].src) {
6397                 /*
6398                  * Conflict. Copy the source to a temporary, perform the
6399                  * remaining moves, then the extension from our scratch
6400                  * on the way out.
6401                  */
6402                 TCGReg scratch = parm->tmp[1];
6403 
6404                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6405                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6406                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6407                 return;
6408             }
6409         }
6410 
6411         /* No conflicts: perform this move and continue. */
6412         tcg_out_movext1(s, &mov[3]);
6413         /* fall through */
6414 
6415     case 3:
6416         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6417                         parm->ntmp ? parm->tmp[0] : -1);
6418         break;
6419     case 2:
6420         tcg_out_movext2(s, mov, mov + 1,
6421                         parm->ntmp ? parm->tmp[0] : -1);
6422         break;
6423     case 1:
6424         tcg_out_movext1(s, mov);
6425         break;
6426     default:
6427         g_assert_not_reached();
6428     }
6429 }
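
/*
 * Editorial note on the case-4 conflict above: the no-conflict path
 * emits mov[3] first, so if mov[3].dst were also mov[1].src, that
 * write would clobber mov[1]'s input.  The conflict path therefore
 * defers mov[3]: its source is parked in a scratch register (in case
 * mov[0..2] overwrite it), tcg_out_movext3 resolves the remaining
 * three moves, and the deferred extension completes from the scratch.
 */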
6430 
6431 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6432                                     TCGType type, tcg_target_long imm,
6433                                     const TCGLdstHelperParam *parm)
6434 {
6435     if (arg_slot_reg_p(slot)) {
6436         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6437     } else {
6438         int ofs = tcg_out_helper_stk_ofs(type, slot);
6439         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6440             tcg_debug_assert(parm->ntmp != 0);
6441             tcg_out_movi(s, type, parm->tmp[0], imm);
6442             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6443         }
6444     }
6445 }
6446 
6447 static void tcg_out_helper_load_common_args(TCGContext *s,
6448                                             const TCGLabelQemuLdst *ldst,
6449                                             const TCGLdstHelperParam *parm,
6450                                             const TCGHelperInfo *info,
6451                                             unsigned next_arg)
6452 {
6453     TCGMovExtend ptr_mov = {
6454         .dst_type = TCG_TYPE_PTR,
6455         .src_type = TCG_TYPE_PTR,
6456         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6457     };
6458     const TCGCallArgumentLoc *loc = &info->in[0];
6459     TCGType type;
6460     unsigned slot;
6461     tcg_target_ulong imm;
6462 
6463     /*
6464      * Handle env, which is always first.
6465      */
6466     ptr_mov.dst = loc->arg_slot;
6467     ptr_mov.src = TCG_AREG0;
6468     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6469 
6470     /*
6471      * Handle oi.
6472      */
6473     imm = ldst->oi;
6474     loc = &info->in[next_arg];
6475     type = TCG_TYPE_I32;
6476     switch (loc->kind) {
6477     case TCG_CALL_ARG_NORMAL:
6478         break;
6479     case TCG_CALL_ARG_EXTEND_U:
6480     case TCG_CALL_ARG_EXTEND_S:
6481         /* No extension required for MemOpIdx. */
6482         tcg_debug_assert(imm <= INT32_MAX);
6483         type = TCG_TYPE_REG;
6484         break;
6485     default:
6486         g_assert_not_reached();
6487     }
6488     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6489     next_arg++;
6490 
6491     /*
6492      * Handle ra.
6493      */
6494     loc = &info->in[next_arg];
6495     slot = loc->arg_slot;
6496     if (parm->ra_gen) {
6497         int arg_reg = -1;
6498         TCGReg ra_reg;
6499 
6500         if (arg_slot_reg_p(slot)) {
6501             arg_reg = tcg_target_call_iarg_regs[slot];
6502         }
6503         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6504 
6505         ptr_mov.dst = slot;
6506         ptr_mov.src = ra_reg;
6507         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6508     } else {
6509         imm = (uintptr_t)ldst->raddr;
6510         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6511     }
6512 }
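
/*
 * Editorial reminder of the helper signature being targeted (from
 * tcg/tcg-ldst.h), e.g. for an 8-byte load:
 *
 *     uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
 *                             MemOpIdx oi, uintptr_t retaddr);
 *
 * The function above fills env (always first), then oi, then retaddr,
 * the latter either as the immediate ldst->raddr or via the backend's
 * ra_gen hook.
 */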
6513 
6514 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6515                                        const TCGCallArgumentLoc *loc,
6516                                        TCGType dst_type, TCGType src_type,
6517                                        TCGReg lo, TCGReg hi)
6518 {
6519     MemOp reg_mo;
6520 
6521     if (dst_type <= TCG_TYPE_REG) {
6522         MemOp src_ext;
6523 
6524         switch (loc->kind) {
6525         case TCG_CALL_ARG_NORMAL:
6526             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6527             break;
6528         case TCG_CALL_ARG_EXTEND_U:
6529             dst_type = TCG_TYPE_REG;
6530             src_ext = MO_UL;
6531             break;
6532         case TCG_CALL_ARG_EXTEND_S:
6533             dst_type = TCG_TYPE_REG;
6534             src_ext = MO_SL;
6535             break;
6536         default:
6537             g_assert_not_reached();
6538         }
6539 
6540         mov[0].dst = loc->arg_slot;
6541         mov[0].dst_type = dst_type;
6542         mov[0].src = lo;
6543         mov[0].src_type = src_type;
6544         mov[0].src_ext = src_ext;
6545         return 1;
6546     }
6547 
6548     if (TCG_TARGET_REG_BITS == 32) {
6549         assert(dst_type == TCG_TYPE_I64);
6550         reg_mo = MO_32;
6551     } else {
6552         assert(dst_type == TCG_TYPE_I128);
6553         reg_mo = MO_64;
6554     }
6555 
6556     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6557     mov[0].src = lo;
6558     mov[0].dst_type = TCG_TYPE_REG;
6559     mov[0].src_type = TCG_TYPE_REG;
6560     mov[0].src_ext = reg_mo;
6561 
6562     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6563     mov[1].src = hi;
6564     mov[1].dst_type = TCG_TYPE_REG;
6565     mov[1].src_type = TCG_TYPE_REG;
6566     mov[1].src_ext = reg_mo;
6567 
6568     return 2;
6569 }
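
/*
 * Editorial example of the two-part case: a TCG_TYPE_I64 value passed
 * on a 32-bit host uses two consecutive 32-bit slots, loc[0] and
 * loc[1].  On a little-endian host the low half goes to loc[0] and
 * the high half to loc[1]; HOST_BIG_ENDIAN swaps the selection so the
 * halves land in the callee's expected order.
 */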
6570 
6571 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6572                                    const TCGLdstHelperParam *parm)
6573 {
6574     const TCGHelperInfo *info;
6575     const TCGCallArgumentLoc *loc;
6576     TCGMovExtend mov[2];
6577     unsigned next_arg, nmov;
6578     MemOp mop = get_memop(ldst->oi);
6579 
6580     switch (mop & MO_SIZE) {
6581     case MO_8:
6582     case MO_16:
6583     case MO_32:
6584         info = &info_helper_ld32_mmu;
6585         break;
6586     case MO_64:
6587         info = &info_helper_ld64_mmu;
6588         break;
6589     case MO_128:
6590         info = &info_helper_ld128_mmu;
6591         break;
6592     default:
6593         g_assert_not_reached();
6594     }
6595 
6596     /* Defer env argument. */
6597     next_arg = 1;
6598 
6599     loc = &info->in[next_arg];
6600     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6601         /*
6602          * 32-bit host with 32-bit guest: zero-extend the guest address
6603          * to 64-bits for the helper by storing the low part, then
6604          * load a zero for the high part.
6605          */
6606         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6607                                TCG_TYPE_I32, TCG_TYPE_I32,
6608                                ldst->addr_reg, -1);
6609         tcg_out_helper_load_slots(s, 1, mov, parm);
6610 
6611         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6612                                 TCG_TYPE_I32, 0, parm);
6613         next_arg += 2;
6614     } else {
6615         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6616                                       ldst->addr_reg, -1);
6617         tcg_out_helper_load_slots(s, nmov, mov, parm);
6618         next_arg += nmov;
6619     }
6620 
6621     switch (info->out_kind) {
6622     case TCG_CALL_RET_NORMAL:
6623     case TCG_CALL_RET_BY_VEC:
6624         break;
6625     case TCG_CALL_RET_BY_REF:
6626         /*
6627          * The return reference is in the first argument slot.
6628          * We need memory in which to return: re-use the top of stack.
6629          */
6630         {
6631             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6632 
6633             if (arg_slot_reg_p(0)) {
6634                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6635                                  TCG_REG_CALL_STACK, ofs_slot0);
6636             } else {
6637                 tcg_debug_assert(parm->ntmp != 0);
6638                 tcg_out_addi_ptr(s, parm->tmp[0],
6639                                  TCG_REG_CALL_STACK, ofs_slot0);
6640                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6641                            TCG_REG_CALL_STACK, ofs_slot0);
6642             }
6643         }
6644         break;
6645     default:
6646         g_assert_not_reached();
6647     }
6648 
6649     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6650 }
6651 
6652 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6653                                   bool load_sign,
6654                                   const TCGLdstHelperParam *parm)
6655 {
6656     MemOp mop = get_memop(ldst->oi);
6657     TCGMovExtend mov[2];
6658     int ofs_slot0;
6659 
6660     switch (ldst->type) {
6661     case TCG_TYPE_I64:
6662         if (TCG_TARGET_REG_BITS == 32) {
6663             break;
6664         }
6665         /* fall through */
6666 
6667     case TCG_TYPE_I32:
6668         mov[0].dst = ldst->datalo_reg;
6669         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6670         mov[0].dst_type = ldst->type;
6671         mov[0].src_type = TCG_TYPE_REG;
6672 
6673         /*
6674          * If load_sign, then we allowed the helper to perform the
6675          * appropriate sign extension to tcg_target_ulong, and all
6676          * we need now is a plain move.
6677          *
6678          * If not, then we expect the relevant extension
6679          * instruction to be no more expensive than a move, and
6680          * we thus save the icache etc by only using one of two
6681          * helper functions.
6682          */
6683         if (load_sign || !(mop & MO_SIGN)) {
6684             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6685                 mov[0].src_ext = MO_32;
6686             } else {
6687                 mov[0].src_ext = MO_64;
6688             }
6689         } else {
6690             mov[0].src_ext = mop & MO_SSIZE;
6691         }
6692         tcg_out_movext1(s, mov);
6693         return;
6694 
6695     case TCG_TYPE_I128:
6696         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6697         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6698         switch (TCG_TARGET_CALL_RET_I128) {
6699         case TCG_CALL_RET_NORMAL:
6700             break;
6701         case TCG_CALL_RET_BY_VEC:
6702             tcg_out_st(s, TCG_TYPE_V128,
6703                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6704                        TCG_REG_CALL_STACK, ofs_slot0);
6705             /* fall through */
6706         case TCG_CALL_RET_BY_REF:
6707             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6708                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6709             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6710                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6711             return;
6712         default:
6713             g_assert_not_reached();
6714         }
6715         break;
6716 
6717     default:
6718         g_assert_not_reached();
6719     }
6720 
6721     mov[0].dst = ldst->datalo_reg;
6722     mov[0].src =
6723         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6724     mov[0].dst_type = TCG_TYPE_REG;
6725     mov[0].src_type = TCG_TYPE_REG;
6726     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6727 
6728     mov[1].dst = ldst->datahi_reg;
6729     mov[1].src =
6730         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6731     mov[1].dst_type = TCG_TYPE_REG;
6732     mov[1].src_type = TCG_TYPE_REG;
6733     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6734 
6735     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6736 }
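
/*
 * Editorial sketch of the I128 paths above on a 64-bit host:
 *
 *   TCG_CALL_RET_NORMAL: the value arrives in two return registers;
 *   the trailing movext2 copies them into datalo/datahi, ordered by
 *   HOST_BIG_ENDIAN.
 *
 *   TCG_CALL_RET_BY_VEC (a host returning Int128 in a vector
 *   register): the vector is first spilled to the top of stack, then
 *   reloaded as two 64-bit halves, sharing the BY_REF reload code.
 */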
6737 
6738 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6739                                    const TCGLdstHelperParam *parm)
6740 {
6741     const TCGHelperInfo *info;
6742     const TCGCallArgumentLoc *loc;
6743     TCGMovExtend mov[4];
6744     TCGType data_type;
6745     unsigned next_arg, nmov, n;
6746     MemOp mop = get_memop(ldst->oi);
6747 
6748     switch (mop & MO_SIZE) {
6749     case MO_8:
6750     case MO_16:
6751     case MO_32:
6752         info = &info_helper_st32_mmu;
6753         data_type = TCG_TYPE_I32;
6754         break;
6755     case MO_64:
6756         info = &info_helper_st64_mmu;
6757         data_type = TCG_TYPE_I64;
6758         break;
6759     case MO_128:
6760         info = &info_helper_st128_mmu;
6761         data_type = TCG_TYPE_I128;
6762         break;
6763     default:
6764         g_assert_not_reached();
6765     }
6766 
6767     /* Defer env argument. */
6768     next_arg = 1;
6769     nmov = 0;
6770 
6771     /* Handle addr argument. */
6772     loc = &info->in[next_arg];
6773     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6774     if (TCG_TARGET_REG_BITS == 32) {
6775         /*
6776          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6777          * to 64-bits for the helper by storing the low part.  Later,
6778          * after we have processed the register inputs, we will load a
6779          * zero for the high part.
6780          */
6781         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6782                                TCG_TYPE_I32, TCG_TYPE_I32,
6783                                ldst->addr_reg, -1);
6784         next_arg += 2;
6785         nmov += 1;
6786     } else {
6787         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6788                                    ldst->addr_reg, -1);
6789         next_arg += n;
6790         nmov += n;
6791     }
6792 
6793     /* Handle data argument. */
6794     loc = &info->in[next_arg];
6795     switch (loc->kind) {
6796     case TCG_CALL_ARG_NORMAL:
6797     case TCG_CALL_ARG_EXTEND_U:
6798     case TCG_CALL_ARG_EXTEND_S:
6799         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6800                                    ldst->datalo_reg, ldst->datahi_reg);
6801         next_arg += n;
6802         nmov += n;
6803         tcg_out_helper_load_slots(s, nmov, mov, parm);
6804         break;
6805 
6806     case TCG_CALL_ARG_BY_REF:
6807         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6808         tcg_debug_assert(data_type == TCG_TYPE_I128);
6809         tcg_out_st(s, TCG_TYPE_I64,
6810                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6811                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6812         tcg_out_st(s, TCG_TYPE_I64,
6813                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6814                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6815 
6816         tcg_out_helper_load_slots(s, nmov, mov, parm);
6817 
6818         if (arg_slot_reg_p(loc->arg_slot)) {
6819             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6820                              TCG_REG_CALL_STACK,
6821                              arg_slot_stk_ofs(loc->ref_slot));
6822         } else {
6823             tcg_debug_assert(parm->ntmp != 0);
6824             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6825                              arg_slot_stk_ofs(loc->ref_slot));
6826             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6827                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6828         }
6829         next_arg += 2;
6830         break;
6831 
6832     default:
6833         g_assert_not_reached();
6834     }
6835 
6836     if (TCG_TARGET_REG_BITS == 32) {
6837         /* Zero extend the address by loading a zero for the high part. */
6838         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6839         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6840     }
6841 
6842     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6843 }
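
/*
 * Editorial note on TCG_CALL_ARG_BY_REF above (64-bit host, I128
 * store): the two data halves are parked in the reserved ref_slot
 * pair on the stack, the register arguments are placed, and only then
 * is the address of that stack pair materialized into the argument
 * slot, since this ABI passes Int128 by reference.
 */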
6844 
6845 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6846 {
6847     int i, num_insns;
6848     TCGOp *op;
6849 
6850     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6851                  && qemu_log_in_addr_range(pc_start))) {
6852         FILE *logfile = qemu_log_trylock();
6853         if (logfile) {
6854             fprintf(logfile, "OP:\n");
6855             tcg_dump_ops(s, logfile, false);
6856             fprintf(logfile, "\n");
6857             qemu_log_unlock(logfile);
6858         }
6859     }
6860 
6861 #ifdef CONFIG_DEBUG_TCG
6862     /* Ensure all labels referenced have been emitted.  */
6863     {
6864         TCGLabel *l;
6865         bool error = false;
6866 
6867         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6868             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6869                 qemu_log_mask(CPU_LOG_TB_OP,
6870                               "$L%d referenced but not present.\n", l->id);
6871                 error = true;
6872             }
6873         }
6874         assert(!error);
6875     }
6876 #endif
6877 
6878     /* Do not reuse any EBB that may be allocated within the TB. */
6879     tcg_temp_ebb_reset_freed(s);
6880 
6881     tcg_optimize(s);
6882 
6883     reachable_code_pass(s);
6884     liveness_pass_0(s);
6885     liveness_pass_1(s);
6886 
6887     if (s->nb_indirects > 0) {
6888         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6889                      && qemu_log_in_addr_range(pc_start))) {
6890             FILE *logfile = qemu_log_trylock();
6891             if (logfile) {
6892                 fprintf(logfile, "OP before indirect lowering:\n");
6893                 tcg_dump_ops(s, logfile, false);
6894                 fprintf(logfile, "\n");
6895                 qemu_log_unlock(logfile);
6896             }
6897         }
6898 
6899         /* Replace indirect temps with direct temps.  */
6900         if (liveness_pass_2(s)) {
6901             /* If changes were made, re-run liveness.  */
6902             liveness_pass_1(s);
6903         }
6904     }
6905 
6906     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6907                  && qemu_log_in_addr_range(pc_start))) {
6908         FILE *logfile = qemu_log_trylock();
6909         if (logfile) {
6910             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6911             tcg_dump_ops(s, logfile, true);
6912             fprintf(logfile, "\n");
6913             qemu_log_unlock(logfile);
6914         }
6915     }
6916 
6917     /* Initialize goto_tb jump offsets. */
6918     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6919     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6920     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6921     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6922 
6923     tcg_reg_alloc_start(s);
6924 
6925     /*
6926      * Reset the buffer pointers when restarting after overflow.
6927      * TODO: Move this into translate-all.c with the rest of the
6928      * buffer management.  Having only this done here is confusing.
6929      */
6930     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6931     s->code_ptr = s->code_buf;
6932     s->data_gen_ptr = NULL;
6933 
6934     QSIMPLEQ_INIT(&s->ldst_labels);
6935     s->pool_labels = NULL;
6936 
6937     s->gen_insn_data =
6938         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * INSN_START_WORDS);
6939 
6940     tcg_out_tb_start(s);
6941 
6942     num_insns = -1;
6943     s->carry_live = false;
6944     QTAILQ_FOREACH(op, &s->ops, link) {
6945         TCGOpcode opc = op->opc;
6946 
6947         switch (opc) {
6948         case INDEX_op_extrl_i64_i32:
6949             assert(TCG_TARGET_REG_BITS == 64);
6950             /*
6951              * If TCG_TYPE_I32 is represented in some canonical form,
6952              * e.g. zero or sign-extended, then emit as a unary op.
6953              * Otherwise we can treat this as a plain move.
6954              * If the output dies, treat this as a plain move, because
6955              * this will be implemented with a store.
6956              */
6957             if (TCG_TARGET_HAS_extr_i64_i32) {
6958                 TCGLifeData arg_life = op->life;
6959                 if (!IS_DEAD_ARG(0)) {
6960                     goto do_default;
6961                 }
6962             }
6963             /* fall through */
6964         case INDEX_op_mov:
6965         case INDEX_op_mov_vec:
6966             tcg_reg_alloc_mov(s, op);
6967             break;
6968         case INDEX_op_dup_vec:
6969             tcg_reg_alloc_dup(s, op);
6970             break;
6971         case INDEX_op_insn_start:
6972             assert_carry_dead(s);
6973             if (num_insns >= 0) {
6974                 size_t off = tcg_current_code_size(s);
6975                 s->gen_insn_end_off[num_insns] = off;
6976                 /* Assert that we do not overflow our stored offset.  */
6977                 assert(s->gen_insn_end_off[num_insns] == off);
6978             }
6979             num_insns++;
6980             for (i = 0; i < INSN_START_WORDS; ++i) {
6981                 s->gen_insn_data[num_insns * INSN_START_WORDS + i] =
6982                     tcg_get_insn_start_param(op, i);
6983             }
6984             break;
6985         case INDEX_op_discard:
6986             temp_dead(s, arg_temp(op->args[0]));
6987             break;
6988         case INDEX_op_set_label:
6989             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6990             tcg_out_label(s, arg_label(op->args[0]));
6991             break;
6992         case INDEX_op_call:
6993             assert_carry_dead(s);
6994             tcg_reg_alloc_call(s, op);
6995             break;
6996         case INDEX_op_exit_tb:
6997             tcg_out_exit_tb(s, op->args[0]);
6998             break;
6999         case INDEX_op_goto_tb:
7000             tcg_out_goto_tb(s, op->args[0]);
7001             break;
7002         case INDEX_op_br:
7003             tcg_out_br(s, arg_label(op->args[0]));
7004             break;
7005         case INDEX_op_mb:
7006             tcg_out_mb(s, op->args[0]);
7007             break;
7008         case INDEX_op_dup2_vec:
7009             if (tcg_reg_alloc_dup2(s, op)) {
7010                 break;
7011             }
7012             /* fall through */
7013         default:
7014         do_default:
7015             /* Sanity check that we've not introduced any unhandled opcodes. */
7016             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
7017                                               TCGOP_FLAGS(op)));
7018             /* Note: in order to speed up the code, it would be much
7019                faster to have specialized register allocator functions for
7020                some common argument patterns */
7021             tcg_reg_alloc_op(s, op);
7022             break;
7023         }
7024         /* Test for (pending) buffer overflow.  The assumption is that any
7025            one operation beginning below the high water mark cannot overrun
7026            the buffer completely.  Thus we can test for overflow after
7027            generating code without having to check during generation.  */
7028         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
7029             return -1;
7030         }
7031         /* Test for TB overflow, as seen by gen_insn_end_off.  */
7032         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
7033             return -2;
7034         }
7035     }
7036     assert_carry_dead(s);
7037 
7038     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
7039     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
7040 
7041     /* Generate TB finalization at the end of block */
7042     i = tcg_out_ldst_finalize(s);
7043     if (i < 0) {
7044         return i;
7045     }
7046     i = tcg_out_pool_finalize(s);
7047     if (i < 0) {
7048         return i;
7049     }
7050     if (!tcg_resolve_relocs(s)) {
7051         return -2;
7052     }
7053 
7054 #ifndef CONFIG_TCG_INTERPRETER
7055     /* flush instruction cache */
7056     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
7057                         (uintptr_t)s->code_buf,
7058                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
7059 #endif
7060 
7061     return tcg_current_code_size(s);
7062 }
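
/*
 * Editorial summary, restating the returns above: on success,
 * tcg_gen_code returns the number of bytes generated.  A negative
 * value requests a restart: -1 after crossing the buffer high-water
 * mark (a new region is needed), -2 when the TB outgrows what the
 * uint16_t insn offsets or the relocations can encode (the caller
 * should retry with fewer guest instructions).
 */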
7063 
7064 #ifdef ELF_HOST_MACHINE
7065 /* In order to use this feature, the backend needs to do three things:
7066 
7067    (1) Define ELF_HOST_MACHINE to indicate both what value to
7068        put into the ELF image and to indicate support for the feature.
7069 
7070    (2) Define tcg_register_jit.  This should create a buffer containing
7071        the contents of a .debug_frame section that describes the post-
7072        prologue unwind info for the tcg machine.
7073 
7074    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
7075 */
7076 
7077 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
7078 typedef enum {
7079     JIT_NOACTION = 0,
7080     JIT_REGISTER_FN,
7081     JIT_UNREGISTER_FN
7082 } jit_actions_t;
7083 
7084 struct jit_code_entry {
7085     struct jit_code_entry *next_entry;
7086     struct jit_code_entry *prev_entry;
7087     const void *symfile_addr;
7088     uint64_t symfile_size;
7089 };
7090 
7091 struct jit_descriptor {
7092     uint32_t version;
7093     uint32_t action_flag;
7094     struct jit_code_entry *relevant_entry;
7095     struct jit_code_entry *first_entry;
7096 };
7097 
7098 void __jit_debug_register_code(void) __attribute__((noinline));
7099 void __jit_debug_register_code(void)
7100 {
7101     asm("");
7102 }
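
/*
 * Editorial note, per the GDB JIT interface documentation: the
 * debugger plants a breakpoint on __jit_debug_register_code and, when
 * it fires, reads __jit_debug_descriptor to pick up the newly
 * registered symfile.  The empty asm keeps the function from being
 * optimized away.
 */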
7103 
7104 /* Must statically initialize the version, because GDB may check
7105    the version before we can set it.  */
7106 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
7107 
7108 /* End GDB interface.  */
7109 
7110 static int find_string(const char *strtab, const char *str)
7111 {
7112     const char *p = strtab + 1;
7113 
7114     while (1) {
7115         if (strcmp(p, str) == 0) {
7116             return p - strtab;
7117         }
7118         p += strlen(p) + 1;
7119     }
7120 }
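
/*
 * Editorial worked example: with strtab laid out as
 * "\0" ".text\0" ".debug_info\0" ..., find_string(strtab, ".debug_info")
 * returns 7: index 0 is the empty string and ".text" plus its NUL
 * spans indexes 1..6.  The loop never terminates for a string absent
 * from the table; all callers below pass strings known to be present.
 */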
7121 
7122 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
7123                                  const void *debug_frame,
7124                                  size_t debug_frame_size)
7125 {
7126     struct __attribute__((packed)) DebugInfo {
7127         uint32_t  len;
7128         uint16_t  version;
7129         uint32_t  abbrev;
7130         uint8_t   ptr_size;
7131         uint8_t   cu_die;
7132         uint16_t  cu_lang;
7133         uintptr_t cu_low_pc;
7134         uintptr_t cu_high_pc;
7135         uint8_t   fn_die;
7136         char      fn_name[16];
7137         uintptr_t fn_low_pc;
7138         uintptr_t fn_high_pc;
7139         uint8_t   cu_eoc;
7140     };
7141 
7142     struct ElfImage {
7143         ElfW(Ehdr) ehdr;
7144         ElfW(Phdr) phdr;
7145         ElfW(Shdr) shdr[7];
7146         ElfW(Sym)  sym[2];
7147         struct DebugInfo di;
7148         uint8_t    da[24];
7149         char       str[80];
7150     };
7151 
7152     struct ElfImage *img;
7153 
7154     static const struct ElfImage img_template = {
7155         .ehdr = {
7156             .e_ident[EI_MAG0] = ELFMAG0,
7157             .e_ident[EI_MAG1] = ELFMAG1,
7158             .e_ident[EI_MAG2] = ELFMAG2,
7159             .e_ident[EI_MAG3] = ELFMAG3,
7160             .e_ident[EI_CLASS] = ELF_CLASS,
7161             .e_ident[EI_DATA] = ELF_DATA,
7162             .e_ident[EI_VERSION] = EV_CURRENT,
7163             .e_type = ET_EXEC,
7164             .e_machine = ELF_HOST_MACHINE,
7165             .e_version = EV_CURRENT,
7166             .e_phoff = offsetof(struct ElfImage, phdr),
7167             .e_shoff = offsetof(struct ElfImage, shdr),
7168             .e_ehsize = sizeof(ElfW(Shdr)),
7169             .e_phentsize = sizeof(ElfW(Phdr)),
7170             .e_phnum = 1,
7171             .e_shentsize = sizeof(ElfW(Shdr)),
7172             .e_shnum = ARRAY_SIZE(img->shdr),
7173             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
7174 #ifdef ELF_HOST_FLAGS
7175             .e_flags = ELF_HOST_FLAGS,
7176 #endif
7177 #ifdef ELF_OSABI
7178             .e_ident[EI_OSABI] = ELF_OSABI,
7179 #endif
7180         },
7181         .phdr = {
7182             .p_type = PT_LOAD,
7183             .p_flags = PF_X,
7184         },
7185         .shdr = {
7186             [0] = { .sh_type = SHT_NULL },
7187             /* Trick: The contents of code_gen_buffer are not present in
7188                this fake ELF file; that got allocated elsewhere.  Therefore
7189                we mark .text as SHT_NOBITS (similar to .bss) so that readers
7190                will not look for contents.  We can record any address.  */
7191             [1] = { /* .text */
7192                 .sh_type = SHT_NOBITS,
7193                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
7194             },
7195             [2] = { /* .debug_info */
7196                 .sh_type = SHT_PROGBITS,
7197                 .sh_offset = offsetof(struct ElfImage, di),
7198                 .sh_size = sizeof(struct DebugInfo),
7199             },
7200             [3] = { /* .debug_abbrev */
7201                 .sh_type = SHT_PROGBITS,
7202                 .sh_offset = offsetof(struct ElfImage, da),
7203                 .sh_size = sizeof(img->da),
7204             },
7205             [4] = { /* .debug_frame */
7206                 .sh_type = SHT_PROGBITS,
7207                 .sh_offset = sizeof(struct ElfImage),
7208             },
7209             [5] = { /* .symtab */
7210                 .sh_type = SHT_SYMTAB,
7211                 .sh_offset = offsetof(struct ElfImage, sym),
7212                 .sh_size = sizeof(img->sym),
7213                 .sh_info = 1,
7214                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
7215                 .sh_entsize = sizeof(ElfW(Sym)),
7216             },
7217             [6] = { /* .strtab */
7218                 .sh_type = SHT_STRTAB,
7219                 .sh_offset = offsetof(struct ElfImage, str),
7220                 .sh_size = sizeof(img->str),
7221             }
7222         },
7223         .sym = {
7224             [1] = { /* code_gen_buffer */
7225                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
7226                 .st_shndx = 1,
7227             }
7228         },
7229         .di = {
7230             .len = sizeof(struct DebugInfo) - 4,
7231             .version = 2,
7232             .ptr_size = sizeof(void *),
7233             .cu_die = 1,
7234             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
7235             .fn_die = 2,
7236             .fn_name = "code_gen_buffer"
7237         },
7238         .da = {
7239             1,          /* abbrev number (the cu) */
7240             0x11, 1,    /* DW_TAG_compile_unit, has children */
7241             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
7242             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7243             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7244             0, 0,       /* end of abbrev */
7245             2,          /* abbrev number (the fn) */
7246             0x2e, 0,    /* DW_TAG_subprogram, no children */
7247             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
7248             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7249             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7250             0, 0,       /* end of abbrev */
7251             0           /* no more abbrev */
7252         },
7253         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
7254                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
7255     };
7256 
7257     /* We only need a single jit entry; statically allocate it.  */
7258     static struct jit_code_entry one_entry;
7259 
7260     uintptr_t buf = (uintptr_t)buf_ptr;
7261     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
7262     DebugFrameHeader *dfh;
7263 
7264     img = g_malloc(img_size);
7265     *img = img_template;
7266 
7267     img->phdr.p_vaddr = buf;
7268     img->phdr.p_paddr = buf;
7269     img->phdr.p_memsz = buf_size;
7270 
7271     img->shdr[1].sh_name = find_string(img->str, ".text");
7272     img->shdr[1].sh_addr = buf;
7273     img->shdr[1].sh_size = buf_size;
7274 
7275     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
7276     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
7277 
7278     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
7279     img->shdr[4].sh_size = debug_frame_size;
7280 
7281     img->shdr[5].sh_name = find_string(img->str, ".symtab");
7282     img->shdr[6].sh_name = find_string(img->str, ".strtab");
7283 
7284     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
7285     img->sym[1].st_value = buf;
7286     img->sym[1].st_size = buf_size;
7287 
7288     img->di.cu_low_pc = buf;
7289     img->di.cu_high_pc = buf + buf_size;
7290     img->di.fn_low_pc = buf;
7291     img->di.fn_high_pc = buf + buf_size;
7292 
7293     dfh = (DebugFrameHeader *)(img + 1);
7294     memcpy(dfh, debug_frame, debug_frame_size);
7295     dfh->fde.func_start = buf;
7296     dfh->fde.func_len = buf_size;
7297 
7298 #ifdef DEBUG_JIT
7299     /* Enable this block to be able to debug the ELF image file creation.
7300        One can use readelf, objdump, or other inspection utilities.  */
7301     {
7302         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
7303         FILE *f = fopen(jit, "w+b");
7304         if (f) {
7305             if (fwrite(img, img_size, 1, f) != 1) {
7306                 /* Avoid stupid unused return value warning for fwrite.  */
7307             }
7308             fclose(f);
7309         }
7310     }
7311 #endif
7312 
7313     one_entry.symfile_addr = img;
7314     one_entry.symfile_size = img_size;
7315 
7316     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
7317     __jit_debug_descriptor.relevant_entry = &one_entry;
7318     __jit_debug_descriptor.first_entry = &one_entry;
7319     __jit_debug_register_code();
7320 }
7321 #else
7322 /* No support for the feature.  Provide the entry point expected by exec.c,
7323    and implement the internal function we declared earlier.  */
7324 
7325 static void tcg_register_jit_int(const void *buf, size_t size,
7326                                  const void *debug_frame,
7327                                  size_t debug_frame_size)
7328 {
7329 }
7330 
7331 void tcg_register_jit(const void *buf, size_t buf_size)
7332 {
7333 }
7334 #endif /* ELF_HOST_MACHINE */
7335 
7336 #if !TCG_TARGET_MAYBE_vec
7337 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
7338 {
7339     g_assert_not_reached();
7340 }
7341 #endif
7342