/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host addr of the code following qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
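
/*
 * Illustrative sketch (not part of the build): whatever the host's
 * TCG_TARGET_INSN_UNIT_SIZE, each helper above emits a fixed number of
 * bits and advances s->code_ptr by the matching number of units, e.g.
 *
 *     tcg_out32(s, insn);   // 4 bytes: one 4-byte unit, or four 1-byte units
 *
 * so backend code can emit an instruction without caring whether the
 * host encoding is byte-oriented or fixed-width.
 */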

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
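
/*
 * Example (illustrative): to sign-extend a 32-bit value in @src into a
 * 64-bit register @dst, a backend could write
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * which dispatches to tcg_out_exts_i32_i64() above.
 */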

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
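
/*
 * Example (illustrative): i1 = {dst=R0, src=R1} and i2 = {dst=R1, src=R0}
 * form a two-register swap.  A host xchg, when available, puts both values
 * in place so that only the in-place extensions remain; otherwise R1 is
 * first parked in @scratch so that neither source is clobbered too early.
 */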

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
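
/*
 * Worked example (illustrative) for the "clockwise" cycle, with sources
 * R0, R1, R2 holding a, b, c and i1->dst == R1, i2->dst == R2, i3->dst == R0:
 * xchg(R1, R2) yields (a, c, b), then xchg(R0, R1) yields (c, a, b), i.e.
 * every value sits in its destination and only the in-place extensions
 * remain.  The scratch fallbacks above realize the same permutation, which
 * is why the xchg sequences are paired with the conditions as written.
 */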

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
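
/*
 * Note (editorial): the sort order above is not used for lookup; it only
 * guarantees that identical pool entries end up adjacent, so that
 * tcg_out_pool_finalize() below can deduplicate them by comparing each
 * entry against the one most recently emitted.
 */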

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
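
/*
 * For example, a constraint set declared in tcg-target-con-set.h as
 * C_O1_I2(r, r, ri) expands here to the enumerator c_o1_i2_r_r_ri.
 */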

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
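
/*
 * Under this second expansion the same example, C_O1_I2(r, r, ri),
 * becomes the initializer { 1, 2, { "r", "r", "ri" } }: one output,
 * two inputs, and the stringified constraint letters.
 */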

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
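
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) compiles
 * only if the backend declared outop_add with type TCGOutOpBinary; any
 * other type fails the _Generic selection at compile time.
 */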

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
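    /*
     * Worked example (editorial): for info_helper_ld32_mmu above, the
     * typemask packs 5 typecodes of 3 bits each (return + 4 arguments),
     * so the highest set bit of typemask >> 3 lies within bits 9..11
     * and the two lines above yield nargs = 4.
     */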
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
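
/*
 * Example (illustrative): on a host with 6 integer argument registers,
 * arg_slot 7 is the second stack slot, giving an offset of
 * TCG_TARGET_CALL_STACK_OFFSET + 1 * sizeof(tcg_target_long).
 */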

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
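
/*
 * Example (illustrative): an Int128 argument on a 64-bit host with
 * TCG_CALL_ARG_BY_REF uses n = 2: one TCG_CALL_ARG_BY_REF entry carrying
 * the pointer in the normal argument sequence, plus one
 * TCG_CALL_ARG_BY_REF_N entry naming the second copied word in "ref_slot".
 */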
1443 
1444 static void init_call_layout(TCGHelperInfo *info)
1445 {
1446     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1447     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1448     unsigned typemask = info->typemask;
1449     unsigned typecode;
1450     TCGCumulativeArgs cum = { };
1451 
1452     /*
1453      * Parse and place any function return value.
1454      */
1455     typecode = typemask & 7;
1456     switch (typecode) {
1457     case dh_typecode_void:
1458         info->nr_out = 0;
1459         break;
1460     case dh_typecode_i32:
1461     case dh_typecode_s32:
1462     case dh_typecode_ptr:
1463         info->nr_out = 1;
1464         info->out_kind = TCG_CALL_RET_NORMAL;
1465         break;
1466     case dh_typecode_i64:
1467     case dh_typecode_s64:
1468         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1469         info->out_kind = TCG_CALL_RET_NORMAL;
1470         /* Query the last register now to trigger any assert early. */
1471         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1472         break;
1473     case dh_typecode_i128:
1474         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1475         info->out_kind = TCG_TARGET_CALL_RET_I128;
1476         switch (TCG_TARGET_CALL_RET_I128) {
1477         case TCG_CALL_RET_NORMAL:
1478             /* Query the last register now to trigger any assert early. */
1479             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1480             break;
1481         case TCG_CALL_RET_BY_VEC:
1482             /* Query the single register now to trigger any assert early. */
1483             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1484             break;
1485         case TCG_CALL_RET_BY_REF:
1486             /*
1487              * Allocate the first argument to the output.
1488              * We don't need to store this anywhere, just make it
1489              * unavailable for use in the input loop below.
1490              */
1491             cum.arg_slot = 1;
1492             break;
1493         default:
1494             qemu_build_not_reached();
1495         }
1496         break;
1497     default:
1498         g_assert_not_reached();
1499     }
1500 
1501     /*
1502      * Parse and place function arguments.
1503      */
1504     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1505         TCGCallArgumentKind kind;
1506         TCGType type;
1507 
1508         typecode = typemask & 7;
1509         switch (typecode) {
1510         case dh_typecode_i32:
1511         case dh_typecode_s32:
1512             type = TCG_TYPE_I32;
1513             break;
1514         case dh_typecode_i64:
1515         case dh_typecode_s64:
1516             type = TCG_TYPE_I64;
1517             break;
1518         case dh_typecode_ptr:
1519             type = TCG_TYPE_PTR;
1520             break;
1521         case dh_typecode_i128:
1522             type = TCG_TYPE_I128;
1523             break;
1524         default:
1525             g_assert_not_reached();
1526         }
1527 
1528         switch (type) {
1529         case TCG_TYPE_I32:
1530             switch (TCG_TARGET_CALL_ARG_I32) {
1531             case TCG_CALL_ARG_EVEN:
1532                 layout_arg_even(&cum);
1533                 /* fall through */
1534             case TCG_CALL_ARG_NORMAL:
1535                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1536                 break;
1537             case TCG_CALL_ARG_EXTEND:
1538                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1539                 layout_arg_1(&cum, info, kind);
1540                 break;
1541             default:
1542                 qemu_build_not_reached();
1543             }
1544             break;
1545 
1546         case TCG_TYPE_I64:
1547             switch (TCG_TARGET_CALL_ARG_I64) {
1548             case TCG_CALL_ARG_EVEN:
1549                 layout_arg_even(&cum);
1550                 /* fall through */
1551             case TCG_CALL_ARG_NORMAL:
1552                 if (TCG_TARGET_REG_BITS == 32) {
1553                     layout_arg_normal_n(&cum, info, 2);
1554                 } else {
1555                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1556                 }
1557                 break;
1558             default:
1559                 qemu_build_not_reached();
1560             }
1561             break;
1562 
1563         case TCG_TYPE_I128:
1564             switch (TCG_TARGET_CALL_ARG_I128) {
1565             case TCG_CALL_ARG_EVEN:
1566                 layout_arg_even(&cum);
1567                 /* fall through */
1568             case TCG_CALL_ARG_NORMAL:
1569                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1570                 break;
1571             case TCG_CALL_ARG_BY_REF:
1572                 layout_arg_by_ref(&cum, info);
1573                 break;
1574             default:
1575                 qemu_build_not_reached();
1576             }
1577             break;
1578 
1579         default:
1580             g_assert_not_reached();
1581         }
1582     }
1583     info->nr_in = cum.info_in_idx;
1584 
1585     /* Validate that we didn't overrun the input array. */
1586     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1587     /* Validate the backend has enough argument space. */
1588     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1589 
1590     /*
1591      * Relocate the "ref_slot" area to the end of the parameters.
1592      * Minimizing this stack offset helps code size for x86,
1593      * which has a signed 8-bit offset encoding.
1594      */
1595     if (cum.ref_slot != 0) {
1596         int ref_base = 0;
1597 
1598         if (cum.arg_slot > max_reg_slots) {
1599             int align = __alignof(Int128) / sizeof(tcg_target_long);
1600 
1601             ref_base = cum.arg_slot - max_reg_slots;
1602             if (align > 1) {
1603                 ref_base = ROUND_UP(ref_base, align);
1604             }
1605         }
1606         assert(ref_base + cum.ref_slot <= max_stk_slots);
1607         ref_base += max_reg_slots;
1608 
1609         if (ref_base != 0) {
1610             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1611                 TCGCallArgumentLoc *loc = &info->in[i];
1612                 switch (loc->kind) {
1613                 case TCG_CALL_ARG_BY_REF:
1614                 case TCG_CALL_ARG_BY_REF_N:
1615                     loc->ref_slot += ref_base;
1616                     break;
1617                 default:
1618                     break;
1619                 }
1620             }
1621         }
1622     }
1623 }
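/*
 * Illustrative sketch (not part of the original file): how the
 * typemask consumed above is laid out.  Bits [2:0] hold the return
 * typecode and bits [3k+5:3k+3] hold the typecode of argument k,
 * which is all that the decoding below assumes.
 */
#if 0
static void typemask_walk_example(unsigned typemask)
{
    unsigned ret_code = typemask & 7;   /* return-value typecode */
    unsigned mask;
    int arg;

    for (arg = 0, mask = typemask >> 3; mask; mask >>= 3, arg++) {
        unsigned arg_code = mask & 7;   /* typecode of argument 'arg' */
        (void)arg_code;                 /* dispatch as the switch above */
    }
    (void)ret_code;
}
#endif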
1624 
1625 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1626 static void process_constraint_sets(void);
1627 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1628                                             TCGReg reg, const char *name);
1629 
1630 static void tcg_context_init(unsigned max_threads)
1631 {
1632     TCGContext *s = &tcg_init_ctx;
1633     int n, i;
1634     TCGTemp *ts;
1635 
1636     memset(s, 0, sizeof(*s));
1637     s->nb_globals = 0;
1638 
1639     init_call_layout(&info_helper_ld32_mmu);
1640     init_call_layout(&info_helper_ld64_mmu);
1641     init_call_layout(&info_helper_ld128_mmu);
1642     init_call_layout(&info_helper_st32_mmu);
1643     init_call_layout(&info_helper_st64_mmu);
1644     init_call_layout(&info_helper_st128_mmu);
1645 
1646     tcg_target_init(s);
1647     process_constraint_sets();
1648 
1649     /* Reverse the order of the saved registers, assuming they're all at
1650        the start of tcg_target_reg_alloc_order.  */
1651     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1652         int r = tcg_target_reg_alloc_order[n];
1653         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1654             break;
1655         }
1656     }
1657     for (i = 0; i < n; ++i) {
1658         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1659     }
1660     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1661         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1662     }
1663 
1664     tcg_ctx = s;
1665     /*
1666      * In user-mode we simply share the init context among threads, since we
1667      * use a single region. See the documentation of tcg_region_init() for the
1668      * reasoning behind this.
1669      * In system-mode we will have at most max_threads TCG threads.
1670      */
1671 #ifdef CONFIG_USER_ONLY
1672     tcg_ctxs = &tcg_ctx;
1673     tcg_cur_ctxs = 1;
1674     tcg_max_ctxs = 1;
1675 #else
1676     tcg_max_ctxs = max_threads;
1677     tcg_ctxs = g_new0(TCGContext *, max_threads);
1678 #endif
1679 
1680     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1681     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1682     tcg_env = temp_tcgv_ptr(ts);
1683 }
1684 
1685 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1686 {
1687     tcg_context_init(max_threads);
1688     tcg_region_init(tb_size, splitwx, max_threads);
1689 }
1690 
1691 /*
1692  * Allocate TBs right before their corresponding translated code, making
1693  * sure that TBs and code are on different cache lines.
1694  */
1695 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1696 {
1697     uintptr_t align = qemu_icache_linesize;
1698     TranslationBlock *tb;
1699     void *next;
1700 
1701  retry:
1702     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1703     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1704 
1705     if (unlikely(next > s->code_gen_highwater)) {
1706         if (tcg_region_alloc(s)) {
1707             return NULL;
1708         }
1709         goto retry;
1710     }
1711     qatomic_set(&s->code_gen_ptr, next);
1712     return tb;
1713 }
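/*
 * Worked example (illustrative): with a 64-byte icache line and
 * code_gen_ptr at base + 0x90, the TB is placed at base + 0xc0 and
 * the translated code starts on the next line boundary after the TB
 * ends, so the two never share a cache line.  ROUND_UP() behaves like
 * the usual power-of-two alignment idiom:
 */
#if 0
static uintptr_t align_up_example(uintptr_t p, uintptr_t align)
{
    return (p + align - 1) & -align;    /* 'align' must be a power of 2 */
}
#endif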
1714 
1715 void tcg_prologue_init(void)
1716 {
1717     TCGContext *s = tcg_ctx;
1718     size_t prologue_size;
1719 
1720     s->code_ptr = s->code_gen_ptr;
1721     s->code_buf = s->code_gen_ptr;
1722     s->data_gen_ptr = NULL;
1723 
1724 #ifndef CONFIG_TCG_INTERPRETER
1725     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1726 #endif
1727 
1728     s->pool_labels = NULL;
1729 
1730     qemu_thread_jit_write();
1731     /* Generate the prologue.  */
1732     tcg_target_qemu_prologue(s);
1733 
1734     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1735     {
1736         int result = tcg_out_pool_finalize(s);
1737         tcg_debug_assert(result == 0);
1738     }
1739 
1740     prologue_size = tcg_current_code_size(s);
1741     perf_report_prologue(s->code_gen_ptr, prologue_size);
1742 
1743 #ifndef CONFIG_TCG_INTERPRETER
1744     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1745                         (uintptr_t)s->code_buf, prologue_size);
1746 #endif
1747 
1748     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1749         FILE *logfile = qemu_log_trylock();
1750         if (logfile) {
1751             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1752             if (s->data_gen_ptr) {
1753                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1754                 size_t data_size = prologue_size - code_size;
1755                 size_t i;
1756 
1757                 disas(logfile, s->code_gen_ptr, code_size);
1758 
1759                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1760                     if (sizeof(tcg_target_ulong) == 8) {
1761                         fprintf(logfile,
1762                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1763                                 (uintptr_t)s->data_gen_ptr + i,
1764                                 *(uint64_t *)(s->data_gen_ptr + i));
1765                     } else {
1766                         fprintf(logfile,
1767                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1768                                 (uintptr_t)s->data_gen_ptr + i,
1769                                 *(uint32_t *)(s->data_gen_ptr + i));
1770                     }
1771                 }
1772             } else {
1773                 disas(logfile, s->code_gen_ptr, prologue_size);
1774             }
1775             fprintf(logfile, "\n");
1776             qemu_log_unlock(logfile);
1777         }
1778     }
1779 
1780 #ifndef CONFIG_TCG_INTERPRETER
1781     /*
1782      * Assert that goto_ptr is implemented completely, setting an epilogue.
1783      * For tci, we use NULL as the signal to return from the interpreter,
1784      * so skip this check.
1785      */
1786     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1787 #endif
1788 
1789     tcg_region_prologue_set(s);
1790 }
1791 
1792 void tcg_func_start(TCGContext *s)
1793 {
1794     tcg_pool_reset(s);
1795     s->nb_temps = s->nb_globals;
1796 
1797     /* No temps have been previously allocated for size or locality.  */
1798     tcg_temp_ebb_reset_freed(s);
1799 
1800     /* No constant temps have been previously allocated. */
1801     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1802         if (s->const_table[i]) {
1803             g_hash_table_remove_all(s->const_table[i]);
1804         }
1805     }
1806 
1807     s->nb_ops = 0;
1808     s->nb_labels = 0;
1809     s->current_frame_offset = s->frame_start;
1810 
1811 #ifdef CONFIG_DEBUG_TCG
1812     s->goto_tb_issue_mask = 0;
1813 #endif
1814 
1815     QTAILQ_INIT(&s->ops);
1816     QTAILQ_INIT(&s->free_ops);
1817     s->emit_before_op = NULL;
1818     QSIMPLEQ_INIT(&s->labels);
1819 
1820     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1821     tcg_debug_assert(s->insn_start_words > 0);
1822 }
1823 
1824 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1825 {
1826     int n = s->nb_temps++;
1827 
1828     if (n >= TCG_MAX_TEMPS) {
1829         tcg_raise_tb_overflow(s);
1830     }
1831     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1832 }
1833 
1834 static TCGTemp *tcg_global_alloc(TCGContext *s)
1835 {
1836     TCGTemp *ts;
1837 
1838     tcg_debug_assert(s->nb_globals == s->nb_temps);
1839     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1840     s->nb_globals++;
1841     ts = tcg_temp_alloc(s);
1842     ts->kind = TEMP_GLOBAL;
1843 
1844     return ts;
1845 }
1846 
1847 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1848                                             TCGReg reg, const char *name)
1849 {
1850     TCGTemp *ts;
1851 
1852     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1853 
1854     ts = tcg_global_alloc(s);
1855     ts->base_type = type;
1856     ts->type = type;
1857     ts->kind = TEMP_FIXED;
1858     ts->reg = reg;
1859     ts->name = name;
1860     tcg_regset_set_reg(s->reserved_regs, reg);
1861 
1862     return ts;
1863 }
1864 
1865 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1866 {
1867     s->frame_start = start;
1868     s->frame_end = start + size;
1869     s->frame_temp
1870         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1871 }
1872 
1873 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1874                                             const char *name, TCGType type)
1875 {
1876     TCGContext *s = tcg_ctx;
1877     TCGTemp *base_ts = tcgv_ptr_temp(base);
1878     TCGTemp *ts = tcg_global_alloc(s);
1879     int indirect_reg = 0;
1880 
1881     switch (base_ts->kind) {
1882     case TEMP_FIXED:
1883         break;
1884     case TEMP_GLOBAL:
1885         /* We do not support double-indirect registers.  */
1886         tcg_debug_assert(!base_ts->indirect_reg);
1887         base_ts->indirect_base = 1;
1888         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1889                             ? 2 : 1);
1890         indirect_reg = 1;
1891         break;
1892     default:
1893         g_assert_not_reached();
1894     }
1895 
1896     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1897         TCGTemp *ts2 = tcg_global_alloc(s);
1898         char buf[64];
1899 
1900         ts->base_type = TCG_TYPE_I64;
1901         ts->type = TCG_TYPE_I32;
1902         ts->indirect_reg = indirect_reg;
1903         ts->mem_allocated = 1;
1904         ts->mem_base = base_ts;
1905         ts->mem_offset = offset;
1906         pstrcpy(buf, sizeof(buf), name);
1907         pstrcat(buf, sizeof(buf), "_0");
1908         ts->name = strdup(buf);
1909 
1910         tcg_debug_assert(ts2 == ts + 1);
1911         ts2->base_type = TCG_TYPE_I64;
1912         ts2->type = TCG_TYPE_I32;
1913         ts2->indirect_reg = indirect_reg;
1914         ts2->mem_allocated = 1;
1915         ts2->mem_base = base_ts;
1916         ts2->mem_offset = offset + 4;
1917         ts2->temp_subindex = 1;
1918         pstrcpy(buf, sizeof(buf), name);
1919         pstrcat(buf, sizeof(buf), "_1");
1920         ts2->name = strdup(buf);
1921     } else {
1922         ts->base_type = type;
1923         ts->type = type;
1924         ts->indirect_reg = indirect_reg;
1925         ts->mem_allocated = 1;
1926         ts->mem_base = base_ts;
1927         ts->mem_offset = offset;
1928         ts->name = name;
1929     }
1930     return ts;
1931 }
1932 
1933 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1934 {
1935     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1936     return temp_tcgv_i32(ts);
1937 }
1938 
1939 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1940 {
1941     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1942     return temp_tcgv_i64(ts);
1943 }
1944 
1945 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1946 {
1947     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1948     return temp_tcgv_ptr(ts);
1949 }
1950 
1951 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1952 {
1953     TCGContext *s = tcg_ctx;
1954     TCGTemp *ts;
1955     int n;
1956 
1957     if (kind == TEMP_EBB) {
1958         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1959 
1960         if (idx < TCG_MAX_TEMPS) {
1961             /* There is already an available temp with the right type.  */
1962             clear_bit(idx, s->free_temps[type].l);
1963 
1964             ts = &s->temps[idx];
1965             ts->temp_allocated = 1;
1966             tcg_debug_assert(ts->base_type == type);
1967             tcg_debug_assert(ts->kind == kind);
1968             return ts;
1969         }
1970     } else {
1971         tcg_debug_assert(kind == TEMP_TB);
1972     }
1973 
1974     switch (type) {
1975     case TCG_TYPE_I32:
1976     case TCG_TYPE_V64:
1977     case TCG_TYPE_V128:
1978     case TCG_TYPE_V256:
1979         n = 1;
1980         break;
1981     case TCG_TYPE_I64:
1982         n = 64 / TCG_TARGET_REG_BITS;
1983         break;
1984     case TCG_TYPE_I128:
1985         n = 128 / TCG_TARGET_REG_BITS;
1986         break;
1987     default:
1988         g_assert_not_reached();
1989     }
1990 
1991     ts = tcg_temp_alloc(s);
1992     ts->base_type = type;
1993     ts->temp_allocated = 1;
1994     ts->kind = kind;
1995 
1996     if (n == 1) {
1997         ts->type = type;
1998     } else {
1999         ts->type = TCG_TYPE_REG;
2000 
2001         for (int i = 1; i < n; ++i) {
2002             TCGTemp *ts2 = tcg_temp_alloc(s);
2003 
2004             tcg_debug_assert(ts2 == ts + i);
2005             ts2->base_type = type;
2006             ts2->type = TCG_TYPE_REG;
2007             ts2->temp_allocated = 1;
2008             ts2->temp_subindex = i;
2009             ts2->kind = kind;
2010         }
2011     }
2012     return ts;
2013 }
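/*
 * Worked example (illustrative): TCG_TYPE_I128 on a 64-bit host gives
 * n = 128 / 64 = 2, so the value is backed by two adjacent TCGTemps
 * with temp_subindex 0 and 1, each of TCG_TYPE_REG; TCG_TYPE_I64 on a
 * 32-bit host is split the same way.  A sketch of the part count:
 */
#if 0
static int temp_parts_example(TCGType type)
{
    switch (type) {
    case TCG_TYPE_I64:
        return 64 / TCG_TARGET_REG_BITS;    /* 1 or 2 */
    case TCG_TYPE_I128:
        return 128 / TCG_TARGET_REG_BITS;   /* 2 or 4 */
    default:
        return 1;                           /* I32 and vector types */
    }
}
#endif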
2014 
2015 TCGv_i32 tcg_temp_new_i32(void)
2016 {
2017     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2018 }
2019 
2020 TCGv_i32 tcg_temp_ebb_new_i32(void)
2021 {
2022     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2023 }
2024 
2025 TCGv_i64 tcg_temp_new_i64(void)
2026 {
2027     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2028 }
2029 
2030 TCGv_i64 tcg_temp_ebb_new_i64(void)
2031 {
2032     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2033 }
2034 
2035 TCGv_ptr tcg_temp_new_ptr(void)
2036 {
2037     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2038 }
2039 
2040 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2041 {
2042     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2043 }
2044 
2045 TCGv_i128 tcg_temp_new_i128(void)
2046 {
2047     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2048 }
2049 
2050 TCGv_i128 tcg_temp_ebb_new_i128(void)
2051 {
2052     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2053 }
2054 
2055 TCGv_vec tcg_temp_new_vec(TCGType type)
2056 {
2057     TCGTemp *t;
2058 
2059 #ifdef CONFIG_DEBUG_TCG
2060     switch (type) {
2061     case TCG_TYPE_V64:
2062         assert(TCG_TARGET_HAS_v64);
2063         break;
2064     case TCG_TYPE_V128:
2065         assert(TCG_TARGET_HAS_v128);
2066         break;
2067     case TCG_TYPE_V256:
2068         assert(TCG_TARGET_HAS_v256);
2069         break;
2070     default:
2071         g_assert_not_reached();
2072     }
2073 #endif
2074 
2075     t = tcg_temp_new_internal(type, TEMP_EBB);
2076     return temp_tcgv_vec(t);
2077 }
2078 
2079 /* Create a new temp of the same type as an existing temp.  */
2080 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2081 {
2082     TCGTemp *t = tcgv_vec_temp(match);
2083 
2084     tcg_debug_assert(t->temp_allocated != 0);
2085 
2086     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2087     return temp_tcgv_vec(t);
2088 }
2089 
2090 void tcg_temp_free_internal(TCGTemp *ts)
2091 {
2092     TCGContext *s = tcg_ctx;
2093 
2094     switch (ts->kind) {
2095     case TEMP_CONST:
2096     case TEMP_TB:
2097         /* Silently ignore free. */
2098         break;
2099     case TEMP_EBB:
2100         tcg_debug_assert(ts->temp_allocated != 0);
2101         ts->temp_allocated = 0;
2102         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2103         break;
2104     default:
2105         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2106         g_assert_not_reached();
2107     }
2108 }
2109 
2110 void tcg_temp_free_i32(TCGv_i32 arg)
2111 {
2112     tcg_temp_free_internal(tcgv_i32_temp(arg));
2113 }
2114 
2115 void tcg_temp_free_i64(TCGv_i64 arg)
2116 {
2117     tcg_temp_free_internal(tcgv_i64_temp(arg));
2118 }
2119 
2120 void tcg_temp_free_i128(TCGv_i128 arg)
2121 {
2122     tcg_temp_free_internal(tcgv_i128_temp(arg));
2123 }
2124 
2125 void tcg_temp_free_ptr(TCGv_ptr arg)
2126 {
2127     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2128 }
2129 
2130 void tcg_temp_free_vec(TCGv_vec arg)
2131 {
2132     tcg_temp_free_internal(tcgv_vec_temp(arg));
2133 }
2134 
2135 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2136 {
2137     TCGContext *s = tcg_ctx;
2138     GHashTable *h = s->const_table[type];
2139     TCGTemp *ts;
2140 
2141     if (h == NULL) {
2142         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2143         s->const_table[type] = h;
2144     }
2145 
2146     ts = g_hash_table_lookup(h, &val);
2147     if (ts == NULL) {
2148         int64_t *val_ptr;
2149 
2150         ts = tcg_temp_alloc(s);
2151 
2152         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2153             TCGTemp *ts2 = tcg_temp_alloc(s);
2154 
2155             tcg_debug_assert(ts2 == ts + 1);
2156 
2157             ts->base_type = TCG_TYPE_I64;
2158             ts->type = TCG_TYPE_I32;
2159             ts->kind = TEMP_CONST;
2160             ts->temp_allocated = 1;
2161 
2162             ts2->base_type = TCG_TYPE_I64;
2163             ts2->type = TCG_TYPE_I32;
2164             ts2->kind = TEMP_CONST;
2165             ts2->temp_allocated = 1;
2166             ts2->temp_subindex = 1;
2167 
2168             /*
2169              * Retain the full value of the 64-bit constant in the low
2170              * part, so that the hash table works.  Actual uses will
2171              * truncate the value to the low part.
2172              */
2173             ts[HOST_BIG_ENDIAN].val = val;
2174             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2175             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2176         } else {
2177             ts->base_type = type;
2178             ts->type = type;
2179             ts->kind = TEMP_CONST;
2180             ts->temp_allocated = 1;
2181             ts->val = val;
2182             val_ptr = &ts->val;
2183         }
2184         g_hash_table_insert(h, val_ptr, ts);
2185     }
2186 
2187     return ts;
2188 }
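/*
 * Worked example (illustrative): on a 32-bit little-endian host
 * (HOST_BIG_ENDIAN == 0) with val == 0x1122334455667788:
 *
 *   ts[0].val = 0x1122334455667788   low-part temp; keeps the full
 *                                    value so it can be the hash key
 *   ts[1].val = 0x0000000011223344   high-part temp
 *
 * Uses of the low part truncate to 0x55667788.  On a big-endian host
 * the two indices swap, which is what the HOST_BIG_ENDIAN subscripts
 * above express.
 */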
2189 
2190 TCGv_i32 tcg_constant_i32(int32_t val)
2191 {
2192     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2193 }
2194 
2195 TCGv_i64 tcg_constant_i64(int64_t val)
2196 {
2197     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2198 }
2199 
2200 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2201 {
2202     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2203 }
2204 
2205 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2206 {
2207     val = dup_const(vece, val);
2208     return temp_tcgv_vec(tcg_constant_internal(type, val));
2209 }
2210 
2211 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2212 {
2213     TCGTemp *t = tcgv_vec_temp(match);
2214 
2215     tcg_debug_assert(t->temp_allocated != 0);
2216     return tcg_constant_vec(t->base_type, vece, val);
2217 }
2218 
2219 #ifdef CONFIG_DEBUG_TCG
2220 size_t temp_idx(TCGTemp *ts)
2221 {
2222     ptrdiff_t n = ts - tcg_ctx->temps;
2223     assert(n >= 0 && n < tcg_ctx->nb_temps);
2224     return n;
2225 }
2226 
2227 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2228 {
2229     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2230 
2231     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2232     assert(o % sizeof(TCGTemp) == 0);
2233 
2234     return (void *)tcg_ctx + (uintptr_t)v;
2235 }
2236 #endif /* CONFIG_DEBUG_TCG */
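/*
 * Illustrative sketch (an assumption drawn from the debug accessor
 * above): a TCGv_* handle is not a dereferenceable pointer but the
 * byte offset of its TCGTemp within TCGContext, so the conversions
 * in both directions are pure offset arithmetic:
 */
#if 0
static TCGv_i32 temp_to_handle_example(TCGTemp *ts)
{
    return (TCGv_i32)((void *)ts - (void *)tcg_ctx);
}

static TCGTemp *handle_to_temp_example(TCGv_i32 v)
{
    return (void *)tcg_ctx + (uintptr_t)v;  /* as tcgv_i32_temp above */
}
#endif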
2237 
2238 /*
2239  * Return true if OP may appear in the opcode stream with TYPE.
2240  * Test the runtime variable that controls each opcode.
2241  */
2242 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2243 {
2244     bool has_type;
2245 
2246     switch (type) {
2247     case TCG_TYPE_I32:
2248         has_type = true;
2249         break;
2250     case TCG_TYPE_I64:
2251         has_type = TCG_TARGET_REG_BITS == 64;
2252         break;
2253     case TCG_TYPE_V64:
2254         has_type = TCG_TARGET_HAS_v64;
2255         break;
2256     case TCG_TYPE_V128:
2257         has_type = TCG_TARGET_HAS_v128;
2258         break;
2259     case TCG_TYPE_V256:
2260         has_type = TCG_TARGET_HAS_v256;
2261         break;
2262     default:
2263         has_type = false;
2264         break;
2265     }
2266 
2267     switch (op) {
2268     case INDEX_op_discard:
2269     case INDEX_op_set_label:
2270     case INDEX_op_call:
2271     case INDEX_op_br:
2272     case INDEX_op_mb:
2273     case INDEX_op_insn_start:
2274     case INDEX_op_exit_tb:
2275     case INDEX_op_goto_tb:
2276     case INDEX_op_goto_ptr:
2277     case INDEX_op_qemu_ld_i32:
2278     case INDEX_op_qemu_st_i32:
2279     case INDEX_op_qemu_ld_i64:
2280     case INDEX_op_qemu_st_i64:
2281         return true;
2282 
2283     case INDEX_op_qemu_st8_i32:
2284         return TCG_TARGET_HAS_qemu_st8_i32;
2285 
2286     case INDEX_op_qemu_ld_i128:
2287     case INDEX_op_qemu_st_i128:
2288         return TCG_TARGET_HAS_qemu_ldst_i128;
2289 
2290     case INDEX_op_add:
2291     case INDEX_op_and:
2292     case INDEX_op_brcond:
2293     case INDEX_op_mov:
2294     case INDEX_op_movcond:
2295     case INDEX_op_negsetcond:
2296     case INDEX_op_or:
2297     case INDEX_op_setcond:
2298     case INDEX_op_xor:
2299         return has_type;
2300 
2301     case INDEX_op_ld8u_i32:
2302     case INDEX_op_ld8s_i32:
2303     case INDEX_op_ld16u_i32:
2304     case INDEX_op_ld16s_i32:
2305     case INDEX_op_ld_i32:
2306     case INDEX_op_st8_i32:
2307     case INDEX_op_st16_i32:
2308     case INDEX_op_st_i32:
2309     case INDEX_op_extract_i32:
2310     case INDEX_op_sextract_i32:
2311     case INDEX_op_deposit_i32:
2312         return true;
2313 
2314     case INDEX_op_extract2_i32:
2315         return TCG_TARGET_HAS_extract2_i32;
2316     case INDEX_op_add2_i32:
2317         return TCG_TARGET_HAS_add2_i32;
2318     case INDEX_op_sub2_i32:
2319         return TCG_TARGET_HAS_sub2_i32;
2320     case INDEX_op_bswap16_i32:
2321         return TCG_TARGET_HAS_bswap16_i32;
2322     case INDEX_op_bswap32_i32:
2323         return TCG_TARGET_HAS_bswap32_i32;
2324 
2325     case INDEX_op_brcond2_i32:
2326     case INDEX_op_setcond2_i32:
2327         return TCG_TARGET_REG_BITS == 32;
2328 
2329     case INDEX_op_ld8u_i64:
2330     case INDEX_op_ld8s_i64:
2331     case INDEX_op_ld16u_i64:
2332     case INDEX_op_ld16s_i64:
2333     case INDEX_op_ld32u_i64:
2334     case INDEX_op_ld32s_i64:
2335     case INDEX_op_ld_i64:
2336     case INDEX_op_st8_i64:
2337     case INDEX_op_st16_i64:
2338     case INDEX_op_st32_i64:
2339     case INDEX_op_st_i64:
2340     case INDEX_op_ext_i32_i64:
2341     case INDEX_op_extu_i32_i64:
2342     case INDEX_op_extract_i64:
2343     case INDEX_op_sextract_i64:
2344     case INDEX_op_deposit_i64:
2345         return TCG_TARGET_REG_BITS == 64;
2346 
2347     case INDEX_op_extract2_i64:
2348         return TCG_TARGET_HAS_extract2_i64;
2349     case INDEX_op_extrl_i64_i32:
2350     case INDEX_op_extrh_i64_i32:
2351         return TCG_TARGET_HAS_extr_i64_i32;
2352     case INDEX_op_bswap16_i64:
2353         return TCG_TARGET_HAS_bswap16_i64;
2354     case INDEX_op_bswap32_i64:
2355         return TCG_TARGET_HAS_bswap32_i64;
2356     case INDEX_op_bswap64_i64:
2357         return TCG_TARGET_HAS_bswap64_i64;
2358     case INDEX_op_add2_i64:
2359         return TCG_TARGET_HAS_add2_i64;
2360     case INDEX_op_sub2_i64:
2361         return TCG_TARGET_HAS_sub2_i64;
2362 
2363     case INDEX_op_mov_vec:
2364     case INDEX_op_dup_vec:
2365     case INDEX_op_dupm_vec:
2366     case INDEX_op_ld_vec:
2367     case INDEX_op_st_vec:
2368     case INDEX_op_add_vec:
2369     case INDEX_op_sub_vec:
2370     case INDEX_op_and_vec:
2371     case INDEX_op_or_vec:
2372     case INDEX_op_xor_vec:
2373     case INDEX_op_cmp_vec:
2374         return has_type;
2375     case INDEX_op_dup2_vec:
2376         return has_type && TCG_TARGET_REG_BITS == 32;
2377     case INDEX_op_not_vec:
2378         return has_type && TCG_TARGET_HAS_not_vec;
2379     case INDEX_op_neg_vec:
2380         return has_type && TCG_TARGET_HAS_neg_vec;
2381     case INDEX_op_abs_vec:
2382         return has_type && TCG_TARGET_HAS_abs_vec;
2383     case INDEX_op_andc_vec:
2384         return has_type && TCG_TARGET_HAS_andc_vec;
2385     case INDEX_op_orc_vec:
2386         return has_type && TCG_TARGET_HAS_orc_vec;
2387     case INDEX_op_nand_vec:
2388         return has_type && TCG_TARGET_HAS_nand_vec;
2389     case INDEX_op_nor_vec:
2390         return has_type && TCG_TARGET_HAS_nor_vec;
2391     case INDEX_op_eqv_vec:
2392         return has_type && TCG_TARGET_HAS_eqv_vec;
2393     case INDEX_op_mul_vec:
2394         return has_type && TCG_TARGET_HAS_mul_vec;
2395     case INDEX_op_shli_vec:
2396     case INDEX_op_shri_vec:
2397     case INDEX_op_sari_vec:
2398         return has_type && TCG_TARGET_HAS_shi_vec;
2399     case INDEX_op_shls_vec:
2400     case INDEX_op_shrs_vec:
2401     case INDEX_op_sars_vec:
2402         return has_type && TCG_TARGET_HAS_shs_vec;
2403     case INDEX_op_shlv_vec:
2404     case INDEX_op_shrv_vec:
2405     case INDEX_op_sarv_vec:
2406         return has_type && TCG_TARGET_HAS_shv_vec;
2407     case INDEX_op_rotli_vec:
2408         return has_type && TCG_TARGET_HAS_roti_vec;
2409     case INDEX_op_rotls_vec:
2410         return has_type && TCG_TARGET_HAS_rots_vec;
2411     case INDEX_op_rotlv_vec:
2412     case INDEX_op_rotrv_vec:
2413         return has_type && TCG_TARGET_HAS_rotv_vec;
2414     case INDEX_op_ssadd_vec:
2415     case INDEX_op_usadd_vec:
2416     case INDEX_op_sssub_vec:
2417     case INDEX_op_ussub_vec:
2418         return has_type && TCG_TARGET_HAS_sat_vec;
2419     case INDEX_op_smin_vec:
2420     case INDEX_op_umin_vec:
2421     case INDEX_op_smax_vec:
2422     case INDEX_op_umax_vec:
2423         return has_type && TCG_TARGET_HAS_minmax_vec;
2424     case INDEX_op_bitsel_vec:
2425         return has_type && TCG_TARGET_HAS_bitsel_vec;
2426     case INDEX_op_cmpsel_vec:
2427         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2428 
2429     default:
2430         if (op < INDEX_op_last_generic) {
2431             const TCGOutOp *outop;
2432             TCGConstraintSetIndex con_set;
2433 
2434             if (!has_type) {
2435                 return false;
2436             }
2437 
2438             outop = all_outop[op];
2439             tcg_debug_assert(outop != NULL);
2440 
2441             con_set = outop->static_constraint;
2442             if (con_set == C_Dynamic) {
2443                 con_set = outop->dynamic_constraint(type, flags);
2444             }
2445             if (con_set >= 0) {
2446                 return true;
2447             }
2448             tcg_debug_assert(con_set == C_NotImplemented);
2449             return false;
2450         }
2451         tcg_debug_assert(op < NB_OPS);
2452         return true;
2453 
2454     case INDEX_op_last_generic:
2455         g_assert_not_reached();
2456     }
2457 }
2458 
2459 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2460 {
2461     unsigned width;
2462 
2463     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2464     width = (type == TCG_TYPE_I32 ? 32 : 64);
2465 
2466     tcg_debug_assert(ofs < width);
2467     tcg_debug_assert(len > 0);
2468     tcg_debug_assert(len <= width - ofs);
2469 
2470     return TCG_TARGET_deposit_valid(type, ofs, len);
2471 }
2472 
2473 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2474 
2475 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2476                           TCGTemp *ret, TCGTemp **args)
2477 {
2478     TCGv_i64 extend_free[MAX_CALL_IARGS];
2479     int n_extend = 0;
2480     TCGOp *op;
2481     int i, n, pi = 0, total_args;
2482 
2483     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2484         init_call_layout(info);
2485         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2486     }
2487 
2488     total_args = info->nr_out + info->nr_in + 2;
2489     op = tcg_op_alloc(INDEX_op_call, total_args);
2490 
2491 #ifdef CONFIG_PLUGIN
2492     /* Flag helpers that may affect guest state */
2493     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2494         tcg_ctx->plugin_insn->calls_helpers = true;
2495     }
2496 #endif
2497 
2498     TCGOP_CALLO(op) = n = info->nr_out;
2499     switch (n) {
2500     case 0:
2501         tcg_debug_assert(ret == NULL);
2502         break;
2503     case 1:
2504         tcg_debug_assert(ret != NULL);
2505         op->args[pi++] = temp_arg(ret);
2506         break;
2507     case 2:
2508     case 4:
2509         tcg_debug_assert(ret != NULL);
2510         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2511         tcg_debug_assert(ret->temp_subindex == 0);
2512         for (i = 0; i < n; ++i) {
2513             op->args[pi++] = temp_arg(ret + i);
2514         }
2515         break;
2516     default:
2517         g_assert_not_reached();
2518     }
2519 
2520     TCGOP_CALLI(op) = n = info->nr_in;
2521     for (i = 0; i < n; i++) {
2522         const TCGCallArgumentLoc *loc = &info->in[i];
2523         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2524 
2525         switch (loc->kind) {
2526         case TCG_CALL_ARG_NORMAL:
2527         case TCG_CALL_ARG_BY_REF:
2528         case TCG_CALL_ARG_BY_REF_N:
2529             op->args[pi++] = temp_arg(ts);
2530             break;
2531 
2532         case TCG_CALL_ARG_EXTEND_U:
2533         case TCG_CALL_ARG_EXTEND_S:
2534             {
2535                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2536                 TCGv_i32 orig = temp_tcgv_i32(ts);
2537 
2538                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2539                     tcg_gen_ext_i32_i64(temp, orig);
2540                 } else {
2541                     tcg_gen_extu_i32_i64(temp, orig);
2542                 }
2543                 op->args[pi++] = tcgv_i64_arg(temp);
2544                 extend_free[n_extend++] = temp;
2545             }
2546             break;
2547 
2548         default:
2549             g_assert_not_reached();
2550         }
2551     }
2552     op->args[pi++] = (uintptr_t)func;
2553     op->args[pi++] = (uintptr_t)info;
2554     tcg_debug_assert(pi == total_args);
2555 
2556     if (tcg_ctx->emit_before_op) {
2557         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2558     } else {
2559         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2560     }
2561 
2562     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2563     for (i = 0; i < n_extend; ++i) {
2564         tcg_temp_free_i64(extend_free[i]);
2565     }
2566 }
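/*
 * Worked example (illustrative): on a host ABI that requires 32-bit
 * arguments to be widened (TCG_CALL_ARG_EXTEND), each i32 input above
 * is copied into a fresh EBB i64 temp (signed or zero extension
 * chosen by the layout kind), that temp is passed in place of the
 * original, and it is freed only after the call op has been inserted
 * into the stream.
 */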
2567 
2568 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2569 {
2570     tcg_gen_callN(func, info, ret, NULL);
2571 }
2572 
2573 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2574 {
2575     tcg_gen_callN(func, info, ret, &t1);
2576 }
2577 
2578 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2579                    TCGTemp *t1, TCGTemp *t2)
2580 {
2581     TCGTemp *args[2] = { t1, t2 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2587 {
2588     TCGTemp *args[3] = { t1, t2, t3 };
2589     tcg_gen_callN(func, info, ret, args);
2590 }
2591 
2592 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2593                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2594 {
2595     TCGTemp *args[4] = { t1, t2, t3, t4 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2600                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2601 {
2602     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2607                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2608                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2609 {
2610     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
2614 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2615                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2616                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2617 {
2618     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2619     tcg_gen_callN(func, info, ret, args);
2620 }
2621 
2622 static void tcg_reg_alloc_start(TCGContext *s)
2623 {
2624     int i, n;
2625 
2626     for (i = 0, n = s->nb_temps; i < n; i++) {
2627         TCGTemp *ts = &s->temps[i];
2628         TCGTempVal val = TEMP_VAL_MEM;
2629 
2630         switch (ts->kind) {
2631         case TEMP_CONST:
2632             val = TEMP_VAL_CONST;
2633             break;
2634         case TEMP_FIXED:
2635             val = TEMP_VAL_REG;
2636             break;
2637         case TEMP_GLOBAL:
2638             break;
2639         case TEMP_EBB:
2640             val = TEMP_VAL_DEAD;
2641             /* fall through */
2642         case TEMP_TB:
2643             ts->mem_allocated = 0;
2644             break;
2645         default:
2646             g_assert_not_reached();
2647         }
2648         ts->val_type = val;
2649     }
2650 
2651     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2652 }
2653 
2654 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2655                                  TCGTemp *ts)
2656 {
2657     int idx = temp_idx(ts);
2658 
2659     switch (ts->kind) {
2660     case TEMP_FIXED:
2661     case TEMP_GLOBAL:
2662         pstrcpy(buf, buf_size, ts->name);
2663         break;
2664     case TEMP_TB:
2665         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2666         break;
2667     case TEMP_EBB:
2668         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2669         break;
2670     case TEMP_CONST:
2671         switch (ts->type) {
2672         case TCG_TYPE_I32:
2673             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2674             break;
2675 #if TCG_TARGET_REG_BITS > 32
2676         case TCG_TYPE_I64:
2677             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2678             break;
2679 #endif
2680         case TCG_TYPE_V64:
2681         case TCG_TYPE_V128:
2682         case TCG_TYPE_V256:
2683             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2684                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2685             break;
2686         default:
2687             g_assert_not_reached();
2688         }
2689         break;
2690     }
2691     return buf;
2692 }
2693 
2694 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2695                              int buf_size, TCGArg arg)
2696 {
2697     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2698 }
2699 
2700 static const char * const cond_name[] =
2701 {
2702     [TCG_COND_NEVER] = "never",
2703     [TCG_COND_ALWAYS] = "always",
2704     [TCG_COND_EQ] = "eq",
2705     [TCG_COND_NE] = "ne",
2706     [TCG_COND_LT] = "lt",
2707     [TCG_COND_GE] = "ge",
2708     [TCG_COND_LE] = "le",
2709     [TCG_COND_GT] = "gt",
2710     [TCG_COND_LTU] = "ltu",
2711     [TCG_COND_GEU] = "geu",
2712     [TCG_COND_LEU] = "leu",
2713     [TCG_COND_GTU] = "gtu",
2714     [TCG_COND_TSTEQ] = "tsteq",
2715     [TCG_COND_TSTNE] = "tstne",
2716 };
2717 
2718 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2719 {
2720     [MO_UB]   = "ub",
2721     [MO_SB]   = "sb",
2722     [MO_LEUW] = "leuw",
2723     [MO_LESW] = "lesw",
2724     [MO_LEUL] = "leul",
2725     [MO_LESL] = "lesl",
2726     [MO_LEUQ] = "leq",
2727     [MO_BEUW] = "beuw",
2728     [MO_BESW] = "besw",
2729     [MO_BEUL] = "beul",
2730     [MO_BESL] = "besl",
2731     [MO_BEUQ] = "beq",
2732     [MO_128 + MO_BE] = "beo",
2733     [MO_128 + MO_LE] = "leo",
2734 };
2735 
2736 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2737     [MO_UNALN >> MO_ASHIFT]    = "un+",
2738     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2739     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2740     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2741     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2742     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2743     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2744     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2745 };
2746 
2747 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2748     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2749     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2750     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2751     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2752     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2753     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2754 };
2755 
2756 static const char bswap_flag_name[][6] = {
2757     [TCG_BSWAP_IZ] = "iz",
2758     [TCG_BSWAP_OZ] = "oz",
2759     [TCG_BSWAP_OS] = "os",
2760     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2761     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2762 };
2763 
2764 #ifdef CONFIG_PLUGIN
2765 static const char * const plugin_from_name[] = {
2766     "from-tb",
2767     "from-insn",
2768     "after-insn",
2769     "after-tb",
2770 };
2771 #endif
2772 
2773 static inline bool tcg_regset_single(TCGRegSet d)
2774 {
2775     return (d & (d - 1)) == 0;
2776 }
2777 
2778 static inline TCGReg tcg_regset_first(TCGRegSet d)
2779 {
2780     if (TCG_TARGET_NB_REGS <= 32) {
2781         return ctz32(d);
2782     } else {
2783         return ctz64(d);
2784     }
2785 }
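/*
 * Worked example (illustrative): for d == 0x50 (registers 4 and 6),
 * d & (d - 1) == 0x40, so tcg_regset_single(d) is false; ctz32(d)
 * == 4, so tcg_regset_first(d) returns the lowest register in the
 * set.  Clearing the lowest set bit and counting trailing zeros are
 * the standard bit tricks behind both helpers.
 */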
2786 
2787 /* Return only the number of characters output -- no error return. */
2788 #define ne_fprintf(...) \
2789     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2790 
2791 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2792 {
2793     char buf[128];
2794     TCGOp *op;
2795 
2796     QTAILQ_FOREACH(op, &s->ops, link) {
2797         int i, k, nb_oargs, nb_iargs, nb_cargs;
2798         const TCGOpDef *def;
2799         TCGOpcode c;
2800         int col = 0;
2801 
2802         c = op->opc;
2803         def = &tcg_op_defs[c];
2804 
2805         if (c == INDEX_op_insn_start) {
2806             nb_oargs = 0;
2807             col += ne_fprintf(f, "\n ----");
2808 
2809             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2810                 col += ne_fprintf(f, " %016" PRIx64,
2811                                   tcg_get_insn_start_param(op, i));
2812             }
2813         } else if (c == INDEX_op_call) {
2814             const TCGHelperInfo *info = tcg_call_info(op);
2815             void *func = tcg_call_func(op);
2816 
2817             /* variable number of arguments */
2818             nb_oargs = TCGOP_CALLO(op);
2819             nb_iargs = TCGOP_CALLI(op);
2820             nb_cargs = def->nb_cargs;
2821 
2822             col += ne_fprintf(f, " %s ", def->name);
2823 
2824             /*
2825              * Print the function name from TCGHelperInfo, if available.
2826              * Note that plugins have a template function for the info,
2827              * but the actual function pointer comes from the plugin.
2828              */
2829             if (func == info->func) {
2830                 col += ne_fprintf(f, "%s", info->name);
2831             } else {
2832                 col += ne_fprintf(f, "plugin(%p)", func);
2833             }
2834 
2835             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2836             for (i = 0; i < nb_oargs; i++) {
2837                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2838                                                             op->args[i]));
2839             }
2840             for (i = 0; i < nb_iargs; i++) {
2841                 TCGArg arg = op->args[nb_oargs + i];
2842                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2843                 col += ne_fprintf(f, ",%s", t);
2844             }
2845         } else {
2846             if (def->flags & TCG_OPF_INT) {
2847                 col += ne_fprintf(f, " %s_i%d ",
2848                                   def->name,
2849                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2850             } else if (def->flags & TCG_OPF_VECTOR) {
2851                 col += ne_fprintf(f, "%s v%d,e%d,",
2852                                   def->name,
2853                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2854                                   8 << TCGOP_VECE(op));
2855             } else {
2856                 col += ne_fprintf(f, " %s ", def->name);
2857             }
2858 
2859             nb_oargs = def->nb_oargs;
2860             nb_iargs = def->nb_iargs;
2861             nb_cargs = def->nb_cargs;
2862 
2863             k = 0;
2864             for (i = 0; i < nb_oargs; i++) {
2865                 const char *sep =  k ? "," : "";
2866                 col += ne_fprintf(f, "%s%s", sep,
2867                                   tcg_get_arg_str(s, buf, sizeof(buf),
2868                                                   op->args[k++]));
2869             }
2870             for (i = 0; i < nb_iargs; i++) {
2871                 const char *sep =  k ? "," : "";
2872                 col += ne_fprintf(f, "%s%s", sep,
2873                                   tcg_get_arg_str(s, buf, sizeof(buf),
2874                                                   op->args[k++]));
2875             }
2876             switch (c) {
2877             case INDEX_op_brcond:
2878             case INDEX_op_setcond:
2879             case INDEX_op_negsetcond:
2880             case INDEX_op_movcond:
2881             case INDEX_op_brcond2_i32:
2882             case INDEX_op_setcond2_i32:
2883             case INDEX_op_cmp_vec:
2884             case INDEX_op_cmpsel_vec:
2885                 if (op->args[k] < ARRAY_SIZE(cond_name)
2886                     && cond_name[op->args[k]]) {
2887                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2888                 } else {
2889                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2890                 }
2891                 i = 1;
2892                 break;
2893             case INDEX_op_qemu_ld_i32:
2894             case INDEX_op_qemu_st_i32:
2895             case INDEX_op_qemu_st8_i32:
2896             case INDEX_op_qemu_ld_i64:
2897             case INDEX_op_qemu_st_i64:
2898             case INDEX_op_qemu_ld_i128:
2899             case INDEX_op_qemu_st_i128:
2900                 {
2901                     const char *s_al, *s_op, *s_at;
2902                     MemOpIdx oi = op->args[k++];
2903                     MemOp mop = get_memop(oi);
2904                     unsigned ix = get_mmuidx(oi);
2905 
2906                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2907                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2908                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2909                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2910 
2911                     /* If all fields are accounted for, print symbolically. */
2912                     if (!mop && s_al && s_op && s_at) {
2913                         col += ne_fprintf(f, ",%s%s%s,%u",
2914                                           s_at, s_al, s_op, ix);
2915                     } else {
2916                         mop = get_memop(oi);
2917                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2918                     }
2919                     i = 1;
2920                 }
2921                 break;
2922             case INDEX_op_bswap16_i32:
2923             case INDEX_op_bswap16_i64:
2924             case INDEX_op_bswap32_i32:
2925             case INDEX_op_bswap32_i64:
2926             case INDEX_op_bswap64_i64:
2927                 {
2928                     TCGArg flags = op->args[k];
2929                     const char *name = NULL;
2930 
2931                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2932                         name = bswap_flag_name[flags];
2933                     }
2934                     if (name) {
2935                         col += ne_fprintf(f, ",%s", name);
2936                     } else {
2937                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2938                     }
2939                     i = k = 1;
2940                 }
2941                 break;
2942 #ifdef CONFIG_PLUGIN
2943             case INDEX_op_plugin_cb:
2944                 {
2945                     TCGArg from = op->args[k++];
2946                     const char *name = NULL;
2947 
2948                     if (from < ARRAY_SIZE(plugin_from_name)) {
2949                         name = plugin_from_name[from];
2950                     }
2951                     if (name) {
2952                         col += ne_fprintf(f, "%s", name);
2953                     } else {
2954                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2955                     }
2956                     i = 1;
2957                 }
2958                 break;
2959 #endif
2960             default:
2961                 i = 0;
2962                 break;
2963             }
2964             switch (c) {
2965             case INDEX_op_set_label:
2966             case INDEX_op_br:
2967             case INDEX_op_brcond:
2968             case INDEX_op_brcond2_i32:
2969                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2970                                   arg_label(op->args[k])->id);
2971                 i++, k++;
2972                 break;
2973             case INDEX_op_mb:
2974                 {
2975                     TCGBar membar = op->args[k];
2976                     const char *b_op, *m_op;
2977 
2978                     switch (membar & TCG_BAR_SC) {
2979                     case 0:
2980                         b_op = "none";
2981                         break;
2982                     case TCG_BAR_LDAQ:
2983                         b_op = "acq";
2984                         break;
2985                     case TCG_BAR_STRL:
2986                         b_op = "rel";
2987                         break;
2988                     case TCG_BAR_SC:
2989                         b_op = "seq";
2990                         break;
2991                     default:
2992                         g_assert_not_reached();
2993                     }
2994 
2995                     switch (membar & TCG_MO_ALL) {
2996                     case 0:
2997                         m_op = "none";
2998                         break;
2999                     case TCG_MO_LD_LD:
3000                         m_op = "rr";
3001                         break;
3002                     case TCG_MO_LD_ST:
3003                         m_op = "rw";
3004                         break;
3005                     case TCG_MO_ST_LD:
3006                         m_op = "wr";
3007                         break;
3008                     case TCG_MO_ST_ST:
3009                         m_op = "ww";
3010                         break;
3011                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3012                         m_op = "rr+rw";
3013                         break;
3014                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3015                         m_op = "rr+wr";
3016                         break;
3017                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3018                         m_op = "rr+ww";
3019                         break;
3020                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3021                         m_op = "rw+wr";
3022                         break;
3023                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3024                         m_op = "rw+ww";
3025                         break;
3026                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3027                         m_op = "wr+ww";
3028                         break;
3029                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3030                         m_op = "rr+rw+wr";
3031                         break;
3032                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3033                         m_op = "rr+rw+ww";
3034                         break;
3035                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3036                         m_op = "rr+wr+ww";
3037                         break;
3038                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3039                         m_op = "rw+wr+ww";
3040                         break;
3041                     case TCG_MO_ALL:
3042                         m_op = "all";
3043                         break;
3044                     default:
3045                         g_assert_not_reached();
3046                     }
3047 
3048                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3049                     i++, k++;
3050                 }
3051                 break;
3052             default:
3053                 break;
3054             }
3055             for (; i < nb_cargs; i++, k++) {
3056                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3057                                   op->args[k]);
3058             }
3059         }
3060 
3061         if (have_prefs || op->life) {
3062             for (; col < 40; ++col) {
3063                 putc(' ', f);
3064             }
3065         }
3066 
3067         if (op->life) {
3068             unsigned life = op->life;
3069 
3070             if (life & (SYNC_ARG * 3)) {
3071                 ne_fprintf(f, "  sync:");
3072                 for (i = 0; i < 2; ++i) {
3073                     if (life & (SYNC_ARG << i)) {
3074                         ne_fprintf(f, " %d", i);
3075                     }
3076                 }
3077             }
3078             life /= DEAD_ARG;
3079             if (life) {
3080                 ne_fprintf(f, "  dead:");
3081                 for (i = 0; life; ++i, life >>= 1) {
3082                     if (life & 1) {
3083                         ne_fprintf(f, " %d", i);
3084                     }
3085                 }
3086             }
3087         }
3088 
3089         if (have_prefs) {
3090             for (i = 0; i < nb_oargs; ++i) {
3091                 TCGRegSet set = output_pref(op, i);
3092 
3093                 if (i == 0) {
3094                     ne_fprintf(f, "  pref=");
3095                 } else {
3096                     ne_fprintf(f, ",");
3097                 }
3098                 if (set == 0) {
3099                     ne_fprintf(f, "none");
3100                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3101                     ne_fprintf(f, "all");
3102 #ifdef CONFIG_DEBUG_TCG
3103                 } else if (tcg_regset_single(set)) {
3104                     TCGReg reg = tcg_regset_first(set);
3105                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3106 #endif
3107                 } else if (TCG_TARGET_NB_REGS <= 32) {
3108                     ne_fprintf(f, "0x%x", (uint32_t)set);
3109                 } else {
3110                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3111                 }
3112             }
3113         }
3114 
3115         putc('\n', f);
3116     }
3117 }
3118 
3119 /* We give more priority to constraints with fewer registers. */
3120 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3121 {
3122     int n;
3123 
3124     arg_ct += k;
3125     n = ctpop64(arg_ct->regs);
3126 
3127     /*
3128      * Sort constraints of a single register first, which includes output
3129      * aliases (which must exactly match the input already allocated).
3130      */
3131     if (n == 1 || arg_ct->oalias) {
3132         return INT_MAX;
3133     }
3134 
3135     /*
3136      * Sort register pairs next, first then second immediately after.
3137      * Arbitrarily sort multiple pairs by the index of the first reg;
3138      * there shouldn't be many pairs.
3139      */
3140     switch (arg_ct->pair) {
3141     case 1:
3142     case 3:
3143         return (k + 1) * 2;
3144     case 2:
3145         return (arg_ct->pair_index + 1) * 2 - 1;
3146     }
3147 
3148     /* Finally, sort by decreasing register count. */
3149     assert(n > 1);
3150     return -n;
3151 }
3152 
3153 /* sort from highest priority to lowest */
3154 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3155 {
3156     int i, j;
3157 
3158     for (i = 0; i < n; i++) {
3159         a[start + i].sort_index = start + i;
3160     }
3161     if (n <= 1) {
3162         return;
3163     }
3164     for (i = 0; i < n - 1; i++) {
3165         for (j = i + 1; j < n; j++) {
3166             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3167             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3168             if (p1 < p2) {
3169                 int tmp = a[start + i].sort_index;
3170                 a[start + i].sort_index = a[start + j].sort_index;
3171                 a[start + j].sort_index = tmp;
3172             }
3173         }
3174     }
3175 }
3176 
3177 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3178 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3179 
3180 static void process_constraint_sets(void)
3181 {
3182     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3183         const TCGConstraintSet *tdefs = &constraint_sets[c];
3184         TCGArgConstraint *args_ct = all_cts[c];
3185         int nb_oargs = tdefs->nb_oargs;
3186         int nb_iargs = tdefs->nb_iargs;
3187         int nb_args = nb_oargs + nb_iargs;
3188         bool saw_alias_pair = false;
3189 
3190         for (int i = 0; i < nb_args; i++) {
3191             const char *ct_str = tdefs->args_ct_str[i];
3192             bool input_p = i >= nb_oargs;
3193             int o;
3194 
3195             switch (*ct_str) {
3196             case '0' ... '9':
3197                 o = *ct_str - '0';
3198                 tcg_debug_assert(input_p);
3199                 tcg_debug_assert(o < nb_oargs);
3200                 tcg_debug_assert(args_ct[o].regs != 0);
3201                 tcg_debug_assert(!args_ct[o].oalias);
3202                 args_ct[i] = args_ct[o];
3203                 /* The output sets oalias.  */
3204                 args_ct[o].oalias = 1;
3205                 args_ct[o].alias_index = i;
3206                 /* The input sets ialias. */
3207                 args_ct[i].ialias = 1;
3208                 args_ct[i].alias_index = o;
3209                 if (args_ct[i].pair) {
3210                     saw_alias_pair = true;
3211                 }
3212                 tcg_debug_assert(ct_str[1] == '\0');
3213                 continue;
3214 
3215             case '&':
3216                 tcg_debug_assert(!input_p);
3217                 args_ct[i].newreg = true;
3218                 ct_str++;
3219                 break;
3220 
3221             case 'p': /* plus */
3222                 /* Allocate to the register after the previous. */
3223                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3224                 o = i - 1;
3225                 tcg_debug_assert(!args_ct[o].pair);
3226                 tcg_debug_assert(!args_ct[o].ct);
3227                 args_ct[i] = (TCGArgConstraint){
3228                     .pair = 2,
3229                     .pair_index = o,
3230                     .regs = args_ct[o].regs << 1,
3231                     .newreg = args_ct[o].newreg,
3232                 };
3233                 args_ct[o].pair = 1;
3234                 args_ct[o].pair_index = i;
3235                 tcg_debug_assert(ct_str[1] == '\0');
3236                 continue;
3237 
3238             case 'm': /* minus */
3239                 /* Allocate to the register before the previous. */
3240                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3241                 o = i - 1;
3242                 tcg_debug_assert(!args_ct[o].pair);
3243                 tcg_debug_assert(!args_ct[o].ct);
3244                 args_ct[i] = (TCGArgConstraint){
3245                     .pair = 1,
3246                     .pair_index = o,
3247                     .regs = args_ct[o].regs >> 1,
3248                     .newreg = args_ct[o].newreg,
3249                 };
3250                 args_ct[o].pair = 2;
3251                 args_ct[o].pair_index = i;
3252                 tcg_debug_assert(ct_str[1] == '\0');
3253                 continue;
3254             }
3255 
3256             do {
3257                 switch (*ct_str) {
3258                 case 'i':
3259                     args_ct[i].ct |= TCG_CT_CONST;
3260                     break;
3261 #ifdef TCG_REG_ZERO
3262                 case 'z':
3263                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3264                     break;
3265 #endif
3266 
3267                 /* Include all of the target-specific constraints. */
3268 
3269 #undef CONST
3270 #define CONST(CASE, MASK) \
3271     case CASE: args_ct[i].ct |= MASK; break;
3272 #define REGS(CASE, MASK) \
3273     case CASE: args_ct[i].regs |= MASK; break;
3274 
3275 #include "tcg-target-con-str.h"
3276 
3277 #undef REGS
3278 #undef CONST
3279                 default:
3280                 case '0' ... '9':
3281                 case '&':
3282                 case 'p':
3283                 case 'm':
3284                     /* Typo in TCGConstraintSet constraint. */
3285                     g_assert_not_reached();
3286                 }
3287             } while (*++ct_str != '\0');
3288         }
3289 
3290         /*
3291          * Fix up output pairs that are aliased with inputs.
3292          * When we created the alias, we copied pair from the output.
3293          * There are three cases:
3294          *    (1a) Pairs of inputs alias pairs of outputs.
3295          *    (1b) One input aliases the first of a pair of outputs.
3296          *    (2)  One input aliases the second of a pair of outputs.
3297          *
3298          * Case 1a is handled by making sure that the pair_index'es are
3299          * properly updated so that they appear the same as a pair of inputs.
3300          *
3301          * Case 1b is handled by setting the pair_index of the input to
3302          * itself, simply so it doesn't point to an unrelated argument.
3303          * Since we don't encounter the "second" during the input allocation
3304          * phase, nothing happens with the second half of the input pair.
3305          *
3306          * Case 2 is handled by setting the second input to pair=3, the
3307          * first output to pair=3, and the pair_index'es to match.
3308          */
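        /*
         * Hypothetical instance of case 2: with an output pair O0/O1
         * and an input constrained as "1" (aliasing O1), the input
         * copies pair == 2 from O1.  The fixup below sets the input
         * and O0 to pair == 3 and links their pair_index fields, so
         * that allocating the input also accounts for the first half
         * of the output pair.
         */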
3309         if (saw_alias_pair) {
3310             for (int i = nb_oargs; i < nb_args; i++) {
3311                 int o, o2, i2;
3312 
3313                 /*
3314                  * Since [0-9pm] must be alone in the constraint string,
3315                  * the only way they can both be set is if the pair comes
3316                  * from the output alias.
3317                  */
3318                 if (!args_ct[i].ialias) {
3319                     continue;
3320                 }
3321                 switch (args_ct[i].pair) {
3322                 case 0:
3323                     break;
3324                 case 1:
3325                     o = args_ct[i].alias_index;
3326                     o2 = args_ct[o].pair_index;
3327                     tcg_debug_assert(args_ct[o].pair == 1);
3328                     tcg_debug_assert(args_ct[o2].pair == 2);
3329                     if (args_ct[o2].oalias) {
3330                         /* Case 1a */
3331                         i2 = args_ct[o2].alias_index;
3332                         tcg_debug_assert(args_ct[i2].pair == 2);
3333                         args_ct[i2].pair_index = i;
3334                         args_ct[i].pair_index = i2;
3335                     } else {
3336                         /* Case 1b */
3337                         args_ct[i].pair_index = i;
3338                     }
3339                     break;
3340                 case 2:
3341                     o = args_ct[i].alias_index;
3342                     o2 = args_ct[o].pair_index;
3343                     tcg_debug_assert(args_ct[o].pair == 2);
3344                     tcg_debug_assert(args_ct[o2].pair == 1);
3345                     if (args_ct[o2].oalias) {
3346                         /* Case 1a */
3347                         i2 = args_ct[o2].alias_index;
3348                         tcg_debug_assert(args_ct[i2].pair == 1);
3349                         args_ct[i2].pair_index = i;
3350                         args_ct[i].pair_index = i2;
3351                     } else {
3352                         /* Case 2 */
3353                         args_ct[i].pair = 3;
3354                         args_ct[o2].pair = 3;
3355                         args_ct[i].pair_index = o2;
3356                         args_ct[o2].pair_index = i;
3357                     }
3358                     break;
3359                 default:
3360                     g_assert_not_reached();
3361                 }
3362             }
3363         }
3364 
3365         /* sort the constraints (XXX: this is just a heuristic) */
3366         sort_constraints(args_ct, 0, nb_oargs);
3367         sort_constraints(args_ct, nb_oargs, nb_iargs);
3368     }
3369 }
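
/*
 * For illustration, a hypothetical constraint set
 * { .nb_oargs = 1, .nb_iargs = 2, .args_ct_str = { "r", "0", "ri" } }
 * parses as: output 0 may use any register in 'r'; input 1 must be
 * allocated to the same register as output 0 (the oalias/ialias link
 * above); input 2 may be a register or a constant (TCG_CT_CONST).
 */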
3370 
3371 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3372 {
3373     TCGOpcode opc = op->opc;
3374     TCGType type = TCGOP_TYPE(op);
3375     unsigned flags = TCGOP_FLAGS(op);
3376     const TCGOpDef *def = &tcg_op_defs[opc];
3377     const TCGOutOp *outop = all_outop[opc];
3378     TCGConstraintSetIndex con_set;
3379 
3380     if (def->flags & TCG_OPF_NOT_PRESENT) {
3381         return empty_cts;
3382     }
3383 
3384     if (outop) {
3385         con_set = outop->static_constraint;
3386         if (con_set == C_Dynamic) {
3387             con_set = outop->dynamic_constraint(type, flags);
3388         }
3389     } else {
3390         con_set = tcg_target_op_def(opc, type, flags);
3391     }
3392     tcg_debug_assert(con_set >= 0);
3393     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3394 
3395     /* The constraint arguments must match TCGOpcode arguments. */
3396     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3397     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3398 
3399     return all_cts[con_set];
3400 }
3401 
3402 static void remove_label_use(TCGOp *op, int idx)
3403 {
3404     TCGLabel *label = arg_label(op->args[idx]);
3405     TCGLabelUse *use;
3406 
3407     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3408         if (use->op == op) {
3409             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3410             return;
3411         }
3412     }
3413     g_assert_not_reached();
3414 }
3415 
3416 void tcg_op_remove(TCGContext *s, TCGOp *op)
3417 {
3418     switch (op->opc) {
3419     case INDEX_op_br:
3420         remove_label_use(op, 0);
3421         break;
3422     case INDEX_op_brcond:
3423         remove_label_use(op, 3);
3424         break;
3425     case INDEX_op_brcond2_i32:
3426         remove_label_use(op, 5);
3427         break;
3428     default:
3429         break;
3430     }
3431 
3432     QTAILQ_REMOVE(&s->ops, op, link);
3433     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3434     s->nb_ops--;
3435 }
3436 
3437 void tcg_remove_ops_after(TCGOp *op)
3438 {
3439     TCGContext *s = tcg_ctx;
3440 
3441     while (true) {
3442         TCGOp *last = tcg_last_op();
3443         if (last == op) {
3444             return;
3445         }
3446         tcg_op_remove(s, last);
3447     }
3448 }
3449 
3450 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3451 {
3452     TCGContext *s = tcg_ctx;
3453     TCGOp *op = NULL;
3454 
3455     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3456         QTAILQ_FOREACH(op, &s->free_ops, link) {
3457             if (nargs <= op->nargs) {
3458                 QTAILQ_REMOVE(&s->free_ops, op, link);
3459                 nargs = op->nargs;
3460                 goto found;
3461             }
3462         }
3463     }
3464 
3465     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3466     nargs = MAX(4, nargs);
3467     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3468 
3469  found:
3470     memset(op, 0, offsetof(TCGOp, link));
3471     op->opc = opc;
3472     op->nargs = nargs;
3473 
3474     /* Check for bitfield overflow. */
3475     tcg_debug_assert(op->nargs == nargs);
3476 
3477     s->nb_ops++;
3478     return op;
3479 }
3480 
3481 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3482 {
3483     TCGOp *op = tcg_op_alloc(opc, nargs);
3484 
3485     if (tcg_ctx->emit_before_op) {
3486         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3487     } else {
3488         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3489     }
3490     return op;
3491 }
3492 
3493 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3494                             TCGOpcode opc, TCGType type, unsigned nargs)
3495 {
3496     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3497 
3498     TCGOP_TYPE(new_op) = type;
3499     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3500     return new_op;
3501 }
3502 
3503 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3504                            TCGOpcode opc, TCGType type, unsigned nargs)
3505 {
3506     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3507 
3508     TCGOP_TYPE(new_op) = type;
3509     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3510     return new_op;
3511 }
3512 
3513 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3514 {
3515     TCGLabelUse *u;
3516 
3517     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3518         TCGOp *op = u->op;
3519         switch (op->opc) {
3520         case INDEX_op_br:
3521             op->args[0] = label_arg(to);
3522             break;
3523         case INDEX_op_brcond:
3524             op->args[3] = label_arg(to);
3525             break;
3526         case INDEX_op_brcond2_i32:
3527             op->args[5] = label_arg(to);
3528             break;
3529         default:
3530             g_assert_not_reached();
3531         }
3532     }
3533 
3534     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3535 }
3536 
3537 /* Reachability analysis: remove unreachable code.  */
3538 static void __attribute__((noinline))
3539 reachable_code_pass(TCGContext *s)
3540 {
3541     TCGOp *op, *op_next, *op_prev;
3542     bool dead = false;
3543 
3544     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3545         bool remove = dead;
3546         TCGLabel *label;
3547 
3548         switch (op->opc) {
3549         case INDEX_op_set_label:
3550             label = arg_label(op->args[0]);
3551 
3552             /*
3553              * Note that the first op in the TB is always a load,
3554              * so there is always something before a label.
3555              */
3556             op_prev = QTAILQ_PREV(op, link);
3557 
3558             /*
3559              * If we find two sequential labels, move all branches to
3560              * reference the second label and remove the first label.
3561              * Do this before branch to next optimization, so that the
3562              * middle label is out of the way.
3563              */
3564             if (op_prev->opc == INDEX_op_set_label) {
3565                 move_label_uses(label, arg_label(op_prev->args[0]));
3566                 tcg_op_remove(s, op_prev);
3567                 op_prev = QTAILQ_PREV(op, link);
3568             }
3569 
3570             /*
3571              * Optimization can fold conditional branches to unconditional.
3572              * If we find a label which is preceded by an unconditional
3573              * branch to next, remove the branch.  We couldn't do this when
3574              * processing the branch because any dead code between the branch
3575              * and label had not yet been removed.
3576              */
3577             if (op_prev->opc == INDEX_op_br &&
3578                 label == arg_label(op_prev->args[0])) {
3579                 tcg_op_remove(s, op_prev);
3580                 /* Fall through means insns become live again.  */
3581                 dead = false;
3582             }
3583 
3584             if (QSIMPLEQ_EMPTY(&label->branches)) {
3585                 /*
3586                  * While there is an occasional backward branch, virtually
3587                  * all branches generated by the translators are forward.
3588                  * Which means that, by the time we reach a label, we will
3589                  * generally have already seen every branch to it, and there
3590                  * is little to be gained by iterating.
3591                  */
3592                 remove = true;
3593             } else {
3594                 /* Once we see a label, insns become live again.  */
3595                 dead = false;
3596                 remove = false;
3597             }
3598             break;
3599 
3600         case INDEX_op_br:
3601         case INDEX_op_exit_tb:
3602         case INDEX_op_goto_ptr:
3603             /* Unconditional branches; everything following is dead.  */
3604             dead = true;
3605             break;
3606 
3607         case INDEX_op_call:
3608             /* Notice noreturn helper calls, raising exceptions.  */
3609             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3610                 dead = true;
3611             }
3612             break;
3613 
3614         case INDEX_op_insn_start:
3615             /* Never remove -- we need to keep these for unwind.  */
3616             remove = false;
3617             break;
3618 
3619         default:
3620             break;
3621         }
3622 
3623         if (remove) {
3624             tcg_op_remove(s, op);
3625         }
3626     }
3627 }
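
/*
 * Illustrative effect on a made-up op stream:
 *
 *     brcond t0, t1, eq, L0    -> retargeted to branch to L1
 *     br L2                    -> everything below it is dead ...
 *     mov t2, t3               -> ... and is removed
 *     set_label L0             -> merged into L1, then removed
 *     set_label L1             -> code becomes live again
 *
 * Sequential labels collapse into one, dead code between an
 * unconditional branch and the next label is discarded, and a branch
 * to the immediately following label would be deleted as well.
 */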
3628 
3629 #define TS_DEAD  1
3630 #define TS_MEM   2
3631 
3632 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3633 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
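
/*
 * An op's life mask, as printed by the dump code above, packs one
 * SYNC_ARG bit per output (outputs that must be written back to
 * memory) and one DEAD_ARG bit per argument (temps that die at this
 * op).  For example, if output 0 needs a sync and input 2 dies,
 * NEED_SYNC_ARG(0) and IS_DEAD_ARG(2) are both true.
 */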
3634 
3635 /* For liveness_pass_1, the register preferences for a given temp.  */
3636 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3637 {
3638     return ts->state_ptr;
3639 }
3640 
3641 /* For liveness_pass_1, reset the preferences for a given temp to the
3642  * maximal regset for its type.
3643  */
3644 static inline void la_reset_pref(TCGTemp *ts)
3645 {
3646     *la_temp_pref(ts)
3647         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3648 }
3649 
3650 /* liveness analysis: end of function: all temps are dead, and globals
3651    should be in memory. */
3652 static void la_func_end(TCGContext *s, int ng, int nt)
3653 {
3654     int i;
3655 
3656     for (i = 0; i < ng; ++i) {
3657         s->temps[i].state = TS_DEAD | TS_MEM;
3658         la_reset_pref(&s->temps[i]);
3659     }
3660     for (i = ng; i < nt; ++i) {
3661         s->temps[i].state = TS_DEAD;
3662         la_reset_pref(&s->temps[i]);
3663     }
3664 }
3665 
3666 /* liveness analysis: end of basic block: all temps are dead, globals
3667    and local temps should be in memory. */
3668 static void la_bb_end(TCGContext *s, int ng, int nt)
3669 {
3670     int i;
3671 
3672     for (i = 0; i < nt; ++i) {
3673         TCGTemp *ts = &s->temps[i];
3674         int state;
3675 
3676         switch (ts->kind) {
3677         case TEMP_FIXED:
3678         case TEMP_GLOBAL:
3679         case TEMP_TB:
3680             state = TS_DEAD | TS_MEM;
3681             break;
3682         case TEMP_EBB:
3683         case TEMP_CONST:
3684             state = TS_DEAD;
3685             break;
3686         default:
3687             g_assert_not_reached();
3688         }
3689         ts->state = state;
3690         la_reset_pref(ts);
3691     }
3692 }
3693 
3694 /* liveness analysis: sync globals back to memory.  */
3695 static void la_global_sync(TCGContext *s, int ng)
3696 {
3697     int i;
3698 
3699     for (i = 0; i < ng; ++i) {
3700         int state = s->temps[i].state;
3701         s->temps[i].state = state | TS_MEM;
3702         if (state == TS_DEAD) {
3703             /* If the global was previously dead, reset prefs.  */
3704             la_reset_pref(&s->temps[i]);
3705         }
3706     }
3707 }
3708 
3709 /*
3710  * liveness analysis: conditional branch: all temps are dead unless
3711  * explicitly live across the conditional branch; globals and local
3712  * temps should be synced.
3713  */
3714 static void la_bb_sync(TCGContext *s, int ng, int nt)
3715 {
3716     la_global_sync(s, ng);
3717 
3718     for (int i = ng; i < nt; ++i) {
3719         TCGTemp *ts = &s->temps[i];
3720         int state;
3721 
3722         switch (ts->kind) {
3723         case TEMP_TB:
3724             state = ts->state;
3725             ts->state = state | TS_MEM;
3726             if (state != TS_DEAD) {
3727                 continue;
3728             }
3729             break;
3730         case TEMP_EBB:
3731         case TEMP_CONST:
3732             continue;
3733         default:
3734             g_assert_not_reached();
3735         }
3736         la_reset_pref(&s->temps[i]);
3737     }
3738 }
3739 
3740 /* liveness analysis: sync globals back to memory and kill.  */
3741 static void la_global_kill(TCGContext *s, int ng)
3742 {
3743     int i;
3744 
3745     for (i = 0; i < ng; i++) {
3746         s->temps[i].state = TS_DEAD | TS_MEM;
3747         la_reset_pref(&s->temps[i]);
3748     }
3749 }
3750 
3751 /* liveness analysis: note live temps crossing calls.  */
3752 static void la_cross_call(TCGContext *s, int nt)
3753 {
3754     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3755     int i;
3756 
3757     for (i = 0; i < nt; i++) {
3758         TCGTemp *ts = &s->temps[i];
3759         if (!(ts->state & TS_DEAD)) {
3760             TCGRegSet *pset = la_temp_pref(ts);
3761             TCGRegSet set = *pset;
3762 
3763             set &= mask;
3764             /* If the combination is not possible, restart.  */
3765             if (set == 0) {
3766                 set = tcg_target_available_regs[ts->type] & mask;
3767             }
3768             *pset = set;
3769         }
3770     }
3771 }
3772 
3773 /*
3774  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3775  * to TEMP_EBB, if possible.
3776  */
3777 static void __attribute__((noinline))
3778 liveness_pass_0(TCGContext *s)
3779 {
3780     void * const multiple_ebb = (void *)(uintptr_t)-1;
3781     int nb_temps = s->nb_temps;
3782     TCGOp *op, *ebb;
3783 
3784     for (int i = s->nb_globals; i < nb_temps; ++i) {
3785         s->temps[i].state_ptr = NULL;
3786     }
3787 
3788     /*
3789      * Represent each EBB by the op at which it begins.  In the case of
3790      * the first EBB, this is the first op, otherwise it is a label.
3791      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3792      * within a single EBB, else MULTIPLE_EBB.
3793      */
3794     ebb = QTAILQ_FIRST(&s->ops);
3795     QTAILQ_FOREACH(op, &s->ops, link) {
3796         const TCGOpDef *def;
3797         int nb_oargs, nb_iargs;
3798 
3799         switch (op->opc) {
3800         case INDEX_op_set_label:
3801             ebb = op;
3802             continue;
3803         case INDEX_op_discard:
3804             continue;
3805         case INDEX_op_call:
3806             nb_oargs = TCGOP_CALLO(op);
3807             nb_iargs = TCGOP_CALLI(op);
3808             break;
3809         default:
3810             def = &tcg_op_defs[op->opc];
3811             nb_oargs = def->nb_oargs;
3812             nb_iargs = def->nb_iargs;
3813             break;
3814         }
3815 
3816         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3817             TCGTemp *ts = arg_temp(op->args[i]);
3818 
3819             if (ts->kind != TEMP_TB) {
3820                 continue;
3821             }
3822             if (ts->state_ptr == NULL) {
3823                 ts->state_ptr = ebb;
3824             } else if (ts->state_ptr != ebb) {
3825                 ts->state_ptr = multiple_ebb;
3826             }
3827         }
3828     }
3829 
3830     /*
3831      * For TEMP_TB that turned out not to be used beyond one EBB,
3832      * reduce the liveness to TEMP_EBB.
3833      */
3834     for (int i = s->nb_globals; i < nb_temps; ++i) {
3835         TCGTemp *ts = &s->temps[i];
3836         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3837             ts->kind = TEMP_EBB;
3838         }
3839     }
3840 }
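
/*
 * Example (hypothetical): a translator allocates a TEMP_TB temp t0
 * but only reads and writes it between one label and the next.  All
 * of its uses record the same EBB-starting op in state_ptr, so t0 is
 * demoted to TEMP_EBB here, and later passes need not keep it synced
 * to memory across basic block boundaries.
 */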
3841 
3842 /* Liveness analysis: update each op's life mask to tell if a
3843    given input argument is dead. Instructions updating dead
3844    temporaries are removed. */
3845 static void __attribute__((noinline))
3846 liveness_pass_1(TCGContext *s)
3847 {
3848     int nb_globals = s->nb_globals;
3849     int nb_temps = s->nb_temps;
3850     TCGOp *op, *op_prev;
3851     TCGRegSet *prefs;
3852     int i;
3853 
3854     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3855     for (i = 0; i < nb_temps; ++i) {
3856         s->temps[i].state_ptr = prefs + i;
3857     }
3858 
3859     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3860     la_func_end(s, nb_globals, nb_temps);
3861 
3862     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3863         int nb_iargs, nb_oargs;
3864         TCGOpcode opc_new, opc_new2;
3865         TCGLifeData arg_life = 0;
3866         TCGTemp *ts;
3867         TCGOpcode opc = op->opc;
3868         const TCGOpDef *def = &tcg_op_defs[opc];
3869         const TCGArgConstraint *args_ct;
3870 
3871         switch (opc) {
3872         case INDEX_op_call:
3873             {
3874                 const TCGHelperInfo *info = tcg_call_info(op);
3875                 int call_flags = tcg_call_flags(op);
3876 
3877                 nb_oargs = TCGOP_CALLO(op);
3878                 nb_iargs = TCGOP_CALLI(op);
3879 
3880                 /* pure functions can be removed if their result is unused */
3881                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3882                     for (i = 0; i < nb_oargs; i++) {
3883                         ts = arg_temp(op->args[i]);
3884                         if (ts->state != TS_DEAD) {
3885                             goto do_not_remove_call;
3886                         }
3887                     }
3888                     goto do_remove;
3889                 }
3890             do_not_remove_call:
3891 
3892                 /* Output args are dead.  */
3893                 for (i = 0; i < nb_oargs; i++) {
3894                     ts = arg_temp(op->args[i]);
3895                     if (ts->state & TS_DEAD) {
3896                         arg_life |= DEAD_ARG << i;
3897                     }
3898                     if (ts->state & TS_MEM) {
3899                         arg_life |= SYNC_ARG << i;
3900                     }
3901                     ts->state = TS_DEAD;
3902                     la_reset_pref(ts);
3903                 }
3904 
3905                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3906                 memset(op->output_pref, 0, sizeof(op->output_pref));
3907 
3908                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3909                                     TCG_CALL_NO_READ_GLOBALS))) {
3910                     la_global_kill(s, nb_globals);
3911                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3912                     la_global_sync(s, nb_globals);
3913                 }
3914 
3915                 /* Record arguments that die in this helper.  */
3916                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3917                     ts = arg_temp(op->args[i]);
3918                     if (ts->state & TS_DEAD) {
3919                         arg_life |= DEAD_ARG << i;
3920                     }
3921                 }
3922 
3923                 /* For all live registers, remove call-clobbered prefs.  */
3924                 la_cross_call(s, nb_temps);
3925 
3926                 /*
3927                  * Input arguments are live for preceding opcodes.
3928                  *
3929                  * For those arguments that die, and will be allocated in
3930                  * registers, clear the register set for that arg, to be
3931                  * filled in below.  For args that will be on the stack,
3932                  * reset to any available reg.  Process arguments in reverse
3933                  * order so that if a temp is used more than once, the stack
3934                  * reset to max happens before the register reset to 0.
3935                  */
3936                 for (i = nb_iargs - 1; i >= 0; i--) {
3937                     const TCGCallArgumentLoc *loc = &info->in[i];
3938                     ts = arg_temp(op->args[nb_oargs + i]);
3939 
3940                     if (ts->state & TS_DEAD) {
3941                         switch (loc->kind) {
3942                         case TCG_CALL_ARG_NORMAL:
3943                         case TCG_CALL_ARG_EXTEND_U:
3944                         case TCG_CALL_ARG_EXTEND_S:
3945                             if (arg_slot_reg_p(loc->arg_slot)) {
3946                                 *la_temp_pref(ts) = 0;
3947                                 break;
3948                             }
3949                             /* fall through */
3950                         default:
3951                             *la_temp_pref(ts) =
3952                                 tcg_target_available_regs[ts->type];
3953                             break;
3954                         }
3955                         ts->state &= ~TS_DEAD;
3956                     }
3957                 }
3958 
3959                 /*
3960                  * For each input argument, add its input register to prefs.
3961                  * If a temp is used once, this produces a single set bit;
3962                  * if a temp is used multiple times, this produces a set.
3963                  */
3964                 for (i = 0; i < nb_iargs; i++) {
3965                     const TCGCallArgumentLoc *loc = &info->in[i];
3966                     ts = arg_temp(op->args[nb_oargs + i]);
3967 
3968                     switch (loc->kind) {
3969                     case TCG_CALL_ARG_NORMAL:
3970                     case TCG_CALL_ARG_EXTEND_U:
3971                     case TCG_CALL_ARG_EXTEND_S:
3972                         if (arg_slot_reg_p(loc->arg_slot)) {
3973                             tcg_regset_set_reg(*la_temp_pref(ts),
3974                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3975                         }
3976                         break;
3977                     default:
3978                         break;
3979                     }
3980                 }
3981             }
3982             break;
3983         case INDEX_op_insn_start:
3984             break;
3985         case INDEX_op_discard:
3986             /* mark the temporary as dead */
3987             ts = arg_temp(op->args[0]);
3988             ts->state = TS_DEAD;
3989             la_reset_pref(ts);
3990             break;
3991 
3992         case INDEX_op_add2_i32:
3993         case INDEX_op_add2_i64:
3994             opc_new = INDEX_op_add;
3995             goto do_addsub2;
3996         case INDEX_op_sub2_i32:
3997         case INDEX_op_sub2_i64:
3998             opc_new = INDEX_op_sub;
3999         do_addsub2:
4000             nb_iargs = 4;
4001             nb_oargs = 2;
4002             /* Test if the high part of the operation is dead, but not
4003                the low part.  The result can be optimized to a simple
4004                add or sub.  This happens often for an x86_64 guest when the
4005                cpu mode is set to 32 bit.  */
4006             if (arg_temp(op->args[1])->state == TS_DEAD) {
4007                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4008                     goto do_remove;
4009                 }
4010                 /* Replace the opcode and adjust the args in place,
4011                    leaving 3 unused args at the end.  */
4012                 op->opc = opc = opc_new;
4013                 op->args[1] = op->args[2];
4014                 op->args[2] = op->args[4];
4015                 /* Fall through and mark the single-word operation live.  */
4016                 nb_iargs = 2;
4017                 nb_oargs = 1;
4018             }
4019             goto do_not_remove;
4020 
4021         case INDEX_op_muls2:
4022             opc_new = INDEX_op_mul;
4023             opc_new2 = INDEX_op_mulsh;
4024             goto do_mul2;
4025         case INDEX_op_mulu2:
4026             opc_new = INDEX_op_mul;
4027             opc_new2 = INDEX_op_muluh;
4028         do_mul2:
4029             nb_iargs = 2;
4030             nb_oargs = 2;
4031             if (arg_temp(op->args[1])->state == TS_DEAD) {
4032                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4033                     /* Both parts of the operation are dead.  */
4034                     goto do_remove;
4035                 }
4036                 /* The high part of the operation is dead; generate the low. */
4037                 op->opc = opc = opc_new;
4038                 op->args[1] = op->args[2];
4039                 op->args[2] = op->args[3];
4040             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4041                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4042                 /* The low part of the operation is dead; generate the high. */
4043                 op->opc = opc = opc_new2;
4044                 op->args[0] = op->args[1];
4045                 op->args[1] = op->args[2];
4046                 op->args[2] = op->args[3];
4047             } else {
4048                 goto do_not_remove;
4049             }
4050             /* Mark the single-word operation live.  */
4051             nb_oargs = 1;
4052             goto do_not_remove;
4053 
4054         default:
4055             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4056             nb_iargs = def->nb_iargs;
4057             nb_oargs = def->nb_oargs;
4058 
4059             /* Test if the operation can be removed because all
4060                its outputs are dead. We assume that nb_oargs == 0
4061                implies side effects.  */
4062             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4063                 for (i = 0; i < nb_oargs; i++) {
4064                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4065                         goto do_not_remove;
4066                     }
4067                 }
4068                 goto do_remove;
4069             }
4070             goto do_not_remove;
4071 
4072         do_remove:
4073             tcg_op_remove(s, op);
4074             break;
4075 
4076         do_not_remove:
4077             for (i = 0; i < nb_oargs; i++) {
4078                 ts = arg_temp(op->args[i]);
4079 
4080                 /* Remember the preference of the uses that followed.  */
4081                 if (i < ARRAY_SIZE(op->output_pref)) {
4082                     op->output_pref[i] = *la_temp_pref(ts);
4083                 }
4084 
4085                 /* Output args are dead.  */
4086                 if (ts->state & TS_DEAD) {
4087                     arg_life |= DEAD_ARG << i;
4088                 }
4089                 if (ts->state & TS_MEM) {
4090                     arg_life |= SYNC_ARG << i;
4091                 }
4092                 ts->state = TS_DEAD;
4093                 la_reset_pref(ts);
4094             }
4095 
4096             /* If end of basic block, update.  */
4097             if (def->flags & TCG_OPF_BB_EXIT) {
4098                 la_func_end(s, nb_globals, nb_temps);
4099             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4100                 la_bb_sync(s, nb_globals, nb_temps);
4101             } else if (def->flags & TCG_OPF_BB_END) {
4102                 la_bb_end(s, nb_globals, nb_temps);
4103             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4104                 la_global_sync(s, nb_globals);
4105                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4106                     la_cross_call(s, nb_temps);
4107                 }
4108             }
4109 
4110             /* Record arguments that die in this opcode.  */
4111             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4112                 ts = arg_temp(op->args[i]);
4113                 if (ts->state & TS_DEAD) {
4114                     arg_life |= DEAD_ARG << i;
4115                 }
4116             }
4117 
4118             /* Input arguments are live for preceding opcodes.  */
4119             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4120                 ts = arg_temp(op->args[i]);
4121                 if (ts->state & TS_DEAD) {
4122                     /* For operands that were dead, initially allow
4123                        all regs for the type.  */
4124                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4125                     ts->state &= ~TS_DEAD;
4126                 }
4127             }
4128 
4129             /* Incorporate constraints for this operand.  */
4130             switch (opc) {
4131             case INDEX_op_mov:
4132                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4133                    have proper constraints.  That said, special case
4134                    moves to propagate preferences backward.  */
4135                 if (IS_DEAD_ARG(1)) {
4136                     *la_temp_pref(arg_temp(op->args[0]))
4137                         = *la_temp_pref(arg_temp(op->args[1]));
4138                 }
4139                 break;
4140 
4141             default:
4142                 args_ct = opcode_args_ct(op);
4143                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4144                     const TCGArgConstraint *ct = &args_ct[i];
4145                     TCGRegSet set, *pset;
4146 
4147                     ts = arg_temp(op->args[i]);
4148                     pset = la_temp_pref(ts);
4149                     set = *pset;
4150 
4151                     set &= ct->regs;
4152                     if (ct->ialias) {
4153                         set &= output_pref(op, ct->alias_index);
4154                     }
4155                     /* If the combination is not possible, restart.  */
4156                     if (set == 0) {
4157                         set = ct->regs;
4158                     }
4159                     *pset = set;
4160                 }
4161                 break;
4162             }
4163             break;
4164         }
4165         op->life = arg_life;
4166     }
4167 }
4168 
4169 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4170 static bool __attribute__((noinline))
4171 liveness_pass_2(TCGContext *s)
4172 {
4173     int nb_globals = s->nb_globals;
4174     int nb_temps, i;
4175     bool changes = false;
4176     TCGOp *op, *op_next;
4177 
4178     /* Create a temporary for each indirect global.  */
4179     for (i = 0; i < nb_globals; ++i) {
4180         TCGTemp *its = &s->temps[i];
4181         if (its->indirect_reg) {
4182             TCGTemp *dts = tcg_temp_alloc(s);
4183             dts->type = its->type;
4184             dts->base_type = its->base_type;
4185             dts->temp_subindex = its->temp_subindex;
4186             dts->kind = TEMP_EBB;
4187             its->state_ptr = dts;
4188         } else {
4189             its->state_ptr = NULL;
4190         }
4191         /* All globals begin dead.  */
4192         its->state = TS_DEAD;
4193     }
4194     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4195         TCGTemp *its = &s->temps[i];
4196         its->state_ptr = NULL;
4197         its->state = TS_DEAD;
4198     }
4199 
4200     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4201         TCGOpcode opc = op->opc;
4202         const TCGOpDef *def = &tcg_op_defs[opc];
4203         TCGLifeData arg_life = op->life;
4204         int nb_iargs, nb_oargs, call_flags;
4205         TCGTemp *arg_ts, *dir_ts;
4206 
4207         if (opc == INDEX_op_call) {
4208             nb_oargs = TCGOP_CALLO(op);
4209             nb_iargs = TCGOP_CALLI(op);
4210             call_flags = tcg_call_flags(op);
4211         } else {
4212             nb_iargs = def->nb_iargs;
4213             nb_oargs = def->nb_oargs;
4214 
4215             /* Set flags similar to those calls require.  */
4216             if (def->flags & TCG_OPF_COND_BRANCH) {
4217                 /* Like reading globals: sync_globals */
4218                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4219             } else if (def->flags & TCG_OPF_BB_END) {
4220                 /* Like writing globals: save_globals */
4221                 call_flags = 0;
4222             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4223                 /* Like reading globals: sync_globals */
4224                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4225             } else {
4226                 /* No effect on globals.  */
4227                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4228                               TCG_CALL_NO_WRITE_GLOBALS);
4229             }
4230         }
4231 
4232         /* Make sure that input arguments are available.  */
4233         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4234             arg_ts = arg_temp(op->args[i]);
4235             dir_ts = arg_ts->state_ptr;
4236             if (dir_ts && arg_ts->state == TS_DEAD) {
4237                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4238                                   ? INDEX_op_ld_i32
4239                                   : INDEX_op_ld_i64);
4240                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4241                                                   arg_ts->type, 3);
4242 
4243                 lop->args[0] = temp_arg(dir_ts);
4244                 lop->args[1] = temp_arg(arg_ts->mem_base);
4245                 lop->args[2] = arg_ts->mem_offset;
4246 
4247                 /* Loaded, but synced with memory.  */
4248                 arg_ts->state = TS_MEM;
4249             }
4250         }
4251 
4252         /* Perform input replacement, and mark inputs that became dead.
4253            No action is required except keeping temp_state up to date
4254            so that we reload when needed.  */
4255         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4256             arg_ts = arg_temp(op->args[i]);
4257             dir_ts = arg_ts->state_ptr;
4258             if (dir_ts) {
4259                 op->args[i] = temp_arg(dir_ts);
4260                 changes = true;
4261                 if (IS_DEAD_ARG(i)) {
4262                     arg_ts->state = TS_DEAD;
4263                 }
4264             }
4265         }
4266 
4267         /* Liveness analysis should ensure that the following are
4268            all correct, for call sites and basic block end points.  */
4269         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4270             /* Nothing to do */
4271         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4272             for (i = 0; i < nb_globals; ++i) {
4273                 /* Liveness should see that globals are synced back,
4274                    that is, either TS_DEAD or TS_MEM.  */
4275                 arg_ts = &s->temps[i];
4276                 tcg_debug_assert(arg_ts->state_ptr == 0
4277                                  || arg_ts->state != 0);
4278             }
4279         } else {
4280             for (i = 0; i < nb_globals; ++i) {
4281                 /* Liveness should see that globals are saved back,
4282                    that is, TS_DEAD, waiting to be reloaded.  */
4283                 arg_ts = &s->temps[i];
4284                 tcg_debug_assert(arg_ts->state_ptr == 0
4285                                  || arg_ts->state == TS_DEAD);
4286             }
4287         }
4288 
4289         /* Outputs become available.  */
4290         if (opc == INDEX_op_mov) {
4291             arg_ts = arg_temp(op->args[0]);
4292             dir_ts = arg_ts->state_ptr;
4293             if (dir_ts) {
4294                 op->args[0] = temp_arg(dir_ts);
4295                 changes = true;
4296 
4297                 /* The output is now live and modified.  */
4298                 arg_ts->state = 0;
4299 
4300                 if (NEED_SYNC_ARG(0)) {
4301                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4302                                       ? INDEX_op_st_i32
4303                                       : INDEX_op_st_i64);
4304                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4305                                                      arg_ts->type, 3);
4306                     TCGTemp *out_ts = dir_ts;
4307 
4308                     if (IS_DEAD_ARG(0)) {
4309                         out_ts = arg_temp(op->args[1]);
4310                         arg_ts->state = TS_DEAD;
4311                         tcg_op_remove(s, op);
4312                     } else {
4313                         arg_ts->state = TS_MEM;
4314                     }
4315 
4316                     sop->args[0] = temp_arg(out_ts);
4317                     sop->args[1] = temp_arg(arg_ts->mem_base);
4318                     sop->args[2] = arg_ts->mem_offset;
4319                 } else {
4320                     tcg_debug_assert(!IS_DEAD_ARG(0));
4321                 }
4322             }
4323         } else {
4324             for (i = 0; i < nb_oargs; i++) {
4325                 arg_ts = arg_temp(op->args[i]);
4326                 dir_ts = arg_ts->state_ptr;
4327                 if (!dir_ts) {
4328                     continue;
4329                 }
4330                 op->args[i] = temp_arg(dir_ts);
4331                 changes = true;
4332 
4333                 /* The output is now live and modified.  */
4334                 arg_ts->state = 0;
4335 
4336                 /* Sync outputs upon their last write.  */
4337                 if (NEED_SYNC_ARG(i)) {
4338                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4339                                       ? INDEX_op_st_i32
4340                                       : INDEX_op_st_i64);
4341                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4342                                                      arg_ts->type, 3);
4343 
4344                     sop->args[0] = temp_arg(dir_ts);
4345                     sop->args[1] = temp_arg(arg_ts->mem_base);
4346                     sop->args[2] = arg_ts->mem_offset;
4347 
4348                     arg_ts->state = TS_MEM;
4349                 }
4350                 /* Drop outputs that are dead.  */
4351                 if (IS_DEAD_ARG(i)) {
4352                     arg_ts->state = TS_DEAD;
4353                 }
4354             }
4355         }
4356     }
4357 
4358     return changes;
4359 }
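
/*
 * Sketch of the rewrite above for a single indirect global g, with d
 * the direct temp created for it (base/off stand for g's mem_base
 * register and mem_offset):
 *
 *     add g, g, t0     becomes     ld  d, base, off
 *                                  add d, d, t0
 *                                  st  d, base, off
 *
 * The load is only inserted when g is not already live in d, and the
 * store only at the last write (NEED_SYNC_ARG).
 */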
4360 
4361 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4362 {
4363     intptr_t off;
4364     int size, align;
4365 
4366     /* When allocating an object, look at the full type. */
4367     size = tcg_type_size(ts->base_type);
4368     switch (ts->base_type) {
4369     case TCG_TYPE_I32:
4370         align = 4;
4371         break;
4372     case TCG_TYPE_I64:
4373     case TCG_TYPE_V64:
4374         align = 8;
4375         break;
4376     case TCG_TYPE_I128:
4377     case TCG_TYPE_V128:
4378     case TCG_TYPE_V256:
4379         /*
4380          * Note that we do not require aligned storage for V256,
4381          * and that we provide alignment for I128 to match V128,
4382          * even if that's above what the host ABI requires.
4383          */
4384         align = 16;
4385         break;
4386     default:
4387         g_assert_not_reached();
4388     }
4389 
4390     /*
4391      * Assume the stack is sufficiently aligned.
4392      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4393      * and do not require 16 byte vector alignment.  This seems slightly
4394      * easier than fully parameterizing the above switch statement.
4395      */
4396     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4397     off = ROUND_UP(s->current_frame_offset, align);
4398 
4399     /* If we've exhausted the stack frame, restart with a smaller TB. */
4400     if (off + size > s->frame_end) {
4401         tcg_raise_tb_overflow(s);
4402     }
4403     s->current_frame_offset = off + size;
4404 #if defined(__sparc__)
4405     off += TCG_TARGET_STACK_BIAS;
4406 #endif
4407 
4408     /* If the object was subdivided, assign memory to all the parts. */
4409     if (ts->base_type != ts->type) {
4410         int part_size = tcg_type_size(ts->type);
4411         int part_count = size / part_size;
4412 
4413         /*
4414          * Each part is allocated sequentially in tcg_temp_new_internal.
4415          * Jump back to the first part by subtracting the current index.
4416          */
4417         ts -= ts->temp_subindex;
4418         for (int i = 0; i < part_count; ++i) {
4419             ts[i].mem_offset = off + i * part_size;
4420             ts[i].mem_base = s->frame_temp;
4421             ts[i].mem_allocated = 1;
4422         }
4423     } else {
4424         ts->mem_offset = off;
4425         ts->mem_base = s->frame_temp;
4426         ts->mem_allocated = 1;
4427     }
4428 }
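
/*
 * Worked example of the offset arithmetic (illustrative numbers):
 * with current_frame_offset == 12, an I64 temp has size == 8 and
 * align == 8 (assuming TCG_TARGET_STACK_ALIGN >= 8), so
 * off = ROUND_UP(12, 8) = 16 and current_frame_offset advances to 24.
 * An I128 temp split into two I64 parts would assign mem_offset 16
 * and 24 to the two subindices.
 */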
4429 
4430 /* Assign @reg to @ts, and update reg_to_temp[]. */
4431 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4432 {
4433     if (ts->val_type == TEMP_VAL_REG) {
4434         TCGReg old = ts->reg;
4435         tcg_debug_assert(s->reg_to_temp[old] == ts);
4436         if (old == reg) {
4437             return;
4438         }
4439         s->reg_to_temp[old] = NULL;
4440     }
4441     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4442     s->reg_to_temp[reg] = ts;
4443     ts->val_type = TEMP_VAL_REG;
4444     ts->reg = reg;
4445 }
4446 
4447 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4448 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4449 {
4450     tcg_debug_assert(type != TEMP_VAL_REG);
4451     if (ts->val_type == TEMP_VAL_REG) {
4452         TCGReg reg = ts->reg;
4453         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4454         s->reg_to_temp[reg] = NULL;
4455     }
4456     ts->val_type = type;
4457 }
4458 
4459 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4460 
4461 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4462    mark it free; otherwise mark it dead.  */
4463 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4464 {
4465     TCGTempVal new_type;
4466 
4467     switch (ts->kind) {
4468     case TEMP_FIXED:
4469         return;
4470     case TEMP_GLOBAL:
4471     case TEMP_TB:
4472         new_type = TEMP_VAL_MEM;
4473         break;
4474     case TEMP_EBB:
4475         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4476         break;
4477     case TEMP_CONST:
4478         new_type = TEMP_VAL_CONST;
4479         break;
4480     default:
4481         g_assert_not_reached();
4482     }
4483     set_temp_val_nonreg(s, ts, new_type);
4484 }
4485 
4486 /* Mark a temporary as dead.  */
4487 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4488 {
4489     temp_free_or_dead(s, ts, 1);
4490 }
4491 
4492 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4493    register needs to be allocated to store a constant.  If 'free_or_dead'
4494    is non-zero, subsequently release the temporary; if it is positive, the
4495    temp is dead; if it is negative, the temp is free.  */
4496 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4497                       TCGRegSet preferred_regs, int free_or_dead)
4498 {
4499     if (!temp_readonly(ts) && !ts->mem_coherent) {
4500         if (!ts->mem_allocated) {
4501             temp_allocate_frame(s, ts);
4502         }
4503         switch (ts->val_type) {
4504         case TEMP_VAL_CONST:
4505             /* If we're going to free the temp immediately, then we won't
4506                require it later in a register, so attempt to store the
4507                constant to memory directly.  */
4508             if (free_or_dead
4509                 && tcg_out_sti(s, ts->type, ts->val,
4510                                ts->mem_base->reg, ts->mem_offset)) {
4511                 break;
4512             }
4513             temp_load(s, ts, tcg_target_available_regs[ts->type],
4514                       allocated_regs, preferred_regs);
4515             /* fallthrough */
4516 
4517         case TEMP_VAL_REG:
4518             tcg_out_st(s, ts->type, ts->reg,
4519                        ts->mem_base->reg, ts->mem_offset);
4520             break;
4521 
4522         case TEMP_VAL_MEM:
4523             break;
4524 
4525         case TEMP_VAL_DEAD:
4526         default:
4527             g_assert_not_reached();
4528         }
4529         ts->mem_coherent = 1;
4530     }
4531     if (free_or_dead) {
4532         temp_free_or_dead(s, ts, free_or_dead);
4533     }
4534 }
4535 
4536 /* free register 'reg' by spilling the corresponding temporary if necessary */
4537 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4538 {
4539     TCGTemp *ts = s->reg_to_temp[reg];
4540     if (ts != NULL) {
4541         temp_sync(s, ts, allocated_regs, 0, -1);
4542     }
4543 }
4544 
4545 /**
4546  * tcg_reg_alloc:
4547  * @required_regs: Set of registers in which we must allocate.
4548  * @allocated_regs: Set of registers which must be avoided.
4549  * @preferred_regs: Set of registers we should prefer.
4550  * @rev: True if we search the registers in "indirect" order.
4551  *
4552  * The allocated register must be in @required_regs & ~@allocated_regs,
4553  * but if we can put it in @preferred_regs we may save a move later.
4554  */
4555 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4556                             TCGRegSet allocated_regs,
4557                             TCGRegSet preferred_regs, bool rev)
4558 {
4559     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4560     TCGRegSet reg_ct[2];
4561     const int *order;
4562 
4563     reg_ct[1] = required_regs & ~allocated_regs;
4564     tcg_debug_assert(reg_ct[1] != 0);
4565     reg_ct[0] = reg_ct[1] & preferred_regs;
4566 
4567     /* Skip the preferred_regs option if it cannot be satisfied,
4568        or if the preference made no difference.  */
4569     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4570 
4571     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4572 
4573     /* Try free registers, preferences first.  */
4574     for (j = f; j < 2; j++) {
4575         TCGRegSet set = reg_ct[j];
4576 
4577         if (tcg_regset_single(set)) {
4578             /* One register in the set.  */
4579             TCGReg reg = tcg_regset_first(set);
4580             if (s->reg_to_temp[reg] == NULL) {
4581                 return reg;
4582             }
4583         } else {
4584             for (i = 0; i < n; i++) {
4585                 TCGReg reg = order[i];
4586                 if (s->reg_to_temp[reg] == NULL &&
4587                     tcg_regset_test_reg(set, reg)) {
4588                     return reg;
4589                 }
4590             }
4591         }
4592     }
4593 
4594     /* We must spill something.  */
4595     for (j = f; j < 2; j++) {
4596         TCGRegSet set = reg_ct[j];
4597 
4598         if (tcg_regset_single(set)) {
4599             /* One register in the set.  */
4600             TCGReg reg = tcg_regset_first(set);
4601             tcg_reg_free(s, reg, allocated_regs);
4602             return reg;
4603         } else {
4604             for (i = 0; i < n; i++) {
4605                 TCGReg reg = order[i];
4606                 if (tcg_regset_test_reg(set, reg)) {
4607                     tcg_reg_free(s, reg, allocated_regs);
4608                     return reg;
4609                 }
4610             }
4611         }
4612     }
4613 
4614     g_assert_not_reached();
4615 }
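
/*
 * For example (illustrative): with required_regs = {r0,r1,r2},
 * allocated_regs = {r0} and preferred_regs = {r1}, the first loop
 * scans {r1} and then {r1,r2} for a register not bound to a temp;
 * only if every candidate is occupied does the second loop spill
 * one, again trying the preferred subset first.
 */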
4616 
4617 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4618                                  TCGRegSet allocated_regs,
4619                                  TCGRegSet preferred_regs, bool rev)
4620 {
4621     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4622     TCGRegSet reg_ct[2];
4623     const int *order;
4624 
4625     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4626     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4627     tcg_debug_assert(reg_ct[1] != 0);
4628     reg_ct[0] = reg_ct[1] & preferred_regs;
4629 
4630     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4631 
4632     /*
4633      * Skip the preferred_regs option if it cannot be satisfied,
4634      * or if the preference made no difference.
4635      */
4636     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4637 
4638     /*
4639      * Minimize the number of flushes by looking for 2 free registers first,
4640      * then a single flush, then two flushes.
4641      */
4642     for (fmin = 2; fmin >= 0; fmin--) {
4643         for (j = k; j < 2; j++) {
4644             TCGRegSet set = reg_ct[j];
4645 
4646             for (i = 0; i < n; i++) {
4647                 TCGReg reg = order[i];
4648 
4649                 if (tcg_regset_test_reg(set, reg)) {
4650                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4651                     if (f >= fmin) {
4652                         tcg_reg_free(s, reg, allocated_regs);
4653                         tcg_reg_free(s, reg + 1, allocated_regs);
4654                         return reg;
4655                     }
4656                 }
4657             }
4658         }
4659     }
4660     g_assert_not_reached();
4661 }
4662 
4663 /* Make sure the temporary is in a register.  If needed, allocate the register
4664    from DESIRED while avoiding ALLOCATED.  */
4665 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4666                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4667 {
4668     TCGReg reg;
4669 
4670     switch (ts->val_type) {
4671     case TEMP_VAL_REG:
4672         return;
4673     case TEMP_VAL_CONST:
4674         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4675                             preferred_regs, ts->indirect_base);
4676         if (ts->type <= TCG_TYPE_I64) {
4677             tcg_out_movi(s, ts->type, reg, ts->val);
4678         } else {
4679             uint64_t val = ts->val;
4680             MemOp vece = MO_64;
4681 
4682             /*
4683              * Find the minimal vector element that matches the constant.
4684              * The targets will, in general, have to do this search anyway;
4685              * do it generically here.
4686              */
4687             if (val == dup_const(MO_8, val)) {
4688                 vece = MO_8;
4689             } else if (val == dup_const(MO_16, val)) {
4690                 vece = MO_16;
4691             } else if (val == dup_const(MO_32, val)) {
4692                 vece = MO_32;
4693             }
4694 
4695             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4696         }
4697         ts->mem_coherent = 0;
4698         break;
4699     case TEMP_VAL_MEM:
4700         if (!ts->mem_allocated) {
4701             temp_allocate_frame(s, ts);
4702         }
4703         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4704                             preferred_regs, ts->indirect_base);
4705         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4706         ts->mem_coherent = 1;
4707         break;
4708     case TEMP_VAL_DEAD:
4709     default:
4710         g_assert_not_reached();
4711     }
4712     set_temp_val_reg(s, ts, reg);
4713 }
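
/*
 * Example of the minimal-VECE search above: 0x0101010101010101
 * replicates one byte, so
 *
 *     dup_const(MO_8, 0x0101010101010101) == 0x0101010101010101
 *
 * and the constant is emitted with vece == MO_8.  Likewise
 * 0x0001000100010001 matches at MO_16, while a value such as
 * 0x0000000100000002 matches none of the patterns and keeps MO_64.
 */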
4714 
4715 /* Save a temporary to memory. 'allocated_regs' is used in case a
4716    temporary register needs to be allocated to store a constant.  */
4717 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4718 {
4719     /* The liveness analysis already ensures that globals are back
4720        in memory. Keep a tcg_debug_assert for safety. */
4721     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4722 }
4723 
4724 /* save globals to their canonical location and assume they can be
4725    modified by the following code. 'allocated_regs' is used in case a
4726    temporary register needs to be allocated to store a constant. */
4727 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4728 {
4729     int i, n;
4730 
4731     for (i = 0, n = s->nb_globals; i < n; i++) {
4732         temp_save(s, &s->temps[i], allocated_regs);
4733     }
4734 }
4735 
4736 /* sync globals to their canonical location and assume they can be
4737    read by the following code. 'allocated_regs' is used in case a
4738    temporary register needs to be allocated to store a constant. */
4739 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4740 {
4741     int i, n;
4742 
4743     for (i = 0, n = s->nb_globals; i < n; i++) {
4744         TCGTemp *ts = &s->temps[i];
4745         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4746                          || ts->kind == TEMP_FIXED
4747                          || ts->mem_coherent);
4748     }
4749 }
4750 
4751 /* at the end of a basic block, we assume all temporaries are dead and
4752    all globals are stored at their canonical location. */
4753 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4754 {
4755     int i;
4756 
4757     for (i = s->nb_globals; i < s->nb_temps; i++) {
4758         TCGTemp *ts = &s->temps[i];
4759 
4760         switch (ts->kind) {
4761         case TEMP_TB:
4762             temp_save(s, ts, allocated_regs);
4763             break;
4764         case TEMP_EBB:
4765             /* The liveness analysis already ensures that temps are dead.
4766                Keep a tcg_debug_assert for safety. */
4767             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4768             break;
4769         case TEMP_CONST:
4770             /* Similarly, we should have freed any allocated register. */
4771             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4772             break;
4773         default:
4774             g_assert_not_reached();
4775         }
4776     }
4777 
4778     save_globals(s, allocated_regs);
4779 }
4780 
4781 /*
4782  * At a conditional branch, we assume all temporaries are dead unless
4783  * explicitly live-across-conditional-branch; all globals and local
4784  * temps are synced to their location.
4785  */
4786 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4787 {
4788     sync_globals(s, allocated_regs);
4789 
4790     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4791         TCGTemp *ts = &s->temps[i];
4792         /*
4793          * The liveness analysis already ensures that temps are dead.
4794          * Keep tcg_debug_asserts for safety.
4795          */
4796         switch (ts->kind) {
4797         case TEMP_TB:
4798             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4799             break;
4800         case TEMP_EBB:
4801         case TEMP_CONST:
4802             break;
4803         default:
4804             g_assert_not_reached();
4805         }
4806     }
4807 }
4808 
4809 /*
4810  * Specialized code generation for INDEX_op_mov_* with a constant.
4811  */
4812 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4813                                   tcg_target_ulong val, TCGLifeData arg_life,
4814                                   TCGRegSet preferred_regs)
4815 {
4816     /* ENV should not be modified.  */
4817     tcg_debug_assert(!temp_readonly(ots));
4818 
4819     /* The movi is not explicitly generated here.  */
4820     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4821     ots->val = val;
4822     ots->mem_coherent = 0;
4823     if (NEED_SYNC_ARG(0)) {
4824         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4825     } else if (IS_DEAD_ARG(0)) {
4826         temp_dead(s, ots);
4827     }
4828 }
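
/*
 * Illustrative trace (hypothetical temps): after
 *
 *     tcg_gen_movi_i32(t0, 123);
 *
 * no host instruction has been emitted; t0 is merely TEMP_VAL_CONST
 * with val == 123.  A later use either folds 123 as an immediate via
 * tcg_target_const_match() or forces a real tcg_out_movi() through
 * temp_load().
 */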
4829 
4830 /*
4831  * Specialized code generation for INDEX_op_mov_*.
4832  */
4833 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4834 {
4835     const TCGLifeData arg_life = op->life;
4836     TCGRegSet allocated_regs, preferred_regs;
4837     TCGTemp *ts, *ots;
4838     TCGType otype, itype;
4839     TCGReg oreg, ireg;
4840 
4841     allocated_regs = s->reserved_regs;
4842     preferred_regs = output_pref(op, 0);
4843     ots = arg_temp(op->args[0]);
4844     ts = arg_temp(op->args[1]);
4845 
4846     /* ENV should not be modified.  */
4847     tcg_debug_assert(!temp_readonly(ots));
4848 
4849     /* Note that otype != itype for no-op truncation.  */
4850     otype = ots->type;
4851     itype = ts->type;
4852 
4853     if (ts->val_type == TEMP_VAL_CONST) {
4854         /* propagate constant or generate sti */
4855         tcg_target_ulong val = ts->val;
4856         if (IS_DEAD_ARG(1)) {
4857             temp_dead(s, ts);
4858         }
4859         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4860         return;
4861     }
4862 
4863     /* If the source value is in memory we're going to be forced
4864        to have it in a register in order to perform the copy.  Copy
4865        the SOURCE value into its own register first, that way we
4866        don't have to reload SOURCE the next time it is used. */
4867     if (ts->val_type == TEMP_VAL_MEM) {
4868         temp_load(s, ts, tcg_target_available_regs[itype],
4869                   allocated_regs, preferred_regs);
4870     }
4871     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4872     ireg = ts->reg;
4873 
4874     if (IS_DEAD_ARG(0)) {
4875         /* mov to a non-saved dead register makes no sense (even with
4876            liveness analysis disabled). */
4877         tcg_debug_assert(NEED_SYNC_ARG(0));
4878         if (!ots->mem_allocated) {
4879             temp_allocate_frame(s, ots);
4880         }
4881         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4882         if (IS_DEAD_ARG(1)) {
4883             temp_dead(s, ts);
4884         }
4885         temp_dead(s, ots);
4886         return;
4887     }
4888 
4889     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4890         /*
4891          * The mov can be suppressed.  Kill input first, so that it
4892          * is unlinked from reg_to_temp, then set the output to the
4893          * reg that we saved from the input.
4894          */
4895         temp_dead(s, ts);
4896         oreg = ireg;
4897     } else {
4898         if (ots->val_type == TEMP_VAL_REG) {
4899             oreg = ots->reg;
4900         } else {
4901             /* Make sure to not spill the input register during allocation. */
4902             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4903                                  allocated_regs | ((TCGRegSet)1 << ireg),
4904                                  preferred_regs, ots->indirect_base);
4905         }
4906         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4907             /*
4908              * Cross register class move not supported.
4909              * Store the source register into the destination slot
4910              * and leave the destination temp as TEMP_VAL_MEM.
4911              */
4912             assert(!temp_readonly(ots));
4913             if (!ots->mem_allocated) {
4914                 temp_allocate_frame(s, ots);
4915             }
4916             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4917             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4918             ots->mem_coherent = 1;
4919             return;
4920         }
4921     }
4922     set_temp_val_reg(s, ots, oreg);
4923     ots->mem_coherent = 0;
4924 
4925     if (NEED_SYNC_ARG(0)) {
4926         temp_sync(s, ots, allocated_regs, 0, 0);
4927     }
4928 }
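
/*
 * Illustrative trace of the suppression case above: for
 *
 *     mov_i32 t1, t0       // t0 dead after this op
 *
 * nothing is emitted.  t0 is unlinked from reg_to_temp and t1 simply
 * inherits the input register (oreg = ireg).  A distinct output
 * register, and a real tcg_out_mov(), are needed only when the input
 * stays live or is TEMP_FIXED.
 */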
4929 
4930 /*
4931  * Specialized code generation for INDEX_op_dup_vec.
4932  */
4933 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4934 {
4935     const TCGLifeData arg_life = op->life;
4936     TCGRegSet dup_out_regs, dup_in_regs;
4937     const TCGArgConstraint *dup_args_ct;
4938     TCGTemp *its, *ots;
4939     TCGType itype, vtype;
4940     unsigned vece;
4941     int lowpart_ofs;
4942     bool ok;
4943 
4944     ots = arg_temp(op->args[0]);
4945     its = arg_temp(op->args[1]);
4946 
4947     /* ENV should not be modified.  */
4948     tcg_debug_assert(!temp_readonly(ots));
4949 
4950     itype = its->type;
4951     vece = TCGOP_VECE(op);
4952     vtype = TCGOP_TYPE(op);
4953 
4954     if (its->val_type == TEMP_VAL_CONST) {
4955         /* Propagate constant via movi -> dupi.  */
4956         tcg_target_ulong val = its->val;
4957         if (IS_DEAD_ARG(1)) {
4958             temp_dead(s, its);
4959         }
4960         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4961         return;
4962     }
4963 
4964     dup_args_ct = opcode_args_ct(op);
4965     dup_out_regs = dup_args_ct[0].regs;
4966     dup_in_regs = dup_args_ct[1].regs;
4967 
4968     /* Allocate the output register now.  */
4969     if (ots->val_type != TEMP_VAL_REG) {
4970         TCGRegSet allocated_regs = s->reserved_regs;
4971         TCGReg oreg;
4972 
4973         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4974             /* Make sure to not spill the input register. */
4975             tcg_regset_set_reg(allocated_regs, its->reg);
4976         }
4977         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4978                              output_pref(op, 0), ots->indirect_base);
4979         set_temp_val_reg(s, ots, oreg);
4980     }
4981 
4982     switch (its->val_type) {
4983     case TEMP_VAL_REG:
4984         /*
4985          * The dup constraints must be broad, covering all possible VECE.
4986          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4987          * to fail, indicating that extra moves are required for that case.
4988          */
4989         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4990             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4991                 goto done;
4992             }
4993             /* Try again from memory or a vector input register.  */
4994         }
4995         if (!its->mem_coherent) {
4996             /*
4997              * The input register is not synced, and so an extra store
4998              * would be required to use memory.  Attempt an integer-vector
4999              * register move first.  We do not have a TCGRegSet for this.
5000              */
5001             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5002                 break;
5003             }
5004             /* Sync the temp back to its slot and load from there.  */
5005             temp_sync(s, its, s->reserved_regs, 0, 0);
5006         }
5007         /* fall through */
5008 
5009     case TEMP_VAL_MEM:
5010         lowpart_ofs = 0;
5011         if (HOST_BIG_ENDIAN) {
5012             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5013         }
5014         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5015                              its->mem_offset + lowpart_ofs)) {
5016             goto done;
5017         }
5018         /* Load the input into the destination vector register. */
5019         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5020         break;
5021 
5022     default:
5023         g_assert_not_reached();
5024     }
5025 
5026     /* We now have a vector input register, so dup must succeed. */
5027     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5028     tcg_debug_assert(ok);
5029 
5030  done:
5031     ots->mem_coherent = 0;
5032     if (IS_DEAD_ARG(1)) {
5033         temp_dead(s, its);
5034     }
5035     if (NEED_SYNC_ARG(0)) {
5036         temp_sync(s, ots, s->reserved_regs, 0, 0);
5037     }
5038     if (IS_DEAD_ARG(0)) {
5039         temp_dead(s, ots);
5040     }
5041 }
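
/*
 * Worked example for lowpart_ofs above: a dup from a 64-bit integer
 * temp (itype == TCG_TYPE_I64, tcg_type_size() == 8) with vece == MO_8
 * on a big-endian host gives
 *
 *     lowpart_ofs = 8 - (1 << MO_8) = 8 - 1 = 7
 *
 * i.e. the least significant byte sits at the highest address of the
 * slot, so tcg_out_dupm_vec() must load from mem_offset + 7.
 */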
5042 
5043 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5044 {
5045     const TCGLifeData arg_life = op->life;
5046     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5047     TCGRegSet i_allocated_regs;
5048     TCGRegSet o_allocated_regs;
5049     int i, k, nb_iargs, nb_oargs;
5050     TCGReg reg;
5051     TCGArg arg;
5052     const TCGArgConstraint *args_ct;
5053     const TCGArgConstraint *arg_ct;
5054     TCGTemp *ts;
5055     TCGArg new_args[TCG_MAX_OP_ARGS];
5056     int const_args[TCG_MAX_OP_ARGS];
5057     TCGCond op_cond;
5058 
5059     nb_oargs = def->nb_oargs;
5060     nb_iargs = def->nb_iargs;
5061 
5062     /* copy constants */
5063     memcpy(new_args + nb_oargs + nb_iargs,
5064            op->args + nb_oargs + nb_iargs,
5065            sizeof(TCGArg) * def->nb_cargs);
5066 
5067     i_allocated_regs = s->reserved_regs;
5068     o_allocated_regs = s->reserved_regs;
5069 
5070     switch (op->opc) {
5071     case INDEX_op_brcond:
5072         op_cond = op->args[2];
5073         break;
5074     case INDEX_op_setcond:
5075     case INDEX_op_negsetcond:
5076     case INDEX_op_cmp_vec:
5077         op_cond = op->args[3];
5078         break;
5079     case INDEX_op_brcond2_i32:
5080         op_cond = op->args[4];
5081         break;
5082     case INDEX_op_movcond:
5083     case INDEX_op_setcond2_i32:
5084     case INDEX_op_cmpsel_vec:
5085         op_cond = op->args[5];
5086         break;
5087     default:
5088         /* No condition within opcode. */
5089         op_cond = TCG_COND_ALWAYS;
5090         break;
5091     }
5092 
5093     args_ct = opcode_args_ct(op);
5094 
5095     /* satisfy input constraints */
5096     for (k = 0; k < nb_iargs; k++) {
5097         TCGRegSet i_preferred_regs, i_required_regs;
5098         bool allocate_new_reg, copyto_new_reg;
5099         TCGTemp *ts2;
5100         int i1, i2;
5101 
5102         i = args_ct[nb_oargs + k].sort_index;
5103         arg = op->args[i];
5104         arg_ct = &args_ct[i];
5105         ts = arg_temp(arg);
5106 
5107         if (ts->val_type == TEMP_VAL_CONST) {
5108 #ifdef TCG_REG_ZERO
5109             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5110                 /* Hardware zero register: indicate register via non-const. */
5111                 const_args[i] = 0;
5112                 new_args[i] = TCG_REG_ZERO;
5113                 continue;
5114             }
5115 #endif
5116 
5117             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5118                                        op_cond, TCGOP_VECE(op))) {
5119                 /* constant is OK for instruction */
5120                 const_args[i] = 1;
5121                 new_args[i] = ts->val;
5122                 continue;
5123             }
5124         }
5125 
5126         reg = ts->reg;
5127         i_preferred_regs = 0;
5128         i_required_regs = arg_ct->regs;
5129         allocate_new_reg = false;
5130         copyto_new_reg = false;
5131 
5132         switch (arg_ct->pair) {
5133         case 0: /* not paired */
5134             if (arg_ct->ialias) {
5135                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5136 
5137                 /*
5138                  * If the input is readonly, then it cannot also be an
5139                  * output and aliased to itself.  If the input is not
5140                  * dead after the instruction, we must allocate a new
5141                  * register and move it.
5142                  */
5143                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5144                     || args_ct[arg_ct->alias_index].newreg) {
5145                     allocate_new_reg = true;
5146                 } else if (ts->val_type == TEMP_VAL_REG) {
5147                     /*
5148                      * Check if the current register has already been
5149                      * allocated for another input.
5150                      */
5151                     allocate_new_reg =
5152                         tcg_regset_test_reg(i_allocated_regs, reg);
5153                 }
5154             }
5155             if (!allocate_new_reg) {
5156                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5157                           i_preferred_regs);
5158                 reg = ts->reg;
5159                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5160             }
5161             if (allocate_new_reg) {
5162                 /*
5163                  * Allocate a new register matching the constraint
5164                  * and move the temporary register into it.
5165                  */
5166                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5167                           i_allocated_regs, 0);
5168                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5169                                     i_preferred_regs, ts->indirect_base);
5170                 copyto_new_reg = true;
5171             }
5172             break;
5173 
5174         case 1:
5175             /* First of an input pair; if i1 == i2, the second is an output. */
5176             i1 = i;
5177             i2 = arg_ct->pair_index;
5178             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5179 
5180             /*
5181              * It is easier to default to allocating a new pair
5182              * and to identify a few cases where it's not required.
5183              */
5184             if (arg_ct->ialias) {
5185                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5186                 if (IS_DEAD_ARG(i1) &&
5187                     IS_DEAD_ARG(i2) &&
5188                     !temp_readonly(ts) &&
5189                     ts->val_type == TEMP_VAL_REG &&
5190                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5191                     tcg_regset_test_reg(i_required_regs, reg) &&
5192                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5193                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5194                     (ts2
5195                      ? ts2->val_type == TEMP_VAL_REG &&
5196                        ts2->reg == reg + 1 &&
5197                        !temp_readonly(ts2)
5198                      : s->reg_to_temp[reg + 1] == NULL)) {
5199                     break;
5200                 }
5201             } else {
5202                 /* Without aliasing, the pair must also be an input. */
5203                 tcg_debug_assert(ts2);
5204                 if (ts->val_type == TEMP_VAL_REG &&
5205                     ts2->val_type == TEMP_VAL_REG &&
5206                     ts2->reg == reg + 1 &&
5207                     tcg_regset_test_reg(i_required_regs, reg)) {
5208                     break;
5209                 }
5210             }
5211             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5212                                      0, ts->indirect_base);
5213             goto do_pair;
5214 
5215         case 2: /* pair second */
5216             reg = new_args[arg_ct->pair_index] + 1;
5217             goto do_pair;
5218 
5219         case 3: /* ialias with second output, no first input */
5220             tcg_debug_assert(arg_ct->ialias);
5221             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5222 
5223             if (IS_DEAD_ARG(i) &&
5224                 !temp_readonly(ts) &&
5225                 ts->val_type == TEMP_VAL_REG &&
5226                 reg > 0 &&
5227                 s->reg_to_temp[reg - 1] == NULL &&
5228                 tcg_regset_test_reg(i_required_regs, reg) &&
5229                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5230                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5231                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5232                 break;
5233             }
5234             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5235                                      i_allocated_regs, 0,
5236                                      ts->indirect_base);
5237             tcg_regset_set_reg(i_allocated_regs, reg);
5238             reg += 1;
5239             goto do_pair;
5240 
5241         do_pair:
5242             /*
5243              * If an aliased input is not dead after the instruction,
5244              * we must allocate a new register and move it.
5245              */
5246             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5247                 TCGRegSet t_allocated_regs = i_allocated_regs;
5248 
5249                 /*
5250                  * Because of the alias, and the continued life, make sure
5251                  * that the temp is somewhere *other* than the reg pair,
5252                  * and we get a copy in reg.
5253                  */
5254                 tcg_regset_set_reg(t_allocated_regs, reg);
5255                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5256                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5257                     /* If ts was already in reg, copy it somewhere else. */
5258                     TCGReg nr;
5259                     bool ok;
5260 
5261                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5262                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5263                                        t_allocated_regs, 0, ts->indirect_base);
5264                     ok = tcg_out_mov(s, ts->type, nr, reg);
5265                     tcg_debug_assert(ok);
5266 
5267                     set_temp_val_reg(s, ts, nr);
5268                 } else {
5269                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5270                               t_allocated_regs, 0);
5271                     copyto_new_reg = true;
5272                 }
5273             } else {
5274                 /* Preferably allocate to reg, otherwise copy. */
5275                 i_required_regs = (TCGRegSet)1 << reg;
5276                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5277                           i_preferred_regs);
5278                 copyto_new_reg = ts->reg != reg;
5279             }
5280             break;
5281 
5282         default:
5283             g_assert_not_reached();
5284         }
5285 
5286         if (copyto_new_reg) {
5287             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5288                 /*
5289                  * Cross register class move not supported.  Sync the
5290                  * temp back to its slot and load from there.
5291                  */
5292                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5293                 tcg_out_ld(s, ts->type, reg,
5294                            ts->mem_base->reg, ts->mem_offset);
5295             }
5296         }
5297         new_args[i] = reg;
5298         const_args[i] = 0;
5299         tcg_regset_set_reg(i_allocated_regs, reg);
5300     }
5301 
5302     /* mark dead temporaries and free the associated registers */
5303     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5304         if (IS_DEAD_ARG(i)) {
5305             temp_dead(s, arg_temp(op->args[i]));
5306         }
5307     }
5308 
5309     if (def->flags & TCG_OPF_COND_BRANCH) {
5310         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5311     } else if (def->flags & TCG_OPF_BB_END) {
5312         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5313     } else {
5314         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5315             /* XXX: permit generic clobber register list? */
5316             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5317                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5318                     tcg_reg_free(s, i, i_allocated_regs);
5319                 }
5320             }
5321         }
5322         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5323             /* sync globals if the op has side effects and might trigger
5324                an exception. */
5325             sync_globals(s, i_allocated_regs);
5326         }
5327 
5328         /* satisfy the output constraints */
5329         for (k = 0; k < nb_oargs; k++) {
5330             i = args_ct[k].sort_index;
5331             arg = op->args[i];
5332             arg_ct = &args_ct[i];
5333             ts = arg_temp(arg);
5334 
5335             /* ENV should not be modified.  */
5336             tcg_debug_assert(!temp_readonly(ts));
5337 
5338             switch (arg_ct->pair) {
5339             case 0: /* not paired */
5340                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5341                     reg = new_args[arg_ct->alias_index];
5342                 } else if (arg_ct->newreg) {
5343                     reg = tcg_reg_alloc(s, arg_ct->regs,
5344                                         i_allocated_regs | o_allocated_regs,
5345                                         output_pref(op, k), ts->indirect_base);
5346                 } else {
5347                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5348                                         output_pref(op, k), ts->indirect_base);
5349                 }
5350                 break;
5351 
5352             case 1: /* first of pair */
5353                 if (arg_ct->oalias) {
5354                     reg = new_args[arg_ct->alias_index];
5355                 } else if (arg_ct->newreg) {
5356                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5357                                              i_allocated_regs | o_allocated_regs,
5358                                              output_pref(op, k),
5359                                              ts->indirect_base);
5360                 } else {
5361                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5362                                              output_pref(op, k),
5363                                              ts->indirect_base);
5364                 }
5365                 break;
5366 
5367             case 2: /* second of pair */
5368                 if (arg_ct->oalias) {
5369                     reg = new_args[arg_ct->alias_index];
5370                 } else {
5371                     reg = new_args[arg_ct->pair_index] + 1;
5372                 }
5373                 break;
5374 
5375             case 3: /* first of pair, aliasing with a second input */
5376                 tcg_debug_assert(!arg_ct->newreg);
5377                 reg = new_args[arg_ct->pair_index] - 1;
5378                 break;
5379 
5380             default:
5381                 g_assert_not_reached();
5382             }
5383             tcg_regset_set_reg(o_allocated_regs, reg);
5384             set_temp_val_reg(s, ts, reg);
5385             ts->mem_coherent = 0;
5386             new_args[i] = reg;
5387         }
5388     }
5389 
5390     /* emit instruction */
5391     TCGType type = TCGOP_TYPE(op);
5392     switch (op->opc) {
5393     case INDEX_op_ext_i32_i64:
5394         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5395         break;
5396     case INDEX_op_extu_i32_i64:
5397         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5398         break;
5399     case INDEX_op_extrl_i64_i32:
5400         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5401         break;
5402 
5403     case INDEX_op_add:
5404     case INDEX_op_and:
5405     case INDEX_op_andc:
5406     case INDEX_op_clz:
5407     case INDEX_op_ctz:
5408     case INDEX_op_divs:
5409     case INDEX_op_divu:
5410     case INDEX_op_eqv:
5411     case INDEX_op_mul:
5412     case INDEX_op_mulsh:
5413     case INDEX_op_muluh:
5414     case INDEX_op_nand:
5415     case INDEX_op_nor:
5416     case INDEX_op_or:
5417     case INDEX_op_orc:
5418     case INDEX_op_rems:
5419     case INDEX_op_remu:
5420     case INDEX_op_rotl:
5421     case INDEX_op_rotr:
5422     case INDEX_op_sar:
5423     case INDEX_op_shl:
5424     case INDEX_op_shr:
5425     case INDEX_op_xor:
5426         {
5427             const TCGOutOpBinary *out =
5428                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5429 
5430             /* Constants should never appear in the first source operand. */
5431             tcg_debug_assert(!const_args[1]);
5432             if (const_args[2]) {
5433                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5434             } else {
5435                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5436             }
5437         }
5438         break;
5439 
5440     case INDEX_op_sub:
5441         {
5442             const TCGOutOpSubtract *out = &outop_sub;
5443 
5444             /*
5445              * Constants should never appear in the second source operand.
5446              * These are folded into an add with the negated constant.
5447              */
5448             tcg_debug_assert(!const_args[2]);
5449             if (const_args[1]) {
5450                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5451             } else {
5452                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5453             }
5454         }
5455         break;
5456 
5457     case INDEX_op_ctpop:
5458     case INDEX_op_neg:
5459     case INDEX_op_not:
5460         {
5461             const TCGOutOpUnary *out =
5462                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5463 
5464             /* Constants should have been folded. */
5465             tcg_debug_assert(!const_args[1]);
5466             out->out_rr(s, type, new_args[0], new_args[1]);
5467         }
5468         break;
5469 
5470     case INDEX_op_divs2:
5471     case INDEX_op_divu2:
5472         {
5473             const TCGOutOpDivRem *out =
5474                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5475 
5476             /* Only used by x86 and s390x, which use matching constraints. */
5477             tcg_debug_assert(new_args[0] == new_args[2]);
5478             tcg_debug_assert(new_args[1] == new_args[3]);
5479             tcg_debug_assert(!const_args[4]);
5480             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5481         }
5482         break;
5483 
5484     case INDEX_op_muls2:
5485     case INDEX_op_mulu2:
5486         {
5487             const TCGOutOpMul2 *out =
5488                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5489 
5490             tcg_debug_assert(!const_args[2]);
5491             tcg_debug_assert(!const_args[3]);
5492             out->out_rrrr(s, type, new_args[0], new_args[1],
5493                           new_args[2], new_args[3]);
5494         }
5495         break;
5496 
5497     case INDEX_op_brcond:
5498         {
5499             const TCGOutOpBrcond *out = &outop_brcond;
5500             TCGCond cond = new_args[2];
5501             TCGLabel *label = arg_label(new_args[3]);
5502 
5503             tcg_debug_assert(!const_args[0]);
5504             if (const_args[1]) {
5505                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5506             } else {
5507                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5508             }
5509         }
5510         break;
5511 
5512     case INDEX_op_movcond:
5513         {
5514             const TCGOutOpMovcond *out = &outop_movcond;
5515             TCGCond cond = new_args[5];
5516 
5517             tcg_debug_assert(!const_args[1]);
5518             out->out(s, type, cond, new_args[0],
5519                      new_args[1], new_args[2], const_args[2],
5520                      new_args[3], const_args[3],
5521                      new_args[4], const_args[4]);
5522         }
5523         break;
5524 
5525     case INDEX_op_setcond:
5526     case INDEX_op_negsetcond:
5527         {
5528             const TCGOutOpSetcond *out =
5529                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5530             TCGCond cond = new_args[3];
5531 
5532             tcg_debug_assert(!const_args[1]);
5533             if (const_args[2]) {
5534                 out->out_rri(s, type, cond,
5535                              new_args[0], new_args[1], new_args[2]);
5536             } else {
5537                 out->out_rrr(s, type, cond,
5538                              new_args[0], new_args[1], new_args[2]);
5539             }
5540         }
5541         break;
5542 
5543     default:
5544         if (def->flags & TCG_OPF_VECTOR) {
5545             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5546                            TCGOP_VECE(op), new_args, const_args);
5547         } else {
5548             tcg_out_op(s, op->opc, type, new_args, const_args);
5549         }
5550         break;
5551     }
5552 
5553     /* move the outputs in the correct register if needed */
5554     for (i = 0; i < nb_oargs; i++) {
5555         ts = arg_temp(op->args[i]);
5556 
5557         /* ENV should not be modified.  */
5558         tcg_debug_assert(!temp_readonly(ts));
5559 
5560         if (NEED_SYNC_ARG(i)) {
5561             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5562         } else if (IS_DEAD_ARG(i)) {
5563             temp_dead(s, ts);
5564         }
5565     }
5566 }
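
/*
 * Illustrative sketch of the emitter contract above (hypothetical
 * registers and values): for INDEX_op_add with constraints
 * "reg, reg, reg-or-imm", the input loop might leave
 *
 *     new_args[]   = { R0, R1, 5 }
 *     const_args[] = {  0,  0, 1 }
 *
 * and the switch then dispatches out->out_rri(s, type, R0, R1, 5).
 * A backend thus needs only the _rri and _rrr entry points per binary
 * op, and is never handed a constant in the first source operand.
 */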
5567 
5568 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5569 {
5570     const TCGLifeData arg_life = op->life;
5571     TCGTemp *ots, *itsl, *itsh;
5572     TCGType vtype = TCGOP_TYPE(op);
5573 
5574     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5575     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5576     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5577 
5578     ots = arg_temp(op->args[0]);
5579     itsl = arg_temp(op->args[1]);
5580     itsh = arg_temp(op->args[2]);
5581 
5582     /* ENV should not be modified.  */
5583     tcg_debug_assert(!temp_readonly(ots));
5584 
5585     /* Allocate the output register now.  */
5586     if (ots->val_type != TEMP_VAL_REG) {
5587         TCGRegSet allocated_regs = s->reserved_regs;
5588         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5589         TCGReg oreg;
5590 
5591         /* Make sure to not spill the input registers. */
5592         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5593             tcg_regset_set_reg(allocated_regs, itsl->reg);
5594         }
5595         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5596             tcg_regset_set_reg(allocated_regs, itsh->reg);
5597         }
5598 
5599         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5600                              output_pref(op, 0), ots->indirect_base);
5601         set_temp_val_reg(s, ots, oreg);
5602     }
5603 
5604     /* Promote dup2 of immediates to dupi_vec. */
5605     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5606         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5607         MemOp vece = MO_64;
5608 
5609         if (val == dup_const(MO_8, val)) {
5610             vece = MO_8;
5611         } else if (val == dup_const(MO_16, val)) {
5612             vece = MO_16;
5613         } else if (val == dup_const(MO_32, val)) {
5614             vece = MO_32;
5615         }
5616 
5617         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5618         goto done;
5619     }
5620 
5621     /* If the two inputs form one 64-bit value, try dupm_vec. */
5622     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5623         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5624         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5625         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5626 
5627         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5628         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5629 
5630         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5631                              its->mem_base->reg, its->mem_offset)) {
5632             goto done;
5633         }
5634     }
5635 
5636     /* Fall back to generic expansion. */
5637     return false;
5638 
5639  done:
5640     ots->mem_coherent = 0;
5641     if (IS_DEAD_ARG(1)) {
5642         temp_dead(s, itsl);
5643     }
5644     if (IS_DEAD_ARG(2)) {
5645         temp_dead(s, itsh);
5646     }
5647     if (NEED_SYNC_ARG(0)) {
5648         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5649     } else if (IS_DEAD_ARG(0)) {
5650         temp_dead(s, ots);
5651     }
5652     return true;
5653 }
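
/*
 * Example of the immediate promotion above: with itsl->val == 5 and
 * itsh->val == 5,
 *
 *     val = deposit64(5, 32, 32, 5) == 0x0000000500000005
 *
 * which equals dup_const(MO_32, val), so the dup2 collapses to a
 * single tcg_out_dupi_vec() with vece == MO_32 rather than MO_64.
 */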
5654 
5655 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5656                          TCGRegSet allocated_regs)
5657 {
5658     if (ts->val_type == TEMP_VAL_REG) {
5659         if (ts->reg != reg) {
5660             tcg_reg_free(s, reg, allocated_regs);
5661             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5662                 /*
5663                  * Cross register class move not supported.  Sync the
5664                  * temp back to its slot and load from there.
5665                  */
5666                 temp_sync(s, ts, allocated_regs, 0, 0);
5667                 tcg_out_ld(s, ts->type, reg,
5668                            ts->mem_base->reg, ts->mem_offset);
5669             }
5670         }
5671     } else {
5672         TCGRegSet arg_set = 0;
5673 
5674         tcg_reg_free(s, reg, allocated_regs);
5675         tcg_regset_set_reg(arg_set, reg);
5676         temp_load(s, ts, arg_set, allocated_regs, 0);
5677     }
5678 }
5679 
5680 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5681                          TCGRegSet allocated_regs)
5682 {
5683     /*
5684      * When the destination is on the stack, load up the temp and store.
5685      * If there are many call-saved registers, the temp might live to
5686      * see another use; otherwise it'll be discarded.
5687      */
5688     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5689     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5690                arg_slot_stk_ofs(arg_slot));
5691 }
5692 
5693 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5694                             TCGTemp *ts, TCGRegSet *allocated_regs)
5695 {
5696     if (arg_slot_reg_p(l->arg_slot)) {
5697         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5698         load_arg_reg(s, reg, ts, *allocated_regs);
5699         tcg_regset_set_reg(*allocated_regs, reg);
5700     } else {
5701         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5702     }
5703 }
5704 
5705 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5706                          intptr_t ref_off, TCGRegSet *allocated_regs)
5707 {
5708     TCGReg reg;
5709 
5710     if (arg_slot_reg_p(arg_slot)) {
5711         reg = tcg_target_call_iarg_regs[arg_slot];
5712         tcg_reg_free(s, reg, *allocated_regs);
5713         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5714         tcg_regset_set_reg(*allocated_regs, reg);
5715     } else {
5716         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5717                             *allocated_regs, 0, false);
5718         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5719         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5720                    arg_slot_stk_ofs(arg_slot));
5721     }
5722 }
5723 
5724 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5725 {
5726     const int nb_oargs = TCGOP_CALLO(op);
5727     const int nb_iargs = TCGOP_CALLI(op);
5728     const TCGLifeData arg_life = op->life;
5729     const TCGHelperInfo *info = tcg_call_info(op);
5730     TCGRegSet allocated_regs = s->reserved_regs;
5731     int i;
5732 
5733     /*
5734      * Move inputs into place in reverse order,
5735      * so that we place stacked arguments first.
5736      */
5737     for (i = nb_iargs - 1; i >= 0; --i) {
5738         const TCGCallArgumentLoc *loc = &info->in[i];
5739         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5740 
5741         switch (loc->kind) {
5742         case TCG_CALL_ARG_NORMAL:
5743         case TCG_CALL_ARG_EXTEND_U:
5744         case TCG_CALL_ARG_EXTEND_S:
5745             load_arg_normal(s, loc, ts, &allocated_regs);
5746             break;
5747         case TCG_CALL_ARG_BY_REF:
5748             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5749             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5750                          arg_slot_stk_ofs(loc->ref_slot),
5751                          &allocated_regs);
5752             break;
5753         case TCG_CALL_ARG_BY_REF_N:
5754             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5755             break;
5756         default:
5757             g_assert_not_reached();
5758         }
5759     }
5760 
5761     /* Mark dead temporaries and free the associated registers.  */
5762     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5763         if (IS_DEAD_ARG(i)) {
5764             temp_dead(s, arg_temp(op->args[i]));
5765         }
5766     }
5767 
5768     /* Clobber call registers.  */
5769     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5770         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5771             tcg_reg_free(s, i, allocated_regs);
5772         }
5773     }
5774 
5775     /*
5776      * Save globals if they might be written by the helper,
5777      * sync them if they might be read.
5778      */
5779     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5780         /* Nothing to do */
5781     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5782         sync_globals(s, allocated_regs);
5783     } else {
5784         save_globals(s, allocated_regs);
5785     }
5786 
5787     /*
5788      * If the ABI passes a pointer to the returned struct as the first
5789      * argument, load that now.  Pass a pointer to the output home slot.
5790      */
5791     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5792         TCGTemp *ts = arg_temp(op->args[0]);
5793 
5794         if (!ts->mem_allocated) {
5795             temp_allocate_frame(s, ts);
5796         }
5797         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5798     }
5799 
5800     tcg_out_call(s, tcg_call_func(op), info);
5801 
5802     /* Assign output registers and emit moves if needed.  */
5803     switch (info->out_kind) {
5804     case TCG_CALL_RET_NORMAL:
5805         for (i = 0; i < nb_oargs; i++) {
5806             TCGTemp *ts = arg_temp(op->args[i]);
5807             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5808 
5809             /* ENV should not be modified.  */
5810             tcg_debug_assert(!temp_readonly(ts));
5811 
5812             set_temp_val_reg(s, ts, reg);
5813             ts->mem_coherent = 0;
5814         }
5815         break;
5816 
5817     case TCG_CALL_RET_BY_VEC:
5818         {
5819             TCGTemp *ts = arg_temp(op->args[0]);
5820 
5821             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5822             tcg_debug_assert(ts->temp_subindex == 0);
5823             if (!ts->mem_allocated) {
5824                 temp_allocate_frame(s, ts);
5825             }
5826             tcg_out_st(s, TCG_TYPE_V128,
5827                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5828                        ts->mem_base->reg, ts->mem_offset);
5829         }
5830         /* fall through to mark all parts in memory */
5831 
5832     case TCG_CALL_RET_BY_REF:
5833         /* The callee has performed a write through the reference. */
5834         for (i = 0; i < nb_oargs; i++) {
5835             TCGTemp *ts = arg_temp(op->args[i]);
5836             ts->val_type = TEMP_VAL_MEM;
5837         }
5838         break;
5839 
5840     default:
5841         g_assert_not_reached();
5842     }
5843 
5844     /* Flush or discard output registers as needed. */
5845     for (i = 0; i < nb_oargs; i++) {
5846         TCGTemp *ts = arg_temp(op->args[i]);
5847         if (NEED_SYNC_ARG(i)) {
5848             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5849         } else if (IS_DEAD_ARG(i)) {
5850             temp_dead(s, ts);
5851         }
5852     }
5853 }
5854 
5855 /**
5856  * atom_and_align_for_opc:
5857  * @s: tcg context
5858  * @opc: memory operation code
5859  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5860  * @allow_two_ops: true if we are prepared to issue two operations
5861  *
5862  * Return the alignment and atomicity to use for the inline fast path
5863  * for the given memory operation.  The alignment may be larger than
5864  * that specified in @opc, and the correct alignment will be diagnosed
5865  * by the slow path helper.
5866  *
5867  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5868  * and issue two loads or stores for subalignment.
5869  */
5870 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5871                                            MemOp host_atom, bool allow_two_ops)
5872 {
5873     MemOp align = memop_alignment_bits(opc);
5874     MemOp size = opc & MO_SIZE;
5875     MemOp half = size ? size - 1 : 0;
5876     MemOp atom = opc & MO_ATOM_MASK;
5877     MemOp atmax;
5878 
5879     switch (atom) {
5880     case MO_ATOM_NONE:
5881         /* The operation requires no specific atomicity. */
5882         atmax = MO_8;
5883         break;
5884 
5885     case MO_ATOM_IFALIGN:
5886         atmax = size;
5887         break;
5888 
5889     case MO_ATOM_IFALIGN_PAIR:
5890         atmax = half;
5891         break;
5892 
5893     case MO_ATOM_WITHIN16:
5894         atmax = size;
5895         if (size == MO_128) {
5896             /* Misalignment implies !within16, and therefore no atomicity. */
5897         } else if (host_atom != MO_ATOM_WITHIN16) {
5898             /* The host does not implement within16, so require alignment. */
5899             align = MAX(align, size);
5900         }
5901         break;
5902 
5903     case MO_ATOM_WITHIN16_PAIR:
5904         atmax = size;
5905         /*
5906          * Misalignment implies !within16, and therefore half atomicity.
5907          * Any host prepared for two operations can implement this with
5908          * half alignment.
5909          */
5910         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5911             align = MAX(align, half);
5912         }
5913         break;
5914 
5915     case MO_ATOM_SUBALIGN:
5916         atmax = size;
5917         if (host_atom != MO_ATOM_SUBALIGN) {
5918             /* If unaligned but not odd, there are subobjects up to half. */
5919             if (allow_two_ops) {
5920                 align = MAX(align, half);
5921             } else {
5922                 align = MAX(align, size);
5923             }
5924         }
5925         break;
5926 
5927     default:
5928         g_assert_not_reached();
5929     }
5930 
5931     return (TCGAtomAlign){ .atom = atmax, .align = align };
5932 }
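
/*
 * Worked example (illustrative): a 16-byte load with MO_ATOM_WITHIN16
 * on a host whose atomicity guarantee is MO_ATOM_IFALIGN:
 *
 *     TCGAtomAlign a = atom_and_align_for_opc(
 *         s, MO_128 | MO_ATOM_WITHIN16, MO_ATOM_IFALIGN, true);
 *     // a.atom == MO_128, a.align unchanged: misalignment already
 *     // implies !within16, hence no atomicity is owed.
 *
 * The same atomicity at MO_64 instead raises a.align to MO_64, since
 * such a host can only honor within-16 atomicity via full alignment.
 */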
5933 
5934 /*
5935  * Similarly for qemu_ld/st slow path helpers.
5936  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5937  * using only the provided backend tcg_out_* functions.
5938  */
5939 
5940 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5941 {
5942     int ofs = arg_slot_stk_ofs(slot);
5943 
5944     /*
5945      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5946      * require extension to uint64_t, adjust the address for uint32_t.
5947      */
5948     if (HOST_BIG_ENDIAN &&
5949         TCG_TARGET_REG_BITS == 64 &&
5950         type == TCG_TYPE_I32) {
5951         ofs += 4;
5952     }
5953     return ofs;
5954 }
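
/*
 * Worked example: on a big-endian 64-bit host, an I32 argument stored
 * to a stack slot occupies the high half of the 8-byte slot, so the
 * significant bytes are at
 *
 *     tcg_out_helper_stk_ofs(TCG_TYPE_I32, slot)
 *         == arg_slot_stk_ofs(slot) + 4
 *
 * On little-endian hosts the low 4 bytes already sit at offset 0 and
 * no adjustment is applied.
 */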
5955 
5956 static void tcg_out_helper_load_slots(TCGContext *s,
5957                                       unsigned nmov, TCGMovExtend *mov,
5958                                       const TCGLdstHelperParam *parm)
5959 {
5960     unsigned i;
5961     TCGReg dst3;
5962 
5963     /*
5964      * Start from the end, storing to the stack first.
5965      * This frees those registers, so we need not consider overlap.
5966      */
5967     for (i = nmov; i-- > 0; ) {
5968         unsigned slot = mov[i].dst;
5969 
5970         if (arg_slot_reg_p(slot)) {
5971             goto found_reg;
5972         }
5973 
5974         TCGReg src = mov[i].src;
5975         TCGType dst_type = mov[i].dst_type;
5976         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5977 
5978         /* The argument is going onto the stack; extend into scratch. */
5979         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5980             tcg_debug_assert(parm->ntmp != 0);
5981             mov[i].dst = src = parm->tmp[0];
5982             tcg_out_movext1(s, &mov[i]);
5983         }
5984 
5985         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5986                    tcg_out_helper_stk_ofs(dst_type, slot));
5987     }
5988     return;
5989 
5990  found_reg:
5991     /*
5992      * The remaining arguments are in registers.
5993      * Convert slot numbers to argument registers.
5994      */
5995     nmov = i + 1;
5996     for (i = 0; i < nmov; ++i) {
5997         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5998     }
5999 
6000     switch (nmov) {
6001     case 4:
6002         /* The backend must have provided enough temps for the worst case. */
6003         tcg_debug_assert(parm->ntmp >= 2);
6004 
6005         dst3 = mov[3].dst;
6006         for (unsigned j = 0; j < 3; ++j) {
6007             if (dst3 == mov[j].src) {
6008                 /*
6009                  * Conflict. Copy the source to a temporary, perform the
6010                  * remaining moves, then the extension from our scratch
6011                  * on the way out.
6012                  */
6013                 TCGReg scratch = parm->tmp[1];
6014 
6015                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6016                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6017                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6018                 return;
6019             }
6020         }
6021 
6022         /* No conflicts: perform this move and continue. */
6023         tcg_out_movext1(s, &mov[3]);
6024         /* fall through */
6025 
6026     case 3:
6027         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6028                         parm->ntmp ? parm->tmp[0] : -1);
6029         break;
6030     case 2:
6031         tcg_out_movext2(s, mov, mov + 1,
6032                         parm->ntmp ? parm->tmp[0] : -1);
6033         break;
6034     case 1:
6035         tcg_out_movext1(s, mov);
6036         break;
6037     default:
6038         g_assert_not_reached();
6039     }
6040 }
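
/*
 * Example of the 4-move conflict handled above (hypothetical
 * registers): if mov[3].dst aliases mov[1].src, performing moves 0..2
 * first would clobber move 3's source.  Hence the sequence
 *
 *     mov   tmp1, mov[3].src      // preserve the source
 *     ...                         // moves 0..2 via tcg_out_movext3()
 *     ext   mov[3].dst, tmp1      // finish from the scratch copy
 *
 * after which the function returns, all four slots being loaded.
 */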
6041 
6042 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6043                                     TCGType type, tcg_target_long imm,
6044                                     const TCGLdstHelperParam *parm)
6045 {
6046     if (arg_slot_reg_p(slot)) {
6047         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6048     } else {
6049         int ofs = tcg_out_helper_stk_ofs(type, slot);
6050         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6051             tcg_debug_assert(parm->ntmp != 0);
6052             tcg_out_movi(s, type, parm->tmp[0], imm);
6053             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6054         }
6055     }
6056 }
6057 
6058 static void tcg_out_helper_load_common_args(TCGContext *s,
6059                                             const TCGLabelQemuLdst *ldst,
6060                                             const TCGLdstHelperParam *parm,
6061                                             const TCGHelperInfo *info,
6062                                             unsigned next_arg)
6063 {
6064     TCGMovExtend ptr_mov = {
6065         .dst_type = TCG_TYPE_PTR,
6066         .src_type = TCG_TYPE_PTR,
6067         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6068     };
6069     const TCGCallArgumentLoc *loc = &info->in[0];
6070     TCGType type;
6071     unsigned slot;
6072     tcg_target_ulong imm;
6073 
6074     /*
6075      * Handle env, which is always first.
6076      */
6077     ptr_mov.dst = loc->arg_slot;
6078     ptr_mov.src = TCG_AREG0;
6079     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6080 
6081     /*
6082      * Handle oi.
6083      */
6084     imm = ldst->oi;
6085     loc = &info->in[next_arg];
6086     type = TCG_TYPE_I32;
6087     switch (loc->kind) {
6088     case TCG_CALL_ARG_NORMAL:
6089         break;
6090     case TCG_CALL_ARG_EXTEND_U:
6091     case TCG_CALL_ARG_EXTEND_S:
6092         /* No extension required for MemOpIdx. */
6093         tcg_debug_assert(imm <= INT32_MAX);
6094         type = TCG_TYPE_REG;
6095         break;
6096     default:
6097         g_assert_not_reached();
6098     }
6099     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6100     next_arg++;
6101 
6102     /*
6103      * Handle ra.
6104      */
6105     loc = &info->in[next_arg];
6106     slot = loc->arg_slot;
6107     if (parm->ra_gen) {
6108         int arg_reg = -1;
6109         TCGReg ra_reg;
6110 
6111         if (arg_slot_reg_p(slot)) {
6112             arg_reg = tcg_target_call_iarg_regs[slot];
6113         }
6114         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6115 
6116         ptr_mov.dst = slot;
6117         ptr_mov.src = ra_reg;
6118         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6119     } else {
6120         imm = (uintptr_t)ldst->raddr;
6121         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6122     }
6123 }
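
/*
 * Sketch of the resulting argument layout (illustrative): for a
 * load/store helper the common arguments land as
 *
 *     arg 0:   env                      (always first)
 *     arg N:   oi, the MemOpIdx         (loaded as an immediate)
 *     arg N+1: ra, the return address   (raddr, or parm->ra_gen())
 *
 * with the guest address argument(s) at slots 1..N-1 placed by the
 * callers below.
 */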
6124 
6125 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6126                                        const TCGCallArgumentLoc *loc,
6127                                        TCGType dst_type, TCGType src_type,
6128                                        TCGReg lo, TCGReg hi)
6129 {
6130     MemOp reg_mo;
6131 
6132     if (dst_type <= TCG_TYPE_REG) {
6133         MemOp src_ext;
6134 
6135         switch (loc->kind) {
6136         case TCG_CALL_ARG_NORMAL:
6137             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6138             break;
6139         case TCG_CALL_ARG_EXTEND_U:
6140             dst_type = TCG_TYPE_REG;
6141             src_ext = MO_UL;
6142             break;
6143         case TCG_CALL_ARG_EXTEND_S:
6144             dst_type = TCG_TYPE_REG;
6145             src_ext = MO_SL;
6146             break;
6147         default:
6148             g_assert_not_reached();
6149         }
6150 
6151         mov[0].dst = loc->arg_slot;
6152         mov[0].dst_type = dst_type;
6153         mov[0].src = lo;
6154         mov[0].src_type = src_type;
6155         mov[0].src_ext = src_ext;
6156         return 1;
6157     }
6158 
6159     if (TCG_TARGET_REG_BITS == 32) {
6160         assert(dst_type == TCG_TYPE_I64);
6161         reg_mo = MO_32;
6162     } else {
6163         assert(dst_type == TCG_TYPE_I128);
6164         reg_mo = MO_64;
6165     }
6166 
6167     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6168     mov[0].src = lo;
6169     mov[0].dst_type = TCG_TYPE_REG;
6170     mov[0].src_type = TCG_TYPE_REG;
6171     mov[0].src_ext = reg_mo;
6172 
6173     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6174     mov[1].src = hi;
6175     mov[1].dst_type = TCG_TYPE_REG;
6176     mov[1].src_type = TCG_TYPE_REG;
6177     mov[1].src_ext = reg_mo;
6178 
6179     return 2;
6180 }
6181 
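/*
 * For example (illustrative only): passing a TCG_TYPE_I64 argument on
 * a 32-bit host uses two consecutive 32-bit slots in host-endian
 * order.  With HOST_BIG_ENDIAN == 1 the code above produces
 *
 *     mov[0]: loc[1].arg_slot <- lo
 *     mov[1]: loc[0].arg_slot <- hi
 *
 * whereas a little-endian host places the low part in loc[0].
 */
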
6182 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6183                                    const TCGLdstHelperParam *parm)
6184 {
6185     const TCGHelperInfo *info;
6186     const TCGCallArgumentLoc *loc;
6187     TCGMovExtend mov[2];
6188     unsigned next_arg, nmov;
6189     MemOp mop = get_memop(ldst->oi);
6190 
6191     switch (mop & MO_SIZE) {
6192     case MO_8:
6193     case MO_16:
6194     case MO_32:
6195         info = &info_helper_ld32_mmu;
6196         break;
6197     case MO_64:
6198         info = &info_helper_ld64_mmu;
6199         break;
6200     case MO_128:
6201         info = &info_helper_ld128_mmu;
6202         break;
6203     default:
6204         g_assert_not_reached();
6205     }
6206 
6207     /* Defer env argument. */
6208     next_arg = 1;
6209 
6210     loc = &info->in[next_arg];
6211     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6212         /*
6213          * 32-bit host with 32-bit guest: zero-extend the guest address
6214          * to 64 bits for the helper by storing the low part, then
6215          * load a zero for the high part.
6216          */
6217         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6218                                TCG_TYPE_I32, TCG_TYPE_I32,
6219                                ldst->addr_reg, -1);
6220         tcg_out_helper_load_slots(s, 1, mov, parm);
6221 
6222         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6223                                 TCG_TYPE_I32, 0, parm);
6224         next_arg += 2;
6225     } else {
6226         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6227                                       ldst->addr_reg, -1);
6228         tcg_out_helper_load_slots(s, nmov, mov, parm);
6229         next_arg += nmov;
6230     }
6231 
6232     switch (info->out_kind) {
6233     case TCG_CALL_RET_NORMAL:
6234     case TCG_CALL_RET_BY_VEC:
6235         break;
6236     case TCG_CALL_RET_BY_REF:
6237         /*
6238          * The return reference is in the first argument slot.
6239          * We need memory in which to return: re-use the top of stack.
6240          */
6241         {
6242             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6243 
6244             if (arg_slot_reg_p(0)) {
6245                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6246                                  TCG_REG_CALL_STACK, ofs_slot0);
6247             } else {
6248                 tcg_debug_assert(parm->ntmp != 0);
6249                 tcg_out_addi_ptr(s, parm->tmp[0],
6250                                  TCG_REG_CALL_STACK, ofs_slot0);
6251                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6252                            TCG_REG_CALL_STACK, ofs_slot0);
6253             }
6254         }
6255         break;
6256     default:
6257         g_assert_not_reached();
6258     }
6259 
6260     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6261 }
6262 
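/*
 * For the TCG_CALL_RET_BY_REF case above, the generated code is
 * conceptually (a sketch; the exact instructions are target-specific):
 *
 *     arg0 <- sp + TCG_TARGET_CALL_STACK_OFFSET    (return memory)
 *     call <128-bit load helper>
 *
 * after which tcg_out_ld_helper_ret() below reloads the result from
 * that same stack memory.
 */
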
6263 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6264                                   bool load_sign,
6265                                   const TCGLdstHelperParam *parm)
6266 {
6267     MemOp mop = get_memop(ldst->oi);
6268     TCGMovExtend mov[2];
6269     int ofs_slot0;
6270 
6271     switch (ldst->type) {
6272     case TCG_TYPE_I64:
6273         if (TCG_TARGET_REG_BITS == 32) {
6274             break;
6275         }
6276         /* fall through */
6277 
6278     case TCG_TYPE_I32:
6279         mov[0].dst = ldst->datalo_reg;
6280         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6281         mov[0].dst_type = ldst->type;
6282         mov[0].src_type = TCG_TYPE_REG;
6283 
6284         /*
6285          * If load_sign, then we allowed the helper to perform the
6286          * appropriate sign extension to tcg_target_ulong, and all
6287          * we need now is a plain move.
6288          *
6289      * If not, then we expect the relevant extension
6290          * instruction to be no more expensive than a move, and
6291          * we thus save the icache etc by only using one of two
6292          * helper functions.
6293          */
6294         if (load_sign || !(mop & MO_SIGN)) {
6295             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6296                 mov[0].src_ext = MO_32;
6297             } else {
6298                 mov[0].src_ext = MO_64;
6299             }
6300         } else {
6301             mov[0].src_ext = mop & MO_SSIZE;
6302         }
6303         tcg_out_movext1(s, mov);
6304         return;
6305 
6306     case TCG_TYPE_I128:
6307         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6308         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6309         switch (TCG_TARGET_CALL_RET_I128) {
6310         case TCG_CALL_RET_NORMAL:
6311             break;
6312         case TCG_CALL_RET_BY_VEC:
6313             tcg_out_st(s, TCG_TYPE_V128,
6314                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6315                        TCG_REG_CALL_STACK, ofs_slot0);
6316             /* fall through */
6317         case TCG_CALL_RET_BY_REF:
6318             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6319                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6320             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6321                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6322             return;
6323         default:
6324             g_assert_not_reached();
6325         }
6326         break;
6327 
6328     default:
6329         g_assert_not_reached();
6330     }
6331 
6332     mov[0].dst = ldst->datalo_reg;
6333     mov[0].src =
6334         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6335     mov[0].dst_type = TCG_TYPE_REG;
6336     mov[0].src_type = TCG_TYPE_REG;
6337     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6338 
6339     mov[1].dst = ldst->datahi_reg;
6340     mov[1].src =
6341         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6342     mov[1].dst_type = TCG_TYPE_REG;
6343     mov[1].src_type = TCG_TYPE_REG;
6344     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6345 
6346     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6347 }
6348 
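/*
 * Example of the load_sign optimization above (illustrative): for a
 * signed byte load (MO_SB) with load_sign == false, the unsigned
 * helper is called and mov[0].src_ext becomes MO_SB, so the final
 * tcg_out_movext1() emits a single sign-extend-byte instruction
 * rather than requiring a separate signed helper function.
 */
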
6349 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6350                                    const TCGLdstHelperParam *parm)
6351 {
6352     const TCGHelperInfo *info;
6353     const TCGCallArgumentLoc *loc;
6354     TCGMovExtend mov[4];
6355     TCGType data_type;
6356     unsigned next_arg, nmov, n;
6357     MemOp mop = get_memop(ldst->oi);
6358 
6359     switch (mop & MO_SIZE) {
6360     case MO_8:
6361     case MO_16:
6362     case MO_32:
6363         info = &info_helper_st32_mmu;
6364         data_type = TCG_TYPE_I32;
6365         break;
6366     case MO_64:
6367         info = &info_helper_st64_mmu;
6368         data_type = TCG_TYPE_I64;
6369         break;
6370     case MO_128:
6371         info = &info_helper_st128_mmu;
6372         data_type = TCG_TYPE_I128;
6373         break;
6374     default:
6375         g_assert_not_reached();
6376     }
6377 
6378     /* Defer env argument. */
6379     next_arg = 1;
6380     nmov = 0;
6381 
6382     /* Handle addr argument. */
6383     loc = &info->in[next_arg];
6384     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6385     if (TCG_TARGET_REG_BITS == 32) {
6386         /*
6387          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6388          * to 64 bits for the helper by storing the low part.  Later,
6389          * after we have processed the register inputs, we will load a
6390          * zero for the high part.
6391          */
6392         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6393                                TCG_TYPE_I32, TCG_TYPE_I32,
6394                                ldst->addr_reg, -1);
6395         next_arg += 2;
6396         nmov += 1;
6397     } else {
6398         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6399                                    ldst->addr_reg, -1);
6400         next_arg += n;
6401         nmov += n;
6402     }
6403 
6404     /* Handle data argument. */
6405     loc = &info->in[next_arg];
6406     switch (loc->kind) {
6407     case TCG_CALL_ARG_NORMAL:
6408     case TCG_CALL_ARG_EXTEND_U:
6409     case TCG_CALL_ARG_EXTEND_S:
6410         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6411                                    ldst->datalo_reg, ldst->datahi_reg);
6412         next_arg += n;
6413         nmov += n;
6414         tcg_out_helper_load_slots(s, nmov, mov, parm);
6415         break;
6416 
6417     case TCG_CALL_ARG_BY_REF:
6418         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6419         tcg_debug_assert(data_type == TCG_TYPE_I128);
6420         tcg_out_st(s, TCG_TYPE_I64,
6421                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6422                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6423         tcg_out_st(s, TCG_TYPE_I64,
6424                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6425                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6426 
6427         tcg_out_helper_load_slots(s, nmov, mov, parm);
6428 
6429         if (arg_slot_reg_p(loc->arg_slot)) {
6430             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6431                              TCG_REG_CALL_STACK,
6432                              arg_slot_stk_ofs(loc->ref_slot));
6433         } else {
6434             tcg_debug_assert(parm->ntmp != 0);
6435             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6436                              arg_slot_stk_ofs(loc->ref_slot));
6437             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6438                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6439         }
6440         next_arg += 2;
6441         break;
6442 
6443     default:
6444         g_assert_not_reached();
6445     }
6446 
6447     if (TCG_TARGET_REG_BITS == 32) {
6448         /* Zero extend the address by loading a zero for the high part. */
6449         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6450         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6451     }
6452 
6453     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6454 }
6455 
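/*
 * Illustration of the TCG_CALL_ARG_BY_REF case above (a sketch): a
 * 128-bit store spills both halves to the reserved ref_slot pair on
 * the stack, in host-endian order, and then passes a pointer to that
 * memory as the data argument:
 *
 *     st   datalo/datahi -> [sp + ref_slot(loc[0])]
 *     st   datahi/datalo -> [sp + ref_slot(loc[1])]
 *     arg <- sp + ref_slot(loc[0])
 */
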
6456 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6457 {
6458     int i, start_words, num_insns;
6459     TCGOp *op;
6460 
6461     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6462                  && qemu_log_in_addr_range(pc_start))) {
6463         FILE *logfile = qemu_log_trylock();
6464         if (logfile) {
6465             fprintf(logfile, "OP:\n");
6466             tcg_dump_ops(s, logfile, false);
6467             fprintf(logfile, "\n");
6468             qemu_log_unlock(logfile);
6469         }
6470     }
6471 
6472 #ifdef CONFIG_DEBUG_TCG
6473     /* Ensure all labels referenced have been emitted.  */
6474     {
6475         TCGLabel *l;
6476         bool error = false;
6477 
6478         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6479             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6480                 qemu_log_mask(CPU_LOG_TB_OP,
6481                               "$L%d referenced but not present.\n", l->id);
6482                 error = true;
6483             }
6484         }
6485         assert(!error);
6486     }
6487 #endif
6488 
6489     /* Do not reuse any EBB that may be allocated within the TB. */
6490     tcg_temp_ebb_reset_freed(s);
6491 
6492     tcg_optimize(s);
6493 
6494     reachable_code_pass(s);
6495     liveness_pass_0(s);
6496     liveness_pass_1(s);
6497 
6498     if (s->nb_indirects > 0) {
6499         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6500                      && qemu_log_in_addr_range(pc_start))) {
6501             FILE *logfile = qemu_log_trylock();
6502             if (logfile) {
6503                 fprintf(logfile, "OP before indirect lowering:\n");
6504                 tcg_dump_ops(s, logfile, false);
6505                 fprintf(logfile, "\n");
6506                 qemu_log_unlock(logfile);
6507             }
6508         }
6509 
6510         /* Replace indirect temps with direct temps.  */
6511         if (liveness_pass_2(s)) {
6512             /* If changes were made, re-run liveness.  */
6513             liveness_pass_1(s);
6514         }
6515     }
6516 
6517     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6518                  && qemu_log_in_addr_range(pc_start))) {
6519         FILE *logfile = qemu_log_trylock();
6520         if (logfile) {
6521             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6522             tcg_dump_ops(s, logfile, true);
6523             fprintf(logfile, "\n");
6524             qemu_log_unlock(logfile);
6525         }
6526     }
6527 
6528     /* Initialize goto_tb jump offsets. */
6529     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6530     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6531     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6532     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6533 
6534     tcg_reg_alloc_start(s);
6535 
6536     /*
6537      * Reset the buffer pointers when restarting after overflow.
6538      * TODO: Move this into translate-all.c with the rest of the
6539      * buffer management.  Having only this done here is confusing.
6540      */
6541     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6542     s->code_ptr = s->code_buf;
6543     s->data_gen_ptr = NULL;
6544 
6545     QSIMPLEQ_INIT(&s->ldst_labels);
6546     s->pool_labels = NULL;
6547 
6548     start_words = s->insn_start_words;
6549     s->gen_insn_data =
6550         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6551 
6552     tcg_out_tb_start(s);
6553 
6554     num_insns = -1;
6555     QTAILQ_FOREACH(op, &s->ops, link) {
6556         TCGOpcode opc = op->opc;
6557 
6558         switch (opc) {
6559         case INDEX_op_mov:
6560         case INDEX_op_mov_vec:
6561             tcg_reg_alloc_mov(s, op);
6562             break;
6563         case INDEX_op_dup_vec:
6564             tcg_reg_alloc_dup(s, op);
6565             break;
6566         case INDEX_op_insn_start:
6567             if (num_insns >= 0) {
6568                 size_t off = tcg_current_code_size(s);
6569                 s->gen_insn_end_off[num_insns] = off;
6570                 /* Assert that we do not overflow our stored offset.  */
6571                 assert(s->gen_insn_end_off[num_insns] == off);
6572             }
6573             num_insns++;
6574             for (i = 0; i < start_words; ++i) {
6575                 s->gen_insn_data[num_insns * start_words + i] =
6576                     tcg_get_insn_start_param(op, i);
6577             }
6578             break;
6579         case INDEX_op_discard:
6580             temp_dead(s, arg_temp(op->args[0]));
6581             break;
6582         case INDEX_op_set_label:
6583             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6584             tcg_out_label(s, arg_label(op->args[0]));
6585             break;
6586         case INDEX_op_call:
6587             tcg_reg_alloc_call(s, op);
6588             break;
6589         case INDEX_op_exit_tb:
6590             tcg_out_exit_tb(s, op->args[0]);
6591             break;
6592         case INDEX_op_goto_tb:
6593             tcg_out_goto_tb(s, op->args[0]);
6594             break;
6595         case INDEX_op_dup2_vec:
6596             if (tcg_reg_alloc_dup2(s, op)) {
6597                 break;
6598             }
6599             /* fall through */
6600         default:
6601             /* Sanity check that we've not introduced any unhandled opcodes. */
6602             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6603                                               TCGOP_FLAGS(op)));
6604             /* Note: it would be considerably faster to have specialized
6605                register allocator functions for some common argument
6606                patterns.  */
6607             tcg_reg_alloc_op(s, op);
6608             break;
6609         }
6610         /* Test for (pending) buffer overflow.  The assumption is that any
6611            one operation beginning below the high water mark cannot overrun
6612            the buffer completely.  Thus we can test for overflow after
6613            generating code without having to check during generation.  */
6614         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6615             return -1;
6616         }
6617         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6618         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6619             return -2;
6620         }
6621     }
6622     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6623     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6624 
6625     /* Generate TB finalization at the end of block */
6626     i = tcg_out_ldst_finalize(s);
6627     if (i < 0) {
6628         return i;
6629     }
6630     i = tcg_out_pool_finalize(s);
6631     if (i < 0) {
6632         return i;
6633     }
6634     if (!tcg_resolve_relocs(s)) {
6635         return -2;
6636     }
6637 
6638 #ifndef CONFIG_TCG_INTERPRETER
6639     /* flush instruction cache */
6640     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6641                         (uintptr_t)s->code_buf,
6642                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6643 #endif
6644 
6645     return tcg_current_code_size(s);
6646 }
6647 
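/*
 * Caller-side sketch (illustrative; the real handling lives in
 * accel/tcg/translate-all.c): a negative return from tcg_gen_code()
 * means the TB must be regenerated, either because the code buffer
 * filled up (-1) or because the TB grew too large for the 16-bit
 * insn offsets or the relocations (-2):
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
 *     if (unlikely(gen_code_size < 0)) {
 *         ... flush the code buffer or retry with a smaller TB ...
 *     }
 */
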
6648 #ifdef ELF_HOST_MACHINE
6649 /* In order to use this feature, the backend needs to do three things:
6650 
6651    (1) Define ELF_HOST_MACHINE to indicate both what value to
6652        put into the ELF image and to indicate support for the feature.
6653 
6654    (2) Define tcg_register_jit.  This should create a buffer containing
6655        the contents of a .debug_frame section that describes the post-
6656        prologue unwind info for the tcg machine.
6657 
6658    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6659 */
6660 
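/*
 * A minimal backend sketch (the DebugFrame contents here are an
 * assumption for illustration, not any real target's unwind info):
 *
 *     static const DebugFrame debug_frame = {
 *         .h.cie.len = ...,    CIE + FDE header as defined above,
 *         ...                  plus the host-specific CFA opcodes
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */
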
6661 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6662 typedef enum {
6663     JIT_NOACTION = 0,
6664     JIT_REGISTER_FN,
6665     JIT_UNREGISTER_FN
6666 } jit_actions_t;
6667 
6668 struct jit_code_entry {
6669     struct jit_code_entry *next_entry;
6670     struct jit_code_entry *prev_entry;
6671     const void *symfile_addr;
6672     uint64_t symfile_size;
6673 };
6674 
6675 struct jit_descriptor {
6676     uint32_t version;
6677     uint32_t action_flag;
6678     struct jit_code_entry *relevant_entry;
6679     struct jit_code_entry *first_entry;
6680 };
6681 
6682 void __jit_debug_register_code(void) __attribute__((noinline));
6683 void __jit_debug_register_code(void)
6684 {
6685     asm("");
6686 }
6687 
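/*
 * GDB places a breakpoint in the (deliberately empty, noinline)
 * function above; the asm("") keeps the compiler from eliding the
 * call.  Registration consists of updating __jit_debug_descriptor and
 * then calling __jit_debug_register_code(), per the GDB JIT interface.
 */
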
6688 /* Must statically initialize the version, because GDB may check
6689    the version before we can set it.  */
6690 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6691 
6692 /* End GDB interface.  */
6693 
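/*
 * Return the offset of @str within the nul-separated string table
 * @strtab.  The string must be present: there is no terminating
 * condition, so a missing entry would walk off the end of the table.
 * E.g. find_string(img->str, ".text") below yields the sh_name offset
 * of the ".text" section name.
 */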
6694 static int find_string(const char *strtab, const char *str)
6695 {
6696     const char *p = strtab + 1;
6697 
6698     while (1) {
6699         if (strcmp(p, str) == 0) {
6700             return p - strtab;
6701         }
6702         p += strlen(p) + 1;
6703     }
6704 }
6705 
6706 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6707                                  const void *debug_frame,
6708                                  size_t debug_frame_size)
6709 {
6710     struct __attribute__((packed)) DebugInfo {
6711         uint32_t  len;
6712         uint16_t  version;
6713         uint32_t  abbrev;
6714         uint8_t   ptr_size;
6715         uint8_t   cu_die;
6716         uint16_t  cu_lang;
6717         uintptr_t cu_low_pc;
6718         uintptr_t cu_high_pc;
6719         uint8_t   fn_die;
6720         char      fn_name[16];
6721         uintptr_t fn_low_pc;
6722         uintptr_t fn_high_pc;
6723         uint8_t   cu_eoc;
6724     };
6725 
6726     struct ElfImage {
6727         ElfW(Ehdr) ehdr;
6728         ElfW(Phdr) phdr;
6729         ElfW(Shdr) shdr[7];
6730         ElfW(Sym)  sym[2];
6731         struct DebugInfo di;
6732         uint8_t    da[24];
6733         char       str[80];
6734     };
6735 
6736     struct ElfImage *img;
6737 
6738     static const struct ElfImage img_template = {
6739         .ehdr = {
6740             .e_ident[EI_MAG0] = ELFMAG0,
6741             .e_ident[EI_MAG1] = ELFMAG1,
6742             .e_ident[EI_MAG2] = ELFMAG2,
6743             .e_ident[EI_MAG3] = ELFMAG3,
6744             .e_ident[EI_CLASS] = ELF_CLASS,
6745             .e_ident[EI_DATA] = ELF_DATA,
6746             .e_ident[EI_VERSION] = EV_CURRENT,
6747             .e_type = ET_EXEC,
6748             .e_machine = ELF_HOST_MACHINE,
6749             .e_version = EV_CURRENT,
6750             .e_phoff = offsetof(struct ElfImage, phdr),
6751             .e_shoff = offsetof(struct ElfImage, shdr),
6752             .e_ehsize = sizeof(ElfW(Ehdr)),
6753             .e_phentsize = sizeof(ElfW(Phdr)),
6754             .e_phnum = 1,
6755             .e_shentsize = sizeof(ElfW(Shdr)),
6756             .e_shnum = ARRAY_SIZE(img->shdr),
6757             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6758 #ifdef ELF_HOST_FLAGS
6759             .e_flags = ELF_HOST_FLAGS,
6760 #endif
6761 #ifdef ELF_OSABI
6762             .e_ident[EI_OSABI] = ELF_OSABI,
6763 #endif
6764         },
6765         .phdr = {
6766             .p_type = PT_LOAD,
6767             .p_flags = PF_X,
6768         },
6769         .shdr = {
6770             [0] = { .sh_type = SHT_NULL },
6771             /* Trick: The contents of code_gen_buffer are not present in
6772                this fake ELF file; that got allocated elsewhere.  Therefore
6773                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6774                will not look for contents.  We can record any address.  */
6775             [1] = { /* .text */
6776                 .sh_type = SHT_NOBITS,
6777                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6778             },
6779             [2] = { /* .debug_info */
6780                 .sh_type = SHT_PROGBITS,
6781                 .sh_offset = offsetof(struct ElfImage, di),
6782                 .sh_size = sizeof(struct DebugInfo),
6783             },
6784             [3] = { /* .debug_abbrev */
6785                 .sh_type = SHT_PROGBITS,
6786                 .sh_offset = offsetof(struct ElfImage, da),
6787                 .sh_size = sizeof(img->da),
6788             },
6789             [4] = { /* .debug_frame */
6790                 .sh_type = SHT_PROGBITS,
6791                 .sh_offset = sizeof(struct ElfImage),
6792             },
6793             [5] = { /* .symtab */
6794                 .sh_type = SHT_SYMTAB,
6795                 .sh_offset = offsetof(struct ElfImage, sym),
6796                 .sh_size = sizeof(img->sym),
6797                 .sh_info = 1,
6798                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6799                 .sh_entsize = sizeof(ElfW(Sym)),
6800             },
6801             [6] = { /* .strtab */
6802                 .sh_type = SHT_STRTAB,
6803                 .sh_offset = offsetof(struct ElfImage, str),
6804                 .sh_size = sizeof(img->str),
6805             }
6806         },
6807         .sym = {
6808             [1] = { /* code_gen_buffer */
6809                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6810                 .st_shndx = 1,
6811             }
6812         },
6813         .di = {
6814             .len = sizeof(struct DebugInfo) - 4,
6815             .version = 2,
6816             .ptr_size = sizeof(void *),
6817             .cu_die = 1,
6818             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6819             .fn_die = 2,
6820             .fn_name = "code_gen_buffer"
6821         },
6822         .da = {
6823             1,          /* abbrev number (the cu) */
6824             0x11, 1,    /* DW_TAG_compile_unit, has children */
6825             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6826             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6827             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6828             0, 0,       /* end of abbrev */
6829             2,          /* abbrev number (the fn) */
6830             0x2e, 0,    /* DW_TAG_subprogram, no children */
6831             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6832             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6833             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6834             0, 0,       /* end of abbrev */
6835             0           /* no more abbrev */
6836         },
6837         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6838                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6839     };
6840 
6841     /* We only need a single jit entry; statically allocate it.  */
6842     static struct jit_code_entry one_entry;
6843 
6844     uintptr_t buf = (uintptr_t)buf_ptr;
6845     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6846     DebugFrameHeader *dfh;
6847 
6848     img = g_malloc(img_size);
6849     *img = img_template;
6850 
6851     img->phdr.p_vaddr = buf;
6852     img->phdr.p_paddr = buf;
6853     img->phdr.p_memsz = buf_size;
6854 
6855     img->shdr[1].sh_name = find_string(img->str, ".text");
6856     img->shdr[1].sh_addr = buf;
6857     img->shdr[1].sh_size = buf_size;
6858 
6859     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6860     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6861 
6862     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6863     img->shdr[4].sh_size = debug_frame_size;
6864 
6865     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6866     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6867 
6868     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6869     img->sym[1].st_value = buf;
6870     img->sym[1].st_size = buf_size;
6871 
6872     img->di.cu_low_pc = buf;
6873     img->di.cu_high_pc = buf + buf_size;
6874     img->di.fn_low_pc = buf;
6875     img->di.fn_high_pc = buf + buf_size;
6876 
6877     dfh = (DebugFrameHeader *)(img + 1);
6878     memcpy(dfh, debug_frame, debug_frame_size);
6879     dfh->fde.func_start = buf;
6880     dfh->fde.func_len = buf_size;
6881 
6882 #ifdef DEBUG_JIT
6883     /* Enable this block to debug creation of the ELF image file.
6884        The result can be inspected with readelf, objdump, etc.  */
6885     {
6886         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6887         FILE *f = fopen(jit, "w+b");
6888         if (f) {
6889             if (fwrite(img, img_size, 1, f) != 1) {
6890                 /* Ignore errors; the check merely placates warn_unused_result.  */
6891             }
6892             fclose(f);
6893         }
6894     }
6895 #endif
6896 
6897     one_entry.symfile_addr = img;
6898     one_entry.symfile_size = img_size;
6899 
6900     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6901     __jit_debug_descriptor.relevant_entry = &one_entry;
6902     __jit_debug_descriptor.first_entry = &one_entry;
6903     __jit_debug_register_code();
6904 }
6905 #else
6906 /* No support for the feature.  Provide the entry point expected by exec.c,
6907    and implement the internal function we declared earlier.  */
6908 
6909 static void tcg_register_jit_int(const void *buf, size_t size,
6910                                  const void *debug_frame,
6911                                  size_t debug_frame_size)
6912 {
6913 }
6914 
6915 void tcg_register_jit(const void *buf, size_t buf_size)
6916 {
6917 }
6918 #endif /* ELF_HOST_MACHINE */
6919 
6920 #if !TCG_TARGET_MAYBE_vec
6921 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6922 {
6923     g_assert_not_reached();
6924 }
6925 #endif
6926