xref: /openbmc/qemu/tcg/tcg.c (revision b6d69fcefbd45ca33b896abfbc8e27e0f713bdf0)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
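/*
 * Illustrative sketch (not upstream code): a backend builds a mostly
 * static DebugFrameHeader image and passes it to tcg_register_jit_int()
 * together with the code buffer, so that GDB can unwind through JITed
 * frames.  The field values below are hypothetical:
 *
 *     static DebugFrameHeader debug_frame = {
 *         .cie.len = sizeof(DebugFrameCIE) - 4,  // length after .len itself
 *         .cie.id = -1,                          // marks this as a CIE
 *         .cie.version = 1,
 *         .cie.code_align = 1,
 *         .cie.data_align = 0x78,                // sleb128(-8)
 *         .cie.return_column = 30,               // e.g. the AArch64 LR
 *         .fde.len = sizeof(DebugFrameFDEHeader) - 4,
 *     };
 *     // fde.func_start and fde.func_len are filled in per code buffer
 *     // just before registration.
 */
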
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* gen code addr following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
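/*
 * Illustrative note on the poisoned definition above: in system mode
 * there is no guest_base, so any use the compiler cannot eliminate at
 * build time, e.g. a hypothetical
 *
 *     tcg_out_movi(s, TCG_TYPE_PTR, reg, guest_base);
 *
 * reachable outside CONFIG_USER_ONLY, trips qemu_build_not_reached()
 * and fails the build rather than miscompiling silently.
 */
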
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
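/*
 * Worked example (illustrative): both fields hold log2 byte counts,
 * so for a 4-byte access that must be single-copy atomic and 4-byte
 * aligned, atom_and_align_for_opc() would yield
 *
 *     TCGAtomAlign aa = { .atom = MO_32, .align = MO_32 };  // lg2 == 2
 *
 * A host that can only promise smaller atomicity may return a smaller
 * @atom and rely on @allow_two_ops to split the access.
 */
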
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
342 
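/*
 * Illustrative sketch: on a host with TCG_TARGET_INSN_UNIT_SIZE == 1
 * (e.g. x86), tcg_out32() takes the memcpy path and advances code_ptr
 * by four one-byte units; with unit size 4 (e.g. AArch64) it stores a
 * single unit.  A fixed-width backend emits one instruction as:
 *
 *     static void __attribute__((unused))
 *     example_emit_nop(TCGContext *s)
 *     {
 *         tcg_out32(s, 0xd503201f);   // AArch64 NOP encoding
 *     }
 */
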
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
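/*
 * Typical forward-branch flow (illustrative; R_EXAMPLE stands in for a
 * backend-specific relocation type):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0); // record fixup site
 *     // ... emit the branch with a placeholder displacement ...
 *     // ... emit the fall-through code ...
 *     tcg_out_label(s, l);             // bind label to current code_ptr
 *
 * tcg_resolve_relocs() then walks every label and applies patch_reloc()
 * to each recorded fixup, returning false if a displacement cannot be
 * encoded.
 */
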
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
514 
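/*
 * Examples of the dispatch above (illustrative):
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, r);
 *         // -> tcg_out_ext16s(s, TCG_TYPE_I64, d, r)
 *     tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_UL, r);
 *         // -> tcg_out_extu_i32_i64(s, d, r)
 *     tcg_out_movext(s, TCG_TYPE_I32, d, TCG_TYPE_I64, MO_UL, r);
 *         // -> tcg_out_extrl_i64_i32(s, d, r)
 */
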
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
566 
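/*
 * Worked example for the overlap logic above (illustrative): with
 * i1 = {dst R0, src R1} and i2 = {dst R1, src R0}, the two moves form
 * a swap.  If the backend implements tcg_out_xchg(), one exchange puts
 * both values in place and only the extensions remain; otherwise i1's
 * source is saved to @scratch, i2 is emitted normally, and i1 then
 * completes from the scratch copy.
 */
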
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch or two xchg.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src1, src3);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src3);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
642 
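/*
 * Worked example for the cycle above (illustrative): i1:R2<-R1,
 * i2:R3<-R2, i3:R1<-R3 is "clockwise" (each dst is the next src).
 * With xchg support, exchanging R1/R2 and then R1/R3 rotates all three
 * values into their destinations, leaving only in-place extensions;
 * without it, R1 is parked in @scratch so that i3 and i2 can be
 * emitted before i1 completes from the saved copy.
 */
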
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
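/*
 * Illustrative flow (not upstream code; locals are hypothetical): a
 * backend fast path fills in a label where the TLB check may need to
 * fall back to a helper:
 *
 *     TCGLabelQemuLdst *ldst = new_ldst_label(s);
 *     ldst->is_ld = true;
 *     ldst->oi = oi;
 *     ldst->addr_reg = addr_reg;
 *     ldst->datalo_reg = data_reg;
 *     ldst->label_ptr[0] = s->code_ptr;  // conditional branch to patch
 *     // ... emit the branch and the fast-path access ...
 *     ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
 *
 * tcg_out_ldst_finalize() below then emits the out-of-line slow path
 * for each entry via tcg_out_qemu_ld/st_slow_path().
 */
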
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
704 
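/*
 * Ordering example (illustrative): the list is kept with larger
 * entries first and equal-sized entries in descending data order, so
 * identical constants become adjacent.  Three requests for the same
 * 64-bit constant therefore collapse to one pool slot when
 * tcg_out_pool_finalize() emits each distinct value once, with every
 * label relocated against the same address.
 */
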
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
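/*
 * Expansion example (illustrative): a line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * in tcg-target-con-set.h becomes the enumerator c_o1_i2_r_r_ri here;
 * after the macros are redefined below, the same include produces the
 * matching table entry { 1, 2, { "r", "r", "ri" } } in constraint_sets[].
 */
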
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
890 typedef struct TCGConstraintSet {
891     uint8_t nb_oargs, nb_iargs;
892     const char *args_ct_str[TCG_MAX_OP_ARGS];
893 } TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
938 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
939 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
940 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
941 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
942 
943 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
944 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
945 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
946 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
947 
948 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
949 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
950 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
951 
952 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
953 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
954 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host isa.
966  *   Otherwise:        The register allocation constraints for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
981 typedef struct TCGOutOpBinary {
982     TCGOutOp base;
983     void (*out_rrr)(TCGContext *s, TCGType type,
984                     TCGReg a0, TCGReg a1, TCGReg a2);
985     void (*out_rri)(TCGContext *s, TCGType type,
986                     TCGReg a0, TCGReg a1, tcg_target_long a2);
987 } TCGOutOpBinary;
988 
989 typedef struct TCGOutOpBrcond {
990     TCGOutOp base;
991     void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
992                    TCGReg a1, TCGReg a2, TCGLabel *label);
993     void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
994                    TCGReg a1, tcg_target_long a2, TCGLabel *label);
995 } TCGOutOpBrcond;
996 
997 typedef struct TCGOutOpDivRem {
998     TCGOutOp base;
999     void (*out_rr01r)(TCGContext *s, TCGType type,
1000                       TCGReg a0, TCGReg a1, TCGReg a4);
1001 } TCGOutOpDivRem;
1002 
1003 typedef struct TCGOutOpMul2 {
1004     TCGOutOp base;
1005     void (*out_rrrr)(TCGContext *s, TCGType type,
1006                      TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
1007 } TCGOutOpMul2;
1008 
1009 typedef struct TCGOutOpUnary {
1010     TCGOutOp base;
1011     void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
1012 } TCGOutOpUnary;
1013 
1014 typedef struct TCGOutOpSetcond {
1015     TCGOutOp base;
1016     void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
1017                     TCGReg ret, TCGReg a1, TCGReg a2);
1018     void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
1019                     TCGReg ret, TCGReg a1, tcg_target_long a2);
1020 } TCGOutOpSetcond;
1021 
1022 typedef struct TCGOutOpSubtract {
1023     TCGOutOp base;
1024     void (*out_rrr)(TCGContext *s, TCGType type,
1025                     TCGReg a0, TCGReg a1, TCGReg a2);
1026     void (*out_rir)(TCGContext *s, TCGType type,
1027                     TCGReg a0, tcg_target_long a1, TCGReg a2);
1028 } TCGOutOpSubtract;
1029 
1030 #include "tcg-target.c.inc"
1031 
1032 #ifndef CONFIG_TCG_INTERPRETER
1033 /* Validate CPUTLBDescFast placement. */
1034 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
1035                         sizeof(CPUNegativeOffsetState))
1036                   < MIN_TLB_MASK_TABLE_OFS);
1037 #endif
1038 
1039 /*
1040  * Register V as the TCGOutOp for O.
1041  * This verifies that V is of type T, otherwise give a nice compiler error.
1042  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1043  */
1044 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1045 
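/*
 * Illustrative example: _Generic makes a mismatched registration fail
 * to compile.  Given a backend's "static const TCGOutOpBinary outop_add",
 *
 *     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 *
 * expands to [INDEX_op_add] = &outop_add.base, while naming the wrong
 * type, e.g. OUTOP(INDEX_op_add, TCGOutOpUnary, outop_add), has no
 * matching _Generic association and is rejected at build time.
 */
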
1046 /* Register allocation descriptions for every TCGOpcode. */
1047 static const TCGOutOp * const all_outop[NB_OPS] = {
1048     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1049     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1050     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1051     OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
1052     OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
1053     OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
1054     OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
1055     OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
1056     OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
1057     OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
1058     OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
1059     OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
1060     OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
1061     OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
1062     OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
1063     OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
1064     OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
1065     OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
1066     OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
1067     OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
1068     OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
1069     OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
1070     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1071     OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1072     OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
1073     OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
1074     OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
1075     OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
1076     OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
1077     OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
1078     OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
1079     OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
1080     OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
1081     OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1082 };
1083 
1084 #undef OUTOP
1085 
1086 /*
1087  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1088  * and registered the target's TCG globals) must register with this function
1089  * before initiating translation.
1090  *
1091  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1092  * of tcg_region_init() for the reasoning behind this.
1093  *
1094  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1095  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1096  * is not used anymore for translation once this function is called.
1097  *
1098  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1099  * iterates over the array (e.g. tcg_code_size() the same for both system/user
1100  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1101  */
1102 #ifdef CONFIG_USER_ONLY
1103 void tcg_register_thread(void)
1104 {
1105     tcg_ctx = &tcg_init_ctx;
1106 }
1107 #else
1108 void tcg_register_thread(void)
1109 {
1110     TCGContext *s = g_malloc(sizeof(*s));
1111     unsigned int i, n;
1112 
1113     *s = tcg_init_ctx;
1114 
1115     /* Relink mem_base.  */
1116     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1117         if (tcg_init_ctx.temps[i].mem_base) {
1118             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1119             tcg_debug_assert(b >= 0 && b < n);
1120             s->temps[i].mem_base = &s->temps[b];
1121         }
1122     }
1123 
1124     /* Claim an entry in tcg_ctxs */
1125     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1126     g_assert(n < tcg_max_ctxs);
1127     qatomic_set(&tcg_ctxs[n], s);
1128 
1129     if (n > 0) {
1130         tcg_region_initial_alloc(s);
1131     }
1132 
1133     tcg_ctx = s;
1134 }
1135 #endif /* !CONFIG_USER_ONLY */
1136 
1137 /* pool based memory allocation */
1138 void *tcg_malloc_internal(TCGContext *s, int size)
1139 {
1140     TCGPool *p;
1141     int pool_size;
1142 
1143     if (size > TCG_POOL_CHUNK_SIZE) {
1144         /* big malloc: insert a new pool (XXX: could optimize) */
1145         p = g_malloc(sizeof(TCGPool) + size);
1146         p->size = size;
1147         p->next = s->pool_first_large;
1148         s->pool_first_large = p;
1149         return p->data;
1150     } else {
1151         p = s->pool_current;
1152         if (!p) {
1153             p = s->pool_first;
1154             if (!p)
1155                 goto new_pool;
1156         } else {
1157             if (!p->next) {
1158             new_pool:
1159                 pool_size = TCG_POOL_CHUNK_SIZE;
1160                 p = g_malloc(sizeof(TCGPool) + pool_size);
1161                 p->size = pool_size;
1162                 p->next = NULL;
1163                 if (s->pool_current) {
1164                     s->pool_current->next = p;
1165                 } else {
1166                     s->pool_first = p;
1167                 }
1168             } else {
1169                 p = p->next;
1170             }
1171         }
1172     }
1173     s->pool_current = p;
1174     s->pool_cur = p->data + size;
1175     s->pool_end = p->data + p->size;
1176     return p->data;
1177 }
1178 
1179 void tcg_pool_reset(TCGContext *s)
1180 {
1181     TCGPool *p, *t;
1182     for (p = s->pool_first_large; p; p = t) {
1183         t = p->next;
1184         g_free(p);
1185     }
1186     s->pool_first_large = NULL;
1187     s->pool_cur = s->pool_end = NULL;
1188     s->pool_current = NULL;
1189 }
1190 
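/*
 * Usage sketch (illustrative): translation-lifetime data comes from
 * this pool rather than from malloc, e.g.
 *
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));      // carved from a chunk
 *     void *big = tcg_malloc(TCG_POOL_CHUNK_SIZE + 1); // dedicated pool
 *
 * Small requests advance pool_cur within a recycled chunk; oversized
 * ones get their own TCGPool, which tcg_pool_reset() frees after each
 * translation while ordinary chunks are kept for reuse.
 */
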
1191 /*
1192  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1193  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1194  * We only use these for layout in tcg_out_ld_helper_ret and
1195  * tcg_out_st_helper_args, and share them between several of
1196  * the helpers, with the end result that it's easier to build manually.
1197  */
1198 
1199 #if TCG_TARGET_REG_BITS == 32
1200 # define dh_typecode_ttl  dh_typecode_i32
1201 #else
1202 # define dh_typecode_ttl  dh_typecode_i64
1203 #endif
1204 
1205 static TCGHelperInfo info_helper_ld32_mmu = {
1206     .flags = TCG_CALL_NO_WG,
1207     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1208               | dh_typemask(env, 1)
1209               | dh_typemask(i64, 2)  /* uint64_t addr */
1210               | dh_typemask(i32, 3)  /* unsigned oi */
1211               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1212 };
1213 
1214 static TCGHelperInfo info_helper_ld64_mmu = {
1215     .flags = TCG_CALL_NO_WG,
1216     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1217               | dh_typemask(env, 1)
1218               | dh_typemask(i64, 2)  /* uint64_t addr */
1219               | dh_typemask(i32, 3)  /* unsigned oi */
1220               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1221 };
1222 
1223 static TCGHelperInfo info_helper_ld128_mmu = {
1224     .flags = TCG_CALL_NO_WG,
1225     .typemask = dh_typemask(i128, 0) /* return Int128 */
1226               | dh_typemask(env, 1)
1227               | dh_typemask(i64, 2)  /* uint64_t addr */
1228               | dh_typemask(i32, 3)  /* unsigned oi */
1229               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1230 };
1231 
1232 static TCGHelperInfo info_helper_st32_mmu = {
1233     .flags = TCG_CALL_NO_WG,
1234     .typemask = dh_typemask(void, 0)
1235               | dh_typemask(env, 1)
1236               | dh_typemask(i64, 2)  /* uint64_t addr */
1237               | dh_typemask(i32, 3)  /* uint32_t data */
1238               | dh_typemask(i32, 4)  /* unsigned oi */
1239               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1240 };
1241 
1242 static TCGHelperInfo info_helper_st64_mmu = {
1243     .flags = TCG_CALL_NO_WG,
1244     .typemask = dh_typemask(void, 0)
1245               | dh_typemask(env, 1)
1246               | dh_typemask(i64, 2)  /* uint64_t addr */
1247               | dh_typemask(i64, 3)  /* uint64_t data */
1248               | dh_typemask(i32, 4)  /* unsigned oi */
1249               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1250 };
1251 
1252 static TCGHelperInfo info_helper_st128_mmu = {
1253     .flags = TCG_CALL_NO_WG,
1254     .typemask = dh_typemask(void, 0)
1255               | dh_typemask(env, 1)
1256               | dh_typemask(i64, 2)  /* uint64_t addr */
1257               | dh_typemask(i128, 3) /* Int128 data */
1258               | dh_typemask(i32, 4)  /* unsigned oi */
1259               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1260 };
1261 
1262 #ifdef CONFIG_TCG_INTERPRETER
1263 static ffi_type *typecode_to_ffi(int argmask)
1264 {
1265     /*
1266      * libffi does not support __int128_t, so we have forced Int128
1267      * to use the structure definition instead of the builtin type.
1268      */
1269     static ffi_type *ffi_type_i128_elements[3] = {
1270         &ffi_type_uint64,
1271         &ffi_type_uint64,
1272         NULL
1273     };
1274     static ffi_type ffi_type_i128 = {
1275         .size = 16,
1276         .alignment = __alignof__(Int128),
1277         .type = FFI_TYPE_STRUCT,
1278         .elements = ffi_type_i128_elements,
1279     };
1280 
1281     switch (argmask) {
1282     case dh_typecode_void:
1283         return &ffi_type_void;
1284     case dh_typecode_i32:
1285         return &ffi_type_uint32;
1286     case dh_typecode_s32:
1287         return &ffi_type_sint32;
1288     case dh_typecode_i64:
1289         return &ffi_type_uint64;
1290     case dh_typecode_s64:
1291         return &ffi_type_sint64;
1292     case dh_typecode_ptr:
1293         return &ffi_type_pointer;
1294     case dh_typecode_i128:
1295         return &ffi_type_i128;
1296     }
1297     g_assert_not_reached();
1298 }
1299 
1300 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1301 {
1302     unsigned typemask = info->typemask;
1303     struct {
1304         ffi_cif cif;
1305         ffi_type *args[];
1306     } *ca;
1307     ffi_status status;
1308     int nargs;
1309 
1310     /* Ignoring the return type, find the last non-zero field. */
1311     nargs = 32 - clz32(typemask >> 3);
1312     nargs = DIV_ROUND_UP(nargs, 3);
1313     assert(nargs <= MAX_CALL_IARGS);
1314 
1315     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1316     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1317     ca->cif.nargs = nargs;
1318 
1319     if (nargs != 0) {
1320         ca->cif.arg_types = ca->args;
1321         for (int j = 0; j < nargs; ++j) {
1322             int typecode = extract32(typemask, (j + 1) * 3, 3);
1323             ca->args[j] = typecode_to_ffi(typecode);
1324         }
1325     }
1326 
1327     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1328                           ca->cif.rtype, ca->cif.arg_types);
1329     assert(status == FFI_OK);
1330 
1331     return &ca->cif;
1332 }
1333 
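/*
 * Worked example (illustrative): for info_helper_ld32_mmu the typemask
 * packs one 3-bit typecode per slot: the return value in bits 0-2,
 * then env, addr, oi, ra in bits 3-5, 6-8, 9-11 and 12-14.  The last
 * nonzero typecode (ra) sits in bits 9-11 after the >>3 shift, so
 * 32 - clz32(...) lands in 10..12 and DIV_ROUND_UP(..., 3) recovers
 * nargs == 4 without any terminator field.
 */
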
1334 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1335 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1336 #else
1337 #define HELPER_INFO_INIT(I)      (&(I)->init)
1338 #define HELPER_INFO_INIT_VAL(I)  1
1339 #endif /* CONFIG_TCG_INTERPRETER */
1340 
1341 static inline bool arg_slot_reg_p(unsigned arg_slot)
1342 {
1343     /*
1344      * Split the sizeof away from the comparison to avoid Werror from
1345      * "unsigned < 0 is always false", when iarg_regs is empty.
1346      */
1347     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1348     return arg_slot < nreg;
1349 }
1350 
1351 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1352 {
1353     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1354     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1355 
1356     tcg_debug_assert(stk_slot < max);
1357     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1358 }
1359 
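/*
 * Worked example (illustrative): on a host with 6 integer argument
 * registers and an 8-byte tcg_target_long, arg_slot_reg_p() is true
 * for slots 0-5, while slot 6 maps to the first stack slot:
 *
 *     arg_slot_stk_ofs(6) == TCG_TARGET_CALL_STACK_OFFSET + 0 * 8
 *     arg_slot_stk_ofs(7) == TCG_TARGET_CALL_STACK_OFFSET + 8
 */
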
1360 typedef struct TCGCumulativeArgs {
1361     int arg_idx;                /* tcg_gen_callN args[] */
1362     int info_in_idx;            /* TCGHelperInfo in[] */
1363     int arg_slot;               /* regs+stack slot */
1364     int ref_slot;               /* stack slots for references */
1365 } TCGCumulativeArgs;
1366 
1367 static void layout_arg_even(TCGCumulativeArgs *cum)
1368 {
1369     cum->arg_slot += cum->arg_slot & 1;
1370 }
1371 
1372 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1373                          TCGCallArgumentKind kind)
1374 {
1375     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1376 
1377     *loc = (TCGCallArgumentLoc){
1378         .kind = kind,
1379         .arg_idx = cum->arg_idx,
1380         .arg_slot = cum->arg_slot,
1381     };
1382     cum->info_in_idx++;
1383     cum->arg_slot++;
1384 }
1385 
1386 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1387                                 TCGHelperInfo *info, int n)
1388 {
1389     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1390 
1391     for (int i = 0; i < n; ++i) {
1392         /* Layout all using the same arg_idx, adjusting the subindex. */
1393         loc[i] = (TCGCallArgumentLoc){
1394             .kind = TCG_CALL_ARG_NORMAL,
1395             .arg_idx = cum->arg_idx,
1396             .tmp_subindex = i,
1397             .arg_slot = cum->arg_slot + i,
1398         };
1399     }
1400     cum->info_in_idx += n;
1401     cum->arg_slot += n;
1402 }
1403 
1404 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1405 {
1406     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1407     int n = 128 / TCG_TARGET_REG_BITS;
1408 
1409     /* The first subindex carries the pointer. */
1410     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1411 
1412     /*
1413      * The callee is allowed to clobber memory associated with
1414      * a structure passed by reference.  Therefore we must make copies.
1415      * Allocate space from "ref_slot", which will be adjusted to
1416      * follow the parameters on the stack.
1417      */
1418     loc[0].ref_slot = cum->ref_slot;
1419 
1420     /*
1421      * Subsequent words also go into the reference slot, but
1422      * do not accumulate into the regular arguments.
1423      */
1424     for (int i = 1; i < n; ++i) {
1425         loc[i] = (TCGCallArgumentLoc){
1426             .kind = TCG_CALL_ARG_BY_REF_N,
1427             .arg_idx = cum->arg_idx,
1428             .tmp_subindex = i,
1429             .ref_slot = cum->ref_slot + i,
1430         };
1431     }
1432     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1433     cum->ref_slot += n;
1434 }
1435 
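/*
 * Worked example (illustrative): an Int128 argument on a 64-bit host
 * with TCG_CALL_ARG_BY_REF yields n == 2 locations: the first passes
 * the pointer (TCG_CALL_ARG_BY_REF, consuming one regular arg_slot)
 * and the second describes the other data word (TCG_CALL_ARG_BY_REF_N).
 * Both copy through ref_slot and ref_slot + 1 in the scratch area that
 * init_call_layout() later relocates past the outgoing parameters.
 */
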
1436 static void init_call_layout(TCGHelperInfo *info)
1437 {
1438     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1439     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1440     unsigned typemask = info->typemask;
1441     unsigned typecode;
1442     TCGCumulativeArgs cum = { };
1443 
1444     /*
1445      * Parse and place any function return value.
1446      */
1447     typecode = typemask & 7;
1448     switch (typecode) {
1449     case dh_typecode_void:
1450         info->nr_out = 0;
1451         break;
1452     case dh_typecode_i32:
1453     case dh_typecode_s32:
1454     case dh_typecode_ptr:
1455         info->nr_out = 1;
1456         info->out_kind = TCG_CALL_RET_NORMAL;
1457         break;
1458     case dh_typecode_i64:
1459     case dh_typecode_s64:
1460         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1461         info->out_kind = TCG_CALL_RET_NORMAL;
1462         /* Query the last register now to trigger any assert early. */
1463         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1464         break;
1465     case dh_typecode_i128:
1466         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1467         info->out_kind = TCG_TARGET_CALL_RET_I128;
1468         switch (TCG_TARGET_CALL_RET_I128) {
1469         case TCG_CALL_RET_NORMAL:
1470             /* Query the last register now to trigger any assert early. */
1471             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1472             break;
1473         case TCG_CALL_RET_BY_VEC:
1474             /* Query the single register now to trigger any assert early. */
1475             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1476             break;
1477         case TCG_CALL_RET_BY_REF:
1478             /*
1479              * Allocate the first argument to the output.
1480              * We don't need to store this anywhere, just make it
1481              * unavailable for use in the input loop below.
1482              */
1483             cum.arg_slot = 1;
1484             break;
1485         default:
1486             qemu_build_not_reached();
1487         }
1488         break;
1489     default:
1490         g_assert_not_reached();
1491     }
1492 
1493     /*
1494      * Parse and place function arguments.
1495      */
1496     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1497         TCGCallArgumentKind kind;
1498         TCGType type;
1499 
1500         typecode = typemask & 7;
1501         switch (typecode) {
1502         case dh_typecode_i32:
1503         case dh_typecode_s32:
1504             type = TCG_TYPE_I32;
1505             break;
1506         case dh_typecode_i64:
1507         case dh_typecode_s64:
1508             type = TCG_TYPE_I64;
1509             break;
1510         case dh_typecode_ptr:
1511             type = TCG_TYPE_PTR;
1512             break;
1513         case dh_typecode_i128:
1514             type = TCG_TYPE_I128;
1515             break;
1516         default:
1517             g_assert_not_reached();
1518         }
1519 
1520         switch (type) {
1521         case TCG_TYPE_I32:
1522             switch (TCG_TARGET_CALL_ARG_I32) {
1523             case TCG_CALL_ARG_EVEN:
1524                 layout_arg_even(&cum);
1525                 /* fall through */
1526             case TCG_CALL_ARG_NORMAL:
1527                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1528                 break;
1529             case TCG_CALL_ARG_EXTEND:
1530                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1531                 layout_arg_1(&cum, info, kind);
1532                 break;
1533             default:
1534                 qemu_build_not_reached();
1535             }
1536             break;
1537 
1538         case TCG_TYPE_I64:
1539             switch (TCG_TARGET_CALL_ARG_I64) {
1540             case TCG_CALL_ARG_EVEN:
1541                 layout_arg_even(&cum);
1542                 /* fall through */
1543             case TCG_CALL_ARG_NORMAL:
1544                 if (TCG_TARGET_REG_BITS == 32) {
1545                     layout_arg_normal_n(&cum, info, 2);
1546                 } else {
1547                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1548                 }
1549                 break;
1550             default:
1551                 qemu_build_not_reached();
1552             }
1553             break;
1554 
1555         case TCG_TYPE_I128:
1556             switch (TCG_TARGET_CALL_ARG_I128) {
1557             case TCG_CALL_ARG_EVEN:
1558                 layout_arg_even(&cum);
1559                 /* fall through */
1560             case TCG_CALL_ARG_NORMAL:
1561                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1562                 break;
1563             case TCG_CALL_ARG_BY_REF:
1564                 layout_arg_by_ref(&cum, info);
1565                 break;
1566             default:
1567                 qemu_build_not_reached();
1568             }
1569             break;
1570 
1571         default:
1572             g_assert_not_reached();
1573         }
1574     }
1575     info->nr_in = cum.info_in_idx;
1576 
1577     /* Validate that we didn't overrun the input array. */
1578     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1579     /* Validate that the backend has enough argument space. */
1580     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1581 
1582     /*
1583      * Relocate the "ref_slot" area to the end of the parameters.
1584      * Minimizing this stack offset helps code size for x86,
1585      * which has a signed 8-bit offset encoding.
1586      */
1587     if (cum.ref_slot != 0) {
1588         int ref_base = 0;
1589 
1590         if (cum.arg_slot > max_reg_slots) {
1591             int align = __alignof(Int128) / sizeof(tcg_target_long);
1592 
1593             ref_base = cum.arg_slot - max_reg_slots;
1594             if (align > 1) {
1595                 ref_base = ROUND_UP(ref_base, align);
1596             }
1597         }
1598         assert(ref_base + cum.ref_slot <= max_stk_slots);
1599         ref_base += max_reg_slots;
1600 
1601         if (ref_base != 0) {
1602             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1603                 TCGCallArgumentLoc *loc = &info->in[i];
1604                 switch (loc->kind) {
1605                 case TCG_CALL_ARG_BY_REF:
1606                 case TCG_CALL_ARG_BY_REF_N:
1607                     loc->ref_slot += ref_base;
1608                     break;
1609                 default:
1610                     break;
1611                 }
1612             }
1613         }
1614     }
1615 }
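
/*
 * Illustration (not part of the build): the typemask consulted above
 * packs one 3-bit typecode per slot, with the return type in bits [2:0]
 * and argument N in bits [3N+5:3N+3].  A minimal decoding sketch using
 * the same shifts and masks as init_call_layout():
 */
#if 0
static void example_decode_typemask(unsigned typemask)
{
    unsigned ret_code = typemask & 7;      /* return-value typecode */
    int arg_idx = 0;

    for (typemask >>= 3; typemask; typemask >>= 3, arg_idx++) {
        unsigned typecode = typemask & 7;  /* typecode of argument arg_idx */
        /* ... classify as in the loop above ... */
    }
}
#endif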
1616 
1617 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1618 static void process_constraint_sets(void);
1619 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1620                                             TCGReg reg, const char *name);
1621 
1622 static void tcg_context_init(unsigned max_threads)
1623 {
1624     TCGContext *s = &tcg_init_ctx;
1625     int n, i;
1626     TCGTemp *ts;
1627 
1628     memset(s, 0, sizeof(*s));
1629     s->nb_globals = 0;
1630 
1631     init_call_layout(&info_helper_ld32_mmu);
1632     init_call_layout(&info_helper_ld64_mmu);
1633     init_call_layout(&info_helper_ld128_mmu);
1634     init_call_layout(&info_helper_st32_mmu);
1635     init_call_layout(&info_helper_st64_mmu);
1636     init_call_layout(&info_helper_st128_mmu);
1637 
1638     tcg_target_init(s);
1639     process_constraint_sets();
1640 
1641     /* Reverse the order of the saved registers, assuming they're all at
1642        the start of tcg_target_reg_alloc_order.  */
1643     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1644         int r = tcg_target_reg_alloc_order[n];
1645         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1646             break;
1647         }
1648     }
1649     for (i = 0; i < n; ++i) {
1650         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1651     }
1652     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1653         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1654     }
1655 
1656     tcg_ctx = s;
1657     /*
1658      * In user-mode we simply share the init context among threads, since we
1659      * use a single region. See the documentation of tcg_region_init()
1660      * for the reasoning behind this.
1661      * In system-mode we will have at most max_threads TCG threads.
1662      */
1663 #ifdef CONFIG_USER_ONLY
1664     tcg_ctxs = &tcg_ctx;
1665     tcg_cur_ctxs = 1;
1666     tcg_max_ctxs = 1;
1667 #else
1668     tcg_max_ctxs = max_threads;
1669     tcg_ctxs = g_new0(TCGContext *, max_threads);
1670 #endif
1671 
1672     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1673     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1674     tcg_env = temp_tcgv_ptr(ts);
1675 }
1676 
1677 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1678 {
1679     tcg_context_init(max_threads);
1680     tcg_region_init(tb_size, splitwx, max_threads);
1681 }
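
/*
 * Example (illustrative; the buffer size, split-w^x mode and thread
 * count are placeholders): an accelerator brings TCG up once at startup,
 * before any code is generated.
 */
#if 0
    /* 512 MiB code buffer, split-w^x chosen automatically (-1),
       one TCG context per vCPU thread. */
    tcg_init(512 * MiB, -1, n_vcpus);
#endif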
1682 
1683 /*
1684  * Allocate TBs right before their corresponding translated code, making
1685  * sure that TBs and code are on different cache lines.
1686  */
1687 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1688 {
1689     uintptr_t align = qemu_icache_linesize;
1690     TranslationBlock *tb;
1691     void *next;
1692 
1693  retry:
1694     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1695     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1696 
1697     if (unlikely(next > s->code_gen_highwater)) {
1698         if (tcg_region_alloc(s)) {
1699             return NULL;
1700         }
1701         goto retry;
1702     }
1703     qatomic_set(&s->code_gen_ptr, next);
1704     return tb;
1705 }
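
/*
 * Worked example (illustrative): with a 64-byte icache line and
 * code_gen_ptr == 0x1234, the TB lands at ROUND_UP(0x1234, 64) == 0x1240
 * and code generation resumes at the next 64-byte boundary past the TB,
 * so the descriptor and its translated code never share a cache line.
 */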
1706 
1707 void tcg_prologue_init(void)
1708 {
1709     TCGContext *s = tcg_ctx;
1710     size_t prologue_size;
1711 
1712     s->code_ptr = s->code_gen_ptr;
1713     s->code_buf = s->code_gen_ptr;
1714     s->data_gen_ptr = NULL;
1715 
1716 #ifndef CONFIG_TCG_INTERPRETER
1717     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1718 #endif
1719 
1720     s->pool_labels = NULL;
1721 
1722     qemu_thread_jit_write();
1723     /* Generate the prologue.  */
1724     tcg_target_qemu_prologue(s);
1725 
1726     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1727     {
1728         int result = tcg_out_pool_finalize(s);
1729         tcg_debug_assert(result == 0);
1730     }
1731 
1732     prologue_size = tcg_current_code_size(s);
1733     perf_report_prologue(s->code_gen_ptr, prologue_size);
1734 
1735 #ifndef CONFIG_TCG_INTERPRETER
1736     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1737                         (uintptr_t)s->code_buf, prologue_size);
1738 #endif
1739 
1740     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1741         FILE *logfile = qemu_log_trylock();
1742         if (logfile) {
1743             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1744             if (s->data_gen_ptr) {
1745                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1746                 size_t data_size = prologue_size - code_size;
1747                 size_t i;
1748 
1749                 disas(logfile, s->code_gen_ptr, code_size);
1750 
1751                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1752                     if (sizeof(tcg_target_ulong) == 8) {
1753                         fprintf(logfile,
1754                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1755                                 (uintptr_t)s->data_gen_ptr + i,
1756                                 *(uint64_t *)(s->data_gen_ptr + i));
1757                     } else {
1758                         fprintf(logfile,
1759                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1760                                 (uintptr_t)s->data_gen_ptr + i,
1761                                 *(uint32_t *)(s->data_gen_ptr + i));
1762                     }
1763                 }
1764             } else {
1765                 disas(logfile, s->code_gen_ptr, prologue_size);
1766             }
1767             fprintf(logfile, "\n");
1768             qemu_log_unlock(logfile);
1769         }
1770     }
1771 
1772 #ifndef CONFIG_TCG_INTERPRETER
1773     /*
1774      * Assert that goto_ptr is implemented completely, i.e. that an
1775      * epilogue has been set.  For tci, we use NULL as the signal to
1776      * return from the interpreter, so skip this check.
1777      */
1778     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1779 #endif
1780 
1781     tcg_region_prologue_set(s);
1782 }
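
/*
 * Usage note: the PROLOGUE dump above is produced when the "out_asm"
 * log mask is enabled, e.g. "qemu-system-<target> -d out_asm"
 * (the target name is a placeholder).
 */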
1783 
1784 void tcg_func_start(TCGContext *s)
1785 {
1786     tcg_pool_reset(s);
1787     s->nb_temps = s->nb_globals;
1788 
1789     /* No temps of any size or locality have been previously allocated. */
1790     tcg_temp_ebb_reset_freed(s);
1791 
1792     /* No constant temps have been previously allocated. */
1793     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1794         if (s->const_table[i]) {
1795             g_hash_table_remove_all(s->const_table[i]);
1796         }
1797     }
1798 
1799     s->nb_ops = 0;
1800     s->nb_labels = 0;
1801     s->current_frame_offset = s->frame_start;
1802 
1803 #ifdef CONFIG_DEBUG_TCG
1804     s->goto_tb_issue_mask = 0;
1805 #endif
1806 
1807     QTAILQ_INIT(&s->ops);
1808     QTAILQ_INIT(&s->free_ops);
1809     s->emit_before_op = NULL;
1810     QSIMPLEQ_INIT(&s->labels);
1811 
1812     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1813     tcg_debug_assert(s->insn_start_words > 0);
1814 }
1815 
1816 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1817 {
1818     int n = s->nb_temps++;
1819 
1820     if (n >= TCG_MAX_TEMPS) {
1821         tcg_raise_tb_overflow(s);
1822     }
1823     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1824 }
1825 
1826 static TCGTemp *tcg_global_alloc(TCGContext *s)
1827 {
1828     TCGTemp *ts;
1829 
1830     tcg_debug_assert(s->nb_globals == s->nb_temps);
1831     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1832     s->nb_globals++;
1833     ts = tcg_temp_alloc(s);
1834     ts->kind = TEMP_GLOBAL;
1835 
1836     return ts;
1837 }
1838 
1839 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1840                                             TCGReg reg, const char *name)
1841 {
1842     TCGTemp *ts;
1843 
1844     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1845 
1846     ts = tcg_global_alloc(s);
1847     ts->base_type = type;
1848     ts->type = type;
1849     ts->kind = TEMP_FIXED;
1850     ts->reg = reg;
1851     ts->name = name;
1852     tcg_regset_set_reg(s->reserved_regs, reg);
1853 
1854     return ts;
1855 }
1856 
1857 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1858 {
1859     s->frame_start = start;
1860     s->frame_end = start + size;
1861     s->frame_temp
1862         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1863 }
1864 
1865 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1866                                             const char *name, TCGType type)
1867 {
1868     TCGContext *s = tcg_ctx;
1869     TCGTemp *base_ts = tcgv_ptr_temp(base);
1870     TCGTemp *ts = tcg_global_alloc(s);
1871     int indirect_reg = 0;
1872 
1873     switch (base_ts->kind) {
1874     case TEMP_FIXED:
1875         break;
1876     case TEMP_GLOBAL:
1877         /* We do not support double-indirect registers.  */
1878         tcg_debug_assert(!base_ts->indirect_reg);
1879         base_ts->indirect_base = 1;
1880         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1881                             ? 2 : 1);
1882         indirect_reg = 1;
1883         break;
1884     default:
1885         g_assert_not_reached();
1886     }
1887 
1888     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1889         TCGTemp *ts2 = tcg_global_alloc(s);
1890         char buf[64];
1891 
1892         ts->base_type = TCG_TYPE_I64;
1893         ts->type = TCG_TYPE_I32;
1894         ts->indirect_reg = indirect_reg;
1895         ts->mem_allocated = 1;
1896         ts->mem_base = base_ts;
1897         ts->mem_offset = offset;
1898         pstrcpy(buf, sizeof(buf), name);
1899         pstrcat(buf, sizeof(buf), "_0");
1900         ts->name = strdup(buf);
1901 
1902         tcg_debug_assert(ts2 == ts + 1);
1903         ts2->base_type = TCG_TYPE_I64;
1904         ts2->type = TCG_TYPE_I32;
1905         ts2->indirect_reg = indirect_reg;
1906         ts2->mem_allocated = 1;
1907         ts2->mem_base = base_ts;
1908         ts2->mem_offset = offset + 4;
1909         ts2->temp_subindex = 1;
1910         pstrcpy(buf, sizeof(buf), name);
1911         pstrcat(buf, sizeof(buf), "_1");
1912         ts2->name = strdup(buf);
1913     } else {
1914         ts->base_type = type;
1915         ts->type = type;
1916         ts->indirect_reg = indirect_reg;
1917         ts->mem_allocated = 1;
1918         ts->mem_base = base_ts;
1919         ts->mem_offset = offset;
1920         ts->name = name;
1921     }
1922     return ts;
1923 }
1924 
1925 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1926 {
1927     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1928     return temp_tcgv_i32(ts);
1929 }
1930 
1931 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1932 {
1933     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1934     return temp_tcgv_i64(ts);
1935 }
1936 
1937 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1938 {
1939     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1940     return temp_tcgv_ptr(ts);
1941 }
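
/*
 * Example (hypothetical CPU state, not part of this file): a front end
 * typically creates one global per guest register at translator init,
 * each backed by a field of its CPUArchState reachable from tcg_env.
 */
#if 0
typedef struct CPUExampleState {
    uint32_t regs[2];
    uint64_t pc;
} CPUExampleState;

static TCGv_i32 cpu_regs_example[2];
static TCGv_i64 cpu_pc_example;

static void example_translator_init(void)
{
    /* Names must be unique and have static storage duration. */
    static const char * const names[2] = { "r0", "r1" };

    for (int i = 0; i < 2; i++) {
        cpu_regs_example[i] =
            tcg_global_mem_new_i32(tcg_env,
                                   offsetof(CPUExampleState, regs[i]),
                                   names[i]);
    }
    cpu_pc_example = tcg_global_mem_new_i64(tcg_env,
                                            offsetof(CPUExampleState, pc),
                                            "pc");
}
#endif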
1942 
1943 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1944 {
1945     TCGContext *s = tcg_ctx;
1946     TCGTemp *ts;
1947     int n;
1948 
1949     if (kind == TEMP_EBB) {
1950         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1951 
1952         if (idx < TCG_MAX_TEMPS) {
1953             /* There is already an available temp with the right type.  */
1954             clear_bit(idx, s->free_temps[type].l);
1955 
1956             ts = &s->temps[idx];
1957             ts->temp_allocated = 1;
1958             tcg_debug_assert(ts->base_type == type);
1959             tcg_debug_assert(ts->kind == kind);
1960             return ts;
1961         }
1962     } else {
1963         tcg_debug_assert(kind == TEMP_TB);
1964     }
1965 
1966     switch (type) {
1967     case TCG_TYPE_I32:
1968     case TCG_TYPE_V64:
1969     case TCG_TYPE_V128:
1970     case TCG_TYPE_V256:
1971         n = 1;
1972         break;
1973     case TCG_TYPE_I64:
1974         n = 64 / TCG_TARGET_REG_BITS;
1975         break;
1976     case TCG_TYPE_I128:
1977         n = 128 / TCG_TARGET_REG_BITS;
1978         break;
1979     default:
1980         g_assert_not_reached();
1981     }
1982 
1983     ts = tcg_temp_alloc(s);
1984     ts->base_type = type;
1985     ts->temp_allocated = 1;
1986     ts->kind = kind;
1987 
1988     if (n == 1) {
1989         ts->type = type;
1990     } else {
1991         ts->type = TCG_TYPE_REG;
1992 
1993         for (int i = 1; i < n; ++i) {
1994             TCGTemp *ts2 = tcg_temp_alloc(s);
1995 
1996             tcg_debug_assert(ts2 == ts + i);
1997             ts2->base_type = type;
1998             ts2->type = TCG_TYPE_REG;
1999             ts2->temp_allocated = 1;
2000             ts2->temp_subindex = i;
2001             ts2->kind = kind;
2002         }
2003     }
2004     return ts;
2005 }
2006 
2007 TCGv_i32 tcg_temp_new_i32(void)
2008 {
2009     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2010 }
2011 
2012 TCGv_i32 tcg_temp_ebb_new_i32(void)
2013 {
2014     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2015 }
2016 
2017 TCGv_i64 tcg_temp_new_i64(void)
2018 {
2019     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2020 }
2021 
2022 TCGv_i64 tcg_temp_ebb_new_i64(void)
2023 {
2024     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2025 }
2026 
2027 TCGv_ptr tcg_temp_new_ptr(void)
2028 {
2029     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2030 }
2031 
2032 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2033 {
2034     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2035 }
2036 
2037 TCGv_i128 tcg_temp_new_i128(void)
2038 {
2039     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2040 }
2041 
2042 TCGv_i128 tcg_temp_ebb_new_i128(void)
2043 {
2044     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2045 }
2046 
2047 TCGv_vec tcg_temp_new_vec(TCGType type)
2048 {
2049     TCGTemp *t;
2050 
2051 #ifdef CONFIG_DEBUG_TCG
2052     switch (type) {
2053     case TCG_TYPE_V64:
2054         assert(TCG_TARGET_HAS_v64);
2055         break;
2056     case TCG_TYPE_V128:
2057         assert(TCG_TARGET_HAS_v128);
2058         break;
2059     case TCG_TYPE_V256:
2060         assert(TCG_TARGET_HAS_v256);
2061         break;
2062     default:
2063         g_assert_not_reached();
2064     }
2065 #endif
2066 
2067     t = tcg_temp_new_internal(type, TEMP_EBB);
2068     return temp_tcgv_vec(t);
2069 }
2070 
2071 /* Create a new temp of the same type as an existing temp.  */
2072 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2073 {
2074     TCGTemp *t = tcgv_vec_temp(match);
2075 
2076     tcg_debug_assert(t->temp_allocated != 0);
2077 
2078     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2079     return temp_tcgv_vec(t);
2080 }
2081 
2082 void tcg_temp_free_internal(TCGTemp *ts)
2083 {
2084     TCGContext *s = tcg_ctx;
2085 
2086     switch (ts->kind) {
2087     case TEMP_CONST:
2088     case TEMP_TB:
2089         /* Silently ignore free. */
2090         break;
2091     case TEMP_EBB:
2092         tcg_debug_assert(ts->temp_allocated != 0);
2093         ts->temp_allocated = 0;
2094         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2095         break;
2096     default:
2097         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2098         g_assert_not_reached();
2099     }
2100 }
2101 
2102 void tcg_temp_free_i32(TCGv_i32 arg)
2103 {
2104     tcg_temp_free_internal(tcgv_i32_temp(arg));
2105 }
2106 
2107 void tcg_temp_free_i64(TCGv_i64 arg)
2108 {
2109     tcg_temp_free_internal(tcgv_i64_temp(arg));
2110 }
2111 
2112 void tcg_temp_free_i128(TCGv_i128 arg)
2113 {
2114     tcg_temp_free_internal(tcgv_i128_temp(arg));
2115 }
2116 
2117 void tcg_temp_free_ptr(TCGv_ptr arg)
2118 {
2119     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2120 }
2121 
2122 void tcg_temp_free_vec(TCGv_vec arg)
2123 {
2124     tcg_temp_free_internal(tcgv_vec_temp(arg));
2125 }
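
/*
 * Example (illustrative): TEMP_EBB temps must be freed to be recycled
 * within the translation, while freeing TEMP_TB and TEMP_CONST temps is
 * silently ignored above.
 */
#if 0
    TCGv_i32 t = tcg_temp_ebb_new_i32();  /* valid within one EBB only */
    tcg_gen_movi_i32(t, 42);
    /* ... use t ... */
    tcg_temp_free_i32(t);                 /* returns t to free_temps */
#endif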
2126 
2127 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2128 {
2129     TCGContext *s = tcg_ctx;
2130     GHashTable *h = s->const_table[type];
2131     TCGTemp *ts;
2132 
2133     if (h == NULL) {
2134         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2135         s->const_table[type] = h;
2136     }
2137 
2138     ts = g_hash_table_lookup(h, &val);
2139     if (ts == NULL) {
2140         int64_t *val_ptr;
2141 
2142         ts = tcg_temp_alloc(s);
2143 
2144         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2145             TCGTemp *ts2 = tcg_temp_alloc(s);
2146 
2147             tcg_debug_assert(ts2 == ts + 1);
2148 
2149             ts->base_type = TCG_TYPE_I64;
2150             ts->type = TCG_TYPE_I32;
2151             ts->kind = TEMP_CONST;
2152             ts->temp_allocated = 1;
2153 
2154             ts2->base_type = TCG_TYPE_I64;
2155             ts2->type = TCG_TYPE_I32;
2156             ts2->kind = TEMP_CONST;
2157             ts2->temp_allocated = 1;
2158             ts2->temp_subindex = 1;
2159 
2160             /*
2161              * Retain the full value of the 64-bit constant in the low
2162              * part, so that the hash table works.  Actual uses will
2163              * truncate the value to the low part.
2164              */
2165             ts[HOST_BIG_ENDIAN].val = val;
2166             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2167             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2168         } else {
2169             ts->base_type = type;
2170             ts->type = type;
2171             ts->kind = TEMP_CONST;
2172             ts->temp_allocated = 1;
2173             ts->val = val;
2174             val_ptr = &ts->val;
2175         }
2176         g_hash_table_insert(h, val_ptr, ts);
2177     }
2178 
2179     return ts;
2180 }
2181 
2182 TCGv_i32 tcg_constant_i32(int32_t val)
2183 {
2184     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2185 }
2186 
2187 TCGv_i64 tcg_constant_i64(int64_t val)
2188 {
2189     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2190 }
2191 
2192 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2193 {
2194     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2195 }
2196 
2197 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2198 {
2199     val = dup_const(vece, val);
2200     return temp_tcgv_vec(tcg_constant_internal(type, val));
2201 }
2202 
2203 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2204 {
2205     TCGTemp *t = tcgv_vec_temp(match);
2206 
2207     tcg_debug_assert(t->temp_allocated != 0);
2208     return tcg_constant_vec(t->base_type, vece, val);
2209 }
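
/*
 * Note (illustrative): constants are interned per (type, value), so
 * repeated requests yield the same TEMP_CONST temp and no free is
 * required (or performed, per tcg_temp_free_internal above).
 */
#if 0
    TCGv_i32 a = tcg_constant_i32(42);
    TCGv_i32 b = tcg_constant_i32(42);
    /* a and b alias the same temp; do not write to either. */
#endif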
2210 
2211 #ifdef CONFIG_DEBUG_TCG
2212 size_t temp_idx(TCGTemp *ts)
2213 {
2214     ptrdiff_t n = ts - tcg_ctx->temps;
2215     assert(n >= 0 && n < tcg_ctx->nb_temps);
2216     return n;
2217 }
2218 
2219 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2220 {
2221     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2222 
2223     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2224     assert(o % sizeof(TCGTemp) == 0);
2225 
2226     return (void *)tcg_ctx + (uintptr_t)v;
2227 }
2228 #endif /* CONFIG_DEBUG_TCG */
2229 
2230 /*
2231  * Return true if OP may appear in the opcode stream with TYPE.
2232  * Test the runtime variable that controls each opcode.
2233  */
2234 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2235 {
2236     bool has_type;
2237 
2238     switch (type) {
2239     case TCG_TYPE_I32:
2240         has_type = true;
2241         break;
2242     case TCG_TYPE_I64:
2243         has_type = TCG_TARGET_REG_BITS == 64;
2244         break;
2245     case TCG_TYPE_V64:
2246         has_type = TCG_TARGET_HAS_v64;
2247         break;
2248     case TCG_TYPE_V128:
2249         has_type = TCG_TARGET_HAS_v128;
2250         break;
2251     case TCG_TYPE_V256:
2252         has_type = TCG_TARGET_HAS_v256;
2253         break;
2254     default:
2255         has_type = false;
2256         break;
2257     }
2258 
2259     switch (op) {
2260     case INDEX_op_discard:
2261     case INDEX_op_set_label:
2262     case INDEX_op_call:
2263     case INDEX_op_br:
2264     case INDEX_op_mb:
2265     case INDEX_op_insn_start:
2266     case INDEX_op_exit_tb:
2267     case INDEX_op_goto_tb:
2268     case INDEX_op_goto_ptr:
2269     case INDEX_op_qemu_ld_i32:
2270     case INDEX_op_qemu_st_i32:
2271     case INDEX_op_qemu_ld_i64:
2272     case INDEX_op_qemu_st_i64:
2273         return true;
2274 
2275     case INDEX_op_qemu_st8_i32:
2276         return TCG_TARGET_HAS_qemu_st8_i32;
2277 
2278     case INDEX_op_qemu_ld_i128:
2279     case INDEX_op_qemu_st_i128:
2280         return TCG_TARGET_HAS_qemu_ldst_i128;
2281 
2282     case INDEX_op_add:
2283     case INDEX_op_and:
2284     case INDEX_op_brcond:
2285     case INDEX_op_mov:
2286     case INDEX_op_negsetcond:
2287     case INDEX_op_or:
2288     case INDEX_op_setcond:
2289     case INDEX_op_xor:
2290         return has_type;
2291 
2292     case INDEX_op_movcond_i32:
2293     case INDEX_op_ld8u_i32:
2294     case INDEX_op_ld8s_i32:
2295     case INDEX_op_ld16u_i32:
2296     case INDEX_op_ld16s_i32:
2297     case INDEX_op_ld_i32:
2298     case INDEX_op_st8_i32:
2299     case INDEX_op_st16_i32:
2300     case INDEX_op_st_i32:
2301     case INDEX_op_extract_i32:
2302     case INDEX_op_sextract_i32:
2303     case INDEX_op_deposit_i32:
2304         return true;
2305 
2306     case INDEX_op_extract2_i32:
2307         return TCG_TARGET_HAS_extract2_i32;
2308     case INDEX_op_add2_i32:
2309         return TCG_TARGET_HAS_add2_i32;
2310     case INDEX_op_sub2_i32:
2311         return TCG_TARGET_HAS_sub2_i32;
2312     case INDEX_op_bswap16_i32:
2313         return TCG_TARGET_HAS_bswap16_i32;
2314     case INDEX_op_bswap32_i32:
2315         return TCG_TARGET_HAS_bswap32_i32;
2316 
2317     case INDEX_op_brcond2_i32:
2318     case INDEX_op_setcond2_i32:
2319         return TCG_TARGET_REG_BITS == 32;
2320 
2321     case INDEX_op_movcond_i64:
2322     case INDEX_op_ld8u_i64:
2323     case INDEX_op_ld8s_i64:
2324     case INDEX_op_ld16u_i64:
2325     case INDEX_op_ld16s_i64:
2326     case INDEX_op_ld32u_i64:
2327     case INDEX_op_ld32s_i64:
2328     case INDEX_op_ld_i64:
2329     case INDEX_op_st8_i64:
2330     case INDEX_op_st16_i64:
2331     case INDEX_op_st32_i64:
2332     case INDEX_op_st_i64:
2333     case INDEX_op_ext_i32_i64:
2334     case INDEX_op_extu_i32_i64:
2335     case INDEX_op_extract_i64:
2336     case INDEX_op_sextract_i64:
2337     case INDEX_op_deposit_i64:
2338         return TCG_TARGET_REG_BITS == 64;
2339 
2340     case INDEX_op_extract2_i64:
2341         return TCG_TARGET_HAS_extract2_i64;
2342     case INDEX_op_extrl_i64_i32:
2343     case INDEX_op_extrh_i64_i32:
2344         return TCG_TARGET_HAS_extr_i64_i32;
2345     case INDEX_op_bswap16_i64:
2346         return TCG_TARGET_HAS_bswap16_i64;
2347     case INDEX_op_bswap32_i64:
2348         return TCG_TARGET_HAS_bswap32_i64;
2349     case INDEX_op_bswap64_i64:
2350         return TCG_TARGET_HAS_bswap64_i64;
2351     case INDEX_op_add2_i64:
2352         return TCG_TARGET_HAS_add2_i64;
2353     case INDEX_op_sub2_i64:
2354         return TCG_TARGET_HAS_sub2_i64;
2355 
2356     case INDEX_op_mov_vec:
2357     case INDEX_op_dup_vec:
2358     case INDEX_op_dupm_vec:
2359     case INDEX_op_ld_vec:
2360     case INDEX_op_st_vec:
2361     case INDEX_op_add_vec:
2362     case INDEX_op_sub_vec:
2363     case INDEX_op_and_vec:
2364     case INDEX_op_or_vec:
2365     case INDEX_op_xor_vec:
2366     case INDEX_op_cmp_vec:
2367         return has_type;
2368     case INDEX_op_dup2_vec:
2369         return has_type && TCG_TARGET_REG_BITS == 32;
2370     case INDEX_op_not_vec:
2371         return has_type && TCG_TARGET_HAS_not_vec;
2372     case INDEX_op_neg_vec:
2373         return has_type && TCG_TARGET_HAS_neg_vec;
2374     case INDEX_op_abs_vec:
2375         return has_type && TCG_TARGET_HAS_abs_vec;
2376     case INDEX_op_andc_vec:
2377         return has_type && TCG_TARGET_HAS_andc_vec;
2378     case INDEX_op_orc_vec:
2379         return has_type && TCG_TARGET_HAS_orc_vec;
2380     case INDEX_op_nand_vec:
2381         return has_type && TCG_TARGET_HAS_nand_vec;
2382     case INDEX_op_nor_vec:
2383         return has_type && TCG_TARGET_HAS_nor_vec;
2384     case INDEX_op_eqv_vec:
2385         return has_type && TCG_TARGET_HAS_eqv_vec;
2386     case INDEX_op_mul_vec:
2387         return has_type && TCG_TARGET_HAS_mul_vec;
2388     case INDEX_op_shli_vec:
2389     case INDEX_op_shri_vec:
2390     case INDEX_op_sari_vec:
2391         return has_type && TCG_TARGET_HAS_shi_vec;
2392     case INDEX_op_shls_vec:
2393     case INDEX_op_shrs_vec:
2394     case INDEX_op_sars_vec:
2395         return has_type && TCG_TARGET_HAS_shs_vec;
2396     case INDEX_op_shlv_vec:
2397     case INDEX_op_shrv_vec:
2398     case INDEX_op_sarv_vec:
2399         return has_type && TCG_TARGET_HAS_shv_vec;
2400     case INDEX_op_rotli_vec:
2401         return has_type && TCG_TARGET_HAS_roti_vec;
2402     case INDEX_op_rotls_vec:
2403         return has_type && TCG_TARGET_HAS_rots_vec;
2404     case INDEX_op_rotlv_vec:
2405     case INDEX_op_rotrv_vec:
2406         return has_type && TCG_TARGET_HAS_rotv_vec;
2407     case INDEX_op_ssadd_vec:
2408     case INDEX_op_usadd_vec:
2409     case INDEX_op_sssub_vec:
2410     case INDEX_op_ussub_vec:
2411         return has_type && TCG_TARGET_HAS_sat_vec;
2412     case INDEX_op_smin_vec:
2413     case INDEX_op_umin_vec:
2414     case INDEX_op_smax_vec:
2415     case INDEX_op_umax_vec:
2416         return has_type && TCG_TARGET_HAS_minmax_vec;
2417     case INDEX_op_bitsel_vec:
2418         return has_type && TCG_TARGET_HAS_bitsel_vec;
2419     case INDEX_op_cmpsel_vec:
2420         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2421 
2422     default:
2423         if (op < INDEX_op_last_generic) {
2424             const TCGOutOp *outop;
2425             TCGConstraintSetIndex con_set;
2426 
2427             if (!has_type) {
2428                 return false;
2429             }
2430 
2431             outop = all_outop[op];
2432             tcg_debug_assert(outop != NULL);
2433 
2434             con_set = outop->static_constraint;
2435             if (con_set == C_Dynamic) {
2436                 con_set = outop->dynamic_constraint(type, flags);
2437             }
2438             if (con_set >= 0) {
2439                 return true;
2440             }
2441             tcg_debug_assert(con_set == C_NotImplemented);
2442             return false;
2443         }
2444         tcg_debug_assert(op < NB_OPS);
2445         return true;
2446 
2447     case INDEX_op_last_generic:
2448         g_assert_not_reached();
2449     }
2450 }
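
/*
 * Example (illustrative): expanders query tcg_op_supported() before
 * emitting an optional opcode and otherwise fall back to a generic
 * expansion, e.g. for a 64-bit byte swap:
 */
#if 0
    if (tcg_op_supported(INDEX_op_bswap64_i64, TCG_TYPE_I64, 0)) {
        /* emit the single bswap64_i64 op */
    } else {
        /* expand via 32-bit swaps, shifts and masks */
    }
#endif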
2451 
2452 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2453 {
2454     unsigned width;
2455 
2456     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2457     width = (type == TCG_TYPE_I32 ? 32 : 64);
2458 
2459     tcg_debug_assert(ofs < width);
2460     tcg_debug_assert(len > 0);
2461     tcg_debug_assert(len <= width - ofs);
2462 
2463     return TCG_TARGET_deposit_valid(type, ofs, len);
2464 }
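
/*
 * Example (illustrative): deposit expanders can ask whether the backend
 * encodes a given field directly, e.g. an 8-bit insert at bit 8 of a
 * 32-bit word:
 */
#if 0
    if (tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8)) {
        /* emit the deposit op directly */
    } else {
        /* fall back to shift/and/or */
    }
#endif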
2465 
2466 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2467 
2468 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2469                           TCGTemp *ret, TCGTemp **args)
2470 {
2471     TCGv_i64 extend_free[MAX_CALL_IARGS];
2472     int n_extend = 0;
2473     TCGOp *op;
2474     int i, n, pi = 0, total_args;
2475 
2476     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2477         init_call_layout(info);
2478         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2479     }
2480 
2481     total_args = info->nr_out + info->nr_in + 2;
2482     op = tcg_op_alloc(INDEX_op_call, total_args);
2483 
2484 #ifdef CONFIG_PLUGIN
2485     /* Flag helpers that may affect guest state */
2486     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2487         tcg_ctx->plugin_insn->calls_helpers = true;
2488     }
2489 #endif
2490 
2491     TCGOP_CALLO(op) = n = info->nr_out;
2492     switch (n) {
2493     case 0:
2494         tcg_debug_assert(ret == NULL);
2495         break;
2496     case 1:
2497         tcg_debug_assert(ret != NULL);
2498         op->args[pi++] = temp_arg(ret);
2499         break;
2500     case 2:
2501     case 4:
2502         tcg_debug_assert(ret != NULL);
2503         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2504         tcg_debug_assert(ret->temp_subindex == 0);
2505         for (i = 0; i < n; ++i) {
2506             op->args[pi++] = temp_arg(ret + i);
2507         }
2508         break;
2509     default:
2510         g_assert_not_reached();
2511     }
2512 
2513     TCGOP_CALLI(op) = n = info->nr_in;
2514     for (i = 0; i < n; i++) {
2515         const TCGCallArgumentLoc *loc = &info->in[i];
2516         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2517 
2518         switch (loc->kind) {
2519         case TCG_CALL_ARG_NORMAL:
2520         case TCG_CALL_ARG_BY_REF:
2521         case TCG_CALL_ARG_BY_REF_N:
2522             op->args[pi++] = temp_arg(ts);
2523             break;
2524 
2525         case TCG_CALL_ARG_EXTEND_U:
2526         case TCG_CALL_ARG_EXTEND_S:
2527             {
2528                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2529                 TCGv_i32 orig = temp_tcgv_i32(ts);
2530 
2531                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2532                     tcg_gen_ext_i32_i64(temp, orig);
2533                 } else {
2534                     tcg_gen_extu_i32_i64(temp, orig);
2535                 }
2536                 op->args[pi++] = tcgv_i64_arg(temp);
2537                 extend_free[n_extend++] = temp;
2538             }
2539             break;
2540 
2541         default:
2542             g_assert_not_reached();
2543         }
2544     }
2545     op->args[pi++] = (uintptr_t)func;
2546     op->args[pi++] = (uintptr_t)info;
2547     tcg_debug_assert(pi == total_args);
2548 
2549     if (tcg_ctx->emit_before_op) {
2550         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2551     } else {
2552         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2553     }
2554 
2555     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2556     for (i = 0; i < n_extend; ++i) {
2557         tcg_temp_free_i64(extend_free[i]);
2558     }
2559 }
2560 
2561 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2562 {
2563     tcg_gen_callN(func, info, ret, NULL);
2564 }
2565 
2566 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2567 {
2568     tcg_gen_callN(func, info, ret, &t1);
2569 }
2570 
2571 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2)
2573 {
2574     TCGTemp *args[2] = { t1, t2 };
2575     tcg_gen_callN(func, info, ret, args);
2576 }
2577 
2578 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2579                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2580 {
2581     TCGTemp *args[3] = { t1, t2, t3 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2587 {
2588     TCGTemp *args[4] = { t1, t2, t3, t4 };
2589     tcg_gen_callN(func, info, ret, args);
2590 }
2591 
2592 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2593                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2594 {
2595     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2600                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2601                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2602 {
2603     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
2606 
2607 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2608                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2609                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2610 {
2611     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2612     tcg_gen_callN(func, info, ret, args);
2613 }
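
/*
 * Example (hypothetical helper, not part of this file): front ends do
 * not normally call these wrappers directly; the gen_helper_* functions
 * generated from DEF_HELPER_FLAGS_* expand to them, roughly:
 */
#if 0
    /* DEF_HELPER_FLAGS_2(div, TCG_CALL_NO_RWG, i32, i32, i32) makes
       gen_helper_div(ret, a, b) expand approximately to: */
    tcg_gen_call2(helper_div, &helper_info_div,
                  tcgv_i32_temp(ret), tcgv_i32_temp(a), tcgv_i32_temp(b));
#endif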
2614 
2615 static void tcg_reg_alloc_start(TCGContext *s)
2616 {
2617     int i, n;
2618 
2619     for (i = 0, n = s->nb_temps; i < n; i++) {
2620         TCGTemp *ts = &s->temps[i];
2621         TCGTempVal val = TEMP_VAL_MEM;
2622 
2623         switch (ts->kind) {
2624         case TEMP_CONST:
2625             val = TEMP_VAL_CONST;
2626             break;
2627         case TEMP_FIXED:
2628             val = TEMP_VAL_REG;
2629             break;
2630         case TEMP_GLOBAL:
2631             break;
2632         case TEMP_EBB:
2633             val = TEMP_VAL_DEAD;
2634             /* fall through */
2635         case TEMP_TB:
2636             ts->mem_allocated = 0;
2637             break;
2638         default:
2639             g_assert_not_reached();
2640         }
2641         ts->val_type = val;
2642     }
2643 
2644     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2645 }
2646 
2647 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2648                                  TCGTemp *ts)
2649 {
2650     int idx = temp_idx(ts);
2651 
2652     switch (ts->kind) {
2653     case TEMP_FIXED:
2654     case TEMP_GLOBAL:
2655         pstrcpy(buf, buf_size, ts->name);
2656         break;
2657     case TEMP_TB:
2658         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2659         break;
2660     case TEMP_EBB:
2661         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2662         break;
2663     case TEMP_CONST:
2664         switch (ts->type) {
2665         case TCG_TYPE_I32:
2666             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2667             break;
2668 #if TCG_TARGET_REG_BITS > 32
2669         case TCG_TYPE_I64:
2670             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2671             break;
2672 #endif
2673         case TCG_TYPE_V64:
2674         case TCG_TYPE_V128:
2675         case TCG_TYPE_V256:
2676             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2677                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2678             break;
2679         default:
2680             g_assert_not_reached();
2681         }
2682         break;
2683     }
2684     return buf;
2685 }
2686 
2687 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2688                              int buf_size, TCGArg arg)
2689 {
2690     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2691 }
2692 
2693 static const char * const cond_name[] =
2694 {
2695     [TCG_COND_NEVER] = "never",
2696     [TCG_COND_ALWAYS] = "always",
2697     [TCG_COND_EQ] = "eq",
2698     [TCG_COND_NE] = "ne",
2699     [TCG_COND_LT] = "lt",
2700     [TCG_COND_GE] = "ge",
2701     [TCG_COND_LE] = "le",
2702     [TCG_COND_GT] = "gt",
2703     [TCG_COND_LTU] = "ltu",
2704     [TCG_COND_GEU] = "geu",
2705     [TCG_COND_LEU] = "leu",
2706     [TCG_COND_GTU] = "gtu",
2707     [TCG_COND_TSTEQ] = "tsteq",
2708     [TCG_COND_TSTNE] = "tstne",
2709 };
2710 
2711 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2712 {
2713     [MO_UB]   = "ub",
2714     [MO_SB]   = "sb",
2715     [MO_LEUW] = "leuw",
2716     [MO_LESW] = "lesw",
2717     [MO_LEUL] = "leul",
2718     [MO_LESL] = "lesl",
2719     [MO_LEUQ] = "leq",
2720     [MO_BEUW] = "beuw",
2721     [MO_BESW] = "besw",
2722     [MO_BEUL] = "beul",
2723     [MO_BESL] = "besl",
2724     [MO_BEUQ] = "beq",
2725     [MO_128 + MO_BE] = "beo",
2726     [MO_128 + MO_LE] = "leo",
2727 };
2728 
2729 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2730     [MO_UNALN >> MO_ASHIFT]    = "un+",
2731     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2732     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2733     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2734     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2735     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2736     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2737     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2738 };
2739 
2740 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2741     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2742     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2743     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2744     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2745     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2746     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2747 };
2748 
2749 static const char bswap_flag_name[][6] = {
2750     [TCG_BSWAP_IZ] = "iz",
2751     [TCG_BSWAP_OZ] = "oz",
2752     [TCG_BSWAP_OS] = "os",
2753     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2754     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2755 };
2756 
2757 #ifdef CONFIG_PLUGIN
2758 static const char * const plugin_from_name[] = {
2759     "from-tb",
2760     "from-insn",
2761     "after-insn",
2762     "after-tb",
2763 };
2764 #endif
2765 
2766 static inline bool tcg_regset_single(TCGRegSet d)
2767 {
2768     return (d & (d - 1)) == 0;
2769 }
2770 
2771 static inline TCGReg tcg_regset_first(TCGRegSet d)
2772 {
2773     if (TCG_TARGET_NB_REGS <= 32) {
2774         return ctz32(d);
2775     } else {
2776         return ctz64(d);
2777     }
2778 }
2779 
2780 /* Return only the number of characters output -- no error return. */
2781 #define ne_fprintf(...) \
2782     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2783 
2784 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2785 {
2786     char buf[128];
2787     TCGOp *op;
2788 
2789     QTAILQ_FOREACH(op, &s->ops, link) {
2790         int i, k, nb_oargs, nb_iargs, nb_cargs;
2791         const TCGOpDef *def;
2792         TCGOpcode c;
2793         int col = 0;
2794 
2795         c = op->opc;
2796         def = &tcg_op_defs[c];
2797 
2798         if (c == INDEX_op_insn_start) {
2799             nb_oargs = 0;
2800             col += ne_fprintf(f, "\n ----");
2801 
2802             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2803                 col += ne_fprintf(f, " %016" PRIx64,
2804                                   tcg_get_insn_start_param(op, i));
2805             }
2806         } else if (c == INDEX_op_call) {
2807             const TCGHelperInfo *info = tcg_call_info(op);
2808             void *func = tcg_call_func(op);
2809 
2810             /* variable number of arguments */
2811             nb_oargs = TCGOP_CALLO(op);
2812             nb_iargs = TCGOP_CALLI(op);
2813             nb_cargs = def->nb_cargs;
2814 
2815             col += ne_fprintf(f, " %s ", def->name);
2816 
2817             /*
2818              * Print the function name from TCGHelperInfo, if available.
2819              * Note that plugins have a template function for the info,
2820              * but the actual function pointer comes from the plugin.
2821              */
2822             if (func == info->func) {
2823                 col += ne_fprintf(f, "%s", info->name);
2824             } else {
2825                 col += ne_fprintf(f, "plugin(%p)", func);
2826             }
2827 
2828             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2829             for (i = 0; i < nb_oargs; i++) {
2830                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2831                                                             op->args[i]));
2832             }
2833             for (i = 0; i < nb_iargs; i++) {
2834                 TCGArg arg = op->args[nb_oargs + i];
2835                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2836                 col += ne_fprintf(f, ",%s", t);
2837             }
2838         } else {
2839             if (def->flags & TCG_OPF_INT) {
2840                 col += ne_fprintf(f, " %s_i%d ",
2841                                   def->name,
2842                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2843             } else if (def->flags & TCG_OPF_VECTOR) {
2844                 col += ne_fprintf(f, "%s v%d,e%d,",
2845                                   def->name,
2846                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2847                                   8 << TCGOP_VECE(op));
2848             } else {
2849                 col += ne_fprintf(f, " %s ", def->name);
2850             }
2851 
2852             nb_oargs = def->nb_oargs;
2853             nb_iargs = def->nb_iargs;
2854             nb_cargs = def->nb_cargs;
2855 
2856             k = 0;
2857             for (i = 0; i < nb_oargs; i++) {
2858                 const char *sep = k ? "," : "";
2859                 col += ne_fprintf(f, "%s%s", sep,
2860                                   tcg_get_arg_str(s, buf, sizeof(buf),
2861                                                   op->args[k++]));
2862             }
2863             for (i = 0; i < nb_iargs; i++) {
2864                 const char *sep = k ? "," : "";
2865                 col += ne_fprintf(f, "%s%s", sep,
2866                                   tcg_get_arg_str(s, buf, sizeof(buf),
2867                                                   op->args[k++]));
2868             }
2869             switch (c) {
2870             case INDEX_op_brcond:
2871             case INDEX_op_setcond:
2872             case INDEX_op_negsetcond:
2873             case INDEX_op_movcond_i32:
2874             case INDEX_op_brcond2_i32:
2875             case INDEX_op_setcond2_i32:
2876             case INDEX_op_movcond_i64:
2877             case INDEX_op_cmp_vec:
2878             case INDEX_op_cmpsel_vec:
2879                 if (op->args[k] < ARRAY_SIZE(cond_name)
2880                     && cond_name[op->args[k]]) {
2881                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2882                 } else {
2883                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2884                 }
2885                 i = 1;
2886                 break;
2887             case INDEX_op_qemu_ld_i32:
2888             case INDEX_op_qemu_st_i32:
2889             case INDEX_op_qemu_st8_i32:
2890             case INDEX_op_qemu_ld_i64:
2891             case INDEX_op_qemu_st_i64:
2892             case INDEX_op_qemu_ld_i128:
2893             case INDEX_op_qemu_st_i128:
2894                 {
2895                     const char *s_al, *s_op, *s_at;
2896                     MemOpIdx oi = op->args[k++];
2897                     MemOp mop = get_memop(oi);
2898                     unsigned ix = get_mmuidx(oi);
2899 
2900                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2901                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2902                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2903                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2904 
2905                     /* If all fields are accounted for, print symbolically. */
2906                     if (!mop && s_al && s_op && s_at) {
2907                         col += ne_fprintf(f, ",%s%s%s,%u",
2908                                           s_at, s_al, s_op, ix);
2909                     } else {
2910                         mop = get_memop(oi);
2911                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2912                     }
2913                     i = 1;
2914                 }
2915                 break;
2916             case INDEX_op_bswap16_i32:
2917             case INDEX_op_bswap16_i64:
2918             case INDEX_op_bswap32_i32:
2919             case INDEX_op_bswap32_i64:
2920             case INDEX_op_bswap64_i64:
2921                 {
2922                     TCGArg flags = op->args[k];
2923                     const char *name = NULL;
2924 
2925                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2926                         name = bswap_flag_name[flags];
2927                     }
2928                     if (name) {
2929                         col += ne_fprintf(f, ",%s", name);
2930                     } else {
2931                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2932                     }
2933                     i = k = 1;
2934                 }
2935                 break;
2936 #ifdef CONFIG_PLUGIN
2937             case INDEX_op_plugin_cb:
2938                 {
2939                     TCGArg from = op->args[k++];
2940                     const char *name = NULL;
2941 
2942                     if (from < ARRAY_SIZE(plugin_from_name)) {
2943                         name = plugin_from_name[from];
2944                     }
2945                     if (name) {
2946                         col += ne_fprintf(f, "%s", name);
2947                     } else {
2948                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2949                     }
2950                     i = 1;
2951                 }
2952                 break;
2953 #endif
2954             default:
2955                 i = 0;
2956                 break;
2957             }
2958             switch (c) {
2959             case INDEX_op_set_label:
2960             case INDEX_op_br:
2961             case INDEX_op_brcond:
2962             case INDEX_op_brcond2_i32:
2963                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2964                                   arg_label(op->args[k])->id);
2965                 i++, k++;
2966                 break;
2967             case INDEX_op_mb:
2968                 {
2969                     TCGBar membar = op->args[k];
2970                     const char *b_op, *m_op;
2971 
2972                     switch (membar & TCG_BAR_SC) {
2973                     case 0:
2974                         b_op = "none";
2975                         break;
2976                     case TCG_BAR_LDAQ:
2977                         b_op = "acq";
2978                         break;
2979                     case TCG_BAR_STRL:
2980                         b_op = "rel";
2981                         break;
2982                     case TCG_BAR_SC:
2983                         b_op = "seq";
2984                         break;
2985                     default:
2986                         g_assert_not_reached();
2987                     }
2988 
2989                     switch (membar & TCG_MO_ALL) {
2990                     case 0:
2991                         m_op = "none";
2992                         break;
2993                     case TCG_MO_LD_LD:
2994                         m_op = "rr";
2995                         break;
2996                     case TCG_MO_LD_ST:
2997                         m_op = "rw";
2998                         break;
2999                     case TCG_MO_ST_LD:
3000                         m_op = "wr";
3001                         break;
3002                     case TCG_MO_ST_ST:
3003                         m_op = "ww";
3004                         break;
3005                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3006                         m_op = "rr+rw";
3007                         break;
3008                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3009                         m_op = "rr+wr";
3010                         break;
3011                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3012                         m_op = "rr+ww";
3013                         break;
3014                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3015                         m_op = "rw+wr";
3016                         break;
3017                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3018                         m_op = "rw+ww";
3019                         break;
3020                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3021                         m_op = "wr+ww";
3022                         break;
3023                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3024                         m_op = "rr+rw+wr";
3025                         break;
3026                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3027                         m_op = "rr+rw+ww";
3028                         break;
3029                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3030                         m_op = "rr+wr+ww";
3031                         break;
3032                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3033                         m_op = "rw+wr+ww";
3034                         break;
3035                     case TCG_MO_ALL:
3036                         m_op = "all";
3037                         break;
3038                     default:
3039                         g_assert_not_reached();
3040                     }
3041 
3042                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3043                     i++, k++;
3044                 }
3045                 break;
3046             default:
3047                 break;
3048             }
3049             for (; i < nb_cargs; i++, k++) {
3050                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3051                                   op->args[k]);
3052             }
3053         }
3054 
3055         if (have_prefs || op->life) {
3056             for (; col < 40; ++col) {
3057                 putc(' ', f);
3058             }
3059         }
3060 
3061         if (op->life) {
3062             unsigned life = op->life;
3063 
3064             if (life & (SYNC_ARG * 3)) {
3065                 ne_fprintf(f, "  sync:");
3066                 for (i = 0; i < 2; ++i) {
3067                     if (life & (SYNC_ARG << i)) {
3068                         ne_fprintf(f, " %d", i);
3069                     }
3070                 }
3071             }
3072             life /= DEAD_ARG;
3073             if (life) {
3074                 ne_fprintf(f, "  dead:");
3075                 for (i = 0; life; ++i, life >>= 1) {
3076                     if (life & 1) {
3077                         ne_fprintf(f, " %d", i);
3078                     }
3079                 }
3080             }
3081         }
3082 
3083         if (have_prefs) {
3084             for (i = 0; i < nb_oargs; ++i) {
3085                 TCGRegSet set = output_pref(op, i);
3086 
3087                 if (i == 0) {
3088                     ne_fprintf(f, "  pref=");
3089                 } else {
3090                     ne_fprintf(f, ",");
3091                 }
3092                 if (set == 0) {
3093                     ne_fprintf(f, "none");
3094                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3095                     ne_fprintf(f, "all");
3096 #ifdef CONFIG_DEBUG_TCG
3097                 } else if (tcg_regset_single(set)) {
3098                     TCGReg reg = tcg_regset_first(set);
3099                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3100 #endif
3101                 } else if (TCG_TARGET_NB_REGS <= 32) {
3102                     ne_fprintf(f, "0x%x", (uint32_t)set);
3103                 } else {
3104                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3105                 }
3106             }
3107         }
3108 
3109         putc('\n', f);
3110     }
3111 }
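
/*
 * Sample output (illustrative): with the "op" log mask enabled, each op
 * prints on one line, with the liveness and preference columns appended
 * only when available, e.g.:
 *
 *  ---- 0000000000401000 0000000000000000
 *   mov_i32 tmp0,r1                          dead: 1  pref=all
 */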
3112 
3113 /* We give higher priority to constraints with fewer registers. */
3114 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3115 {
3116     int n;
3117 
3118     arg_ct += k;
3119     n = ctpop64(arg_ct->regs);
3120 
3121     /*
3122      * Sort constraints of a single register first, which includes output
3123      * aliases (which must exactly match the input already allocated).
3124      */
3125     if (n == 1 || arg_ct->oalias) {
3126         return INT_MAX;
3127     }
3128 
3129     /*
3130      * Sort register pairs next, first then second immediately after.
3131      * Arbitrarily sort multiple pairs by the index of the first reg;
3132      * there shouldn't be many pairs.
3133      */
3134     switch (arg_ct->pair) {
3135     case 1:
3136     case 3:
3137         return (k + 1) * 2;
3138     case 2:
3139         return (arg_ct->pair_index + 1) * 2 - 1;
3140     }
3141 
3142     /* Finally, sort by decreasing register count. */
3143     assert(n > 1);
3144     return -n;
3145 }
3146 
3147 /* Sort from highest priority to lowest. */
3148 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3149 {
3150     int i, j;
3151 
3152     for (i = 0; i < n; i++) {
3153         a[start + i].sort_index = start + i;
3154     }
3155     if (n <= 1) {
3156         return;
3157     }
3158     for (i = 0; i < n - 1; i++) {
3159         for (j = i + 1; j < n; j++) {
3160             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3161             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3162             if (p1 < p2) {
3163                 int tmp = a[start + i].sort_index;
3164                 a[start + i].sort_index = a[start + j].sort_index;
3165                 a[start + j].sort_index = tmp;
3166             }
3167         }
3168     }
3169 }
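
/*
 * Example (illustrative, x86-style): for a shift constrained as
 * C_O1_I2(r, 0, ci), the count input's "c" class allows a single
 * register, so get_constraint_priority() returns INT_MAX for it and it
 * is allocated before the flexible "r" and aliased "0" operands.
 */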
3170 
3171 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3172 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3173 
3174 static void process_constraint_sets(void)
3175 {
3176     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3177         const TCGConstraintSet *tdefs = &constraint_sets[c];
3178         TCGArgConstraint *args_ct = all_cts[c];
3179         int nb_oargs = tdefs->nb_oargs;
3180         int nb_iargs = tdefs->nb_iargs;
3181         int nb_args = nb_oargs + nb_iargs;
3182         bool saw_alias_pair = false;
3183 
3184         for (int i = 0; i < nb_args; i++) {
3185             const char *ct_str = tdefs->args_ct_str[i];
3186             bool input_p = i >= nb_oargs;
3187             int o;
3188 
3189             switch (*ct_str) {
3190             case '0' ... '9':
3191                 o = *ct_str - '0';
3192                 tcg_debug_assert(input_p);
3193                 tcg_debug_assert(o < nb_oargs);
3194                 tcg_debug_assert(args_ct[o].regs != 0);
3195                 tcg_debug_assert(!args_ct[o].oalias);
3196                 args_ct[i] = args_ct[o];
3197                 /* The output sets oalias.  */
3198                 args_ct[o].oalias = 1;
3199                 args_ct[o].alias_index = i;
3200                 /* The input sets ialias. */
3201                 args_ct[i].ialias = 1;
3202                 args_ct[i].alias_index = o;
3203                 if (args_ct[i].pair) {
3204                     saw_alias_pair = true;
3205                 }
3206                 tcg_debug_assert(ct_str[1] == '\0');
3207                 continue;
3208 
3209             case '&':
3210                 tcg_debug_assert(!input_p);
3211                 args_ct[i].newreg = true;
3212                 ct_str++;
3213                 break;
3214 
3215             case 'p': /* plus */
3216                 /* Allocate to the register after the previous. */
3217                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3218                 o = i - 1;
3219                 tcg_debug_assert(!args_ct[o].pair);
3220                 tcg_debug_assert(!args_ct[o].ct);
3221                 args_ct[i] = (TCGArgConstraint){
3222                     .pair = 2,
3223                     .pair_index = o,
3224                     .regs = args_ct[o].regs << 1,
3225                     .newreg = args_ct[o].newreg,
3226                 };
3227                 args_ct[o].pair = 1;
3228                 args_ct[o].pair_index = i;
3229                 tcg_debug_assert(ct_str[1] == '\0');
3230                 continue;
3231 
3232             case 'm': /* minus */
3233                 /* Allocate to the register before the previous. */
3234                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3235                 o = i - 1;
3236                 tcg_debug_assert(!args_ct[o].pair);
3237                 tcg_debug_assert(!args_ct[o].ct);
3238                 args_ct[i] = (TCGArgConstraint){
3239                     .pair = 1,
3240                     .pair_index = o,
3241                     .regs = args_ct[o].regs >> 1,
3242                     .newreg = args_ct[o].newreg,
3243                 };
3244                 args_ct[o].pair = 2;
3245                 args_ct[o].pair_index = i;
3246                 tcg_debug_assert(ct_str[1] == '\0');
3247                 continue;
3248             }
3249 
3250             do {
3251                 switch (*ct_str) {
3252                 case 'i':
3253                     args_ct[i].ct |= TCG_CT_CONST;
3254                     break;
3255 #ifdef TCG_REG_ZERO
3256                 case 'z':
3257                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3258                     break;
3259 #endif
3260 
3261                 /* Include all of the target-specific constraints. */
3262 
3263 #undef CONST
3264 #define CONST(CASE, MASK) \
3265     case CASE: args_ct[i].ct |= MASK; break;
3266 #define REGS(CASE, MASK) \
3267     case CASE: args_ct[i].regs |= MASK; break;
3268 
3269 #include "tcg-target-con-str.h"
3270 
3271 #undef REGS
3272 #undef CONST
3273                 default:
3274                 case '0' ... '9':
3275                 case '&':
3276                 case 'p':
3277                 case 'm':
3278                     /* Typo in TCGConstraintSet constraint. */
3279                     g_assert_not_reached();
3280                 }
3281             } while (*++ct_str != '\0');
3282         }
3283 
3284         /*
3285          * Fix up output pairs that are aliased with inputs.
3286          * When we created the alias, we copied pair from the output.
3287          * There are three cases:
3288          *    (1a) Pairs of inputs alias pairs of outputs.
3289          *    (1b) One input aliases the first of a pair of outputs.
3290          *    (2)  One input aliases the second of a pair of outputs.
3291          *
3292          * Case 1a is handled by making sure that the pair_index'es are
3293          * properly updated so that they appear the same as a pair of inputs.
3294          *
3295          * Case 1b is handled by setting the pair_index of the input to
3296          * itself, simply so it doesn't point to an unrelated argument.
3297          * Since we don't encounter the "second" during the input allocation
3298          * phase, nothing happens with the second half of the input pair.
3299          *
3300          * Case 2 is handled by setting the second input to pair=3, the
3301          * first output to pair=3, and the pair_index'es to match.
3302          */
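        /*
         * Worked example for case 2 (editorial sketch, hypothetical
         * indices): outputs O0/O1 form a pair (O0.pair = 1,
         * O1.pair = 2) and lone input I2 aliases O1.  The alias copy
         * above left I2.pair = 2 pointing at an unrelated argument;
         * the fixup below sets I2.pair = O0.pair = 3 and cross-links
         * their pair_index fields.
         */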
3303         if (saw_alias_pair) {
3304             for (int i = nb_oargs; i < nb_args; i++) {
3305                 int o, o2, i2;
3306 
3307                 /*
3308                  * Since [0-9pm] must be alone in the constraint string,
3309                  * the only way they can both be set is if the pair comes
3310                  * from the output alias.
3311                  */
3312                 if (!args_ct[i].ialias) {
3313                     continue;
3314                 }
3315                 switch (args_ct[i].pair) {
3316                 case 0:
3317                     break;
3318                 case 1:
3319                     o = args_ct[i].alias_index;
3320                     o2 = args_ct[o].pair_index;
3321                     tcg_debug_assert(args_ct[o].pair == 1);
3322                     tcg_debug_assert(args_ct[o2].pair == 2);
3323                     if (args_ct[o2].oalias) {
3324                         /* Case 1a */
3325                         i2 = args_ct[o2].alias_index;
3326                         tcg_debug_assert(args_ct[i2].pair == 2);
3327                         args_ct[i2].pair_index = i;
3328                         args_ct[i].pair_index = i2;
3329                     } else {
3330                         /* Case 1b */
3331                         args_ct[i].pair_index = i;
3332                     }
3333                     break;
3334                 case 2:
3335                     o = args_ct[i].alias_index;
3336                     o2 = args_ct[o].pair_index;
3337                     tcg_debug_assert(args_ct[o].pair == 2);
3338                     tcg_debug_assert(args_ct[o2].pair == 1);
3339                     if (args_ct[o2].oalias) {
3340                         /* Case 1a */
3341                         i2 = args_ct[o2].alias_index;
3342                         tcg_debug_assert(args_ct[i2].pair == 1);
3343                         args_ct[i2].pair_index = i;
3344                         args_ct[i].pair_index = i2;
3345                     } else {
3346                         /* Case 2 */
3347                         args_ct[i].pair = 3;
3348                         args_ct[o2].pair = 3;
3349                         args_ct[i].pair_index = o2;
3350                         args_ct[o2].pair_index = i;
3351                     }
3352                     break;
3353                 default:
3354                     g_assert_not_reached();
3355                 }
3356             }
3357         }
3358 
3359         /* sort the constraints (XXX: this is just a heuristic) */
3360         sort_constraints(args_ct, 0, nb_oargs);
3361         sort_constraints(args_ct, nb_oargs, nb_iargs);
3362     }
3363 }
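
/*
 * Worked example (editorial sketch): for a hypothetical constraint
 * set with nb_oargs = 1, nb_iargs = 2 and strings { "r", "0", "ri" },
 * the output may use any register in 'r'; the first input must reuse
 * the output's register, setting ialias/oalias on the pair; and the
 * second input accepts any 'r' register or an immediate
 * (TCG_CT_CONST).
 */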
3364 
3365 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3366 {
3367     TCGOpcode opc = op->opc;
3368     TCGType type = TCGOP_TYPE(op);
3369     unsigned flags = TCGOP_FLAGS(op);
3370     const TCGOpDef *def = &tcg_op_defs[opc];
3371     const TCGOutOp *outop = all_outop[opc];
3372     TCGConstraintSetIndex con_set;
3373 
3374     if (def->flags & TCG_OPF_NOT_PRESENT) {
3375         return empty_cts;
3376     }
3377 
3378     if (outop) {
3379         con_set = outop->static_constraint;
3380         if (con_set == C_Dynamic) {
3381             con_set = outop->dynamic_constraint(type, flags);
3382         }
3383     } else {
3384         con_set = tcg_target_op_def(opc, type, flags);
3385     }
3386     tcg_debug_assert(con_set >= 0);
3387     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3388 
3389     /* The constraint arguments must match TCGOpcode arguments. */
3390     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3391     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3392 
3393     return all_cts[con_set];
3394 }
3395 
3396 static void remove_label_use(TCGOp *op, int idx)
3397 {
3398     TCGLabel *label = arg_label(op->args[idx]);
3399     TCGLabelUse *use;
3400 
3401     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3402         if (use->op == op) {
3403             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3404             return;
3405         }
3406     }
3407     g_assert_not_reached();
3408 }
3409 
3410 void tcg_op_remove(TCGContext *s, TCGOp *op)
3411 {
3412     switch (op->opc) {
3413     case INDEX_op_br:
3414         remove_label_use(op, 0);
3415         break;
3416     case INDEX_op_brcond:
3417         remove_label_use(op, 3);
3418         break;
3419     case INDEX_op_brcond2_i32:
3420         remove_label_use(op, 5);
3421         break;
3422     default:
3423         break;
3424     }
3425 
3426     QTAILQ_REMOVE(&s->ops, op, link);
3427     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3428     s->nb_ops--;
3429 }
3430 
3431 void tcg_remove_ops_after(TCGOp *op)
3432 {
3433     TCGContext *s = tcg_ctx;
3434 
3435     while (true) {
3436         TCGOp *last = tcg_last_op();
3437         if (last == op) {
3438             return;
3439         }
3440         tcg_op_remove(s, last);
3441     }
3442 }
3443 
3444 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3445 {
3446     TCGContext *s = tcg_ctx;
3447     TCGOp *op = NULL;
3448 
3449     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3450         QTAILQ_FOREACH(op, &s->free_ops, link) {
3451             if (nargs <= op->nargs) {
3452                 QTAILQ_REMOVE(&s->free_ops, op, link);
3453                 nargs = op->nargs;
3454                 goto found;
3455             }
3456         }
3457     }
3458 
3459     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3460     nargs = MAX(4, nargs);
3461     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3462 
3463  found:
3464     memset(op, 0, offsetof(TCGOp, link));
3465     op->opc = opc;
3466     op->nargs = nargs;
3467 
3468     /* Check for bitfield overflow. */
3469     tcg_debug_assert(op->nargs == nargs);
3470 
3471     s->nb_ops++;
3472     return op;
3473 }
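
/*
 * Editorial note: a recycled op keeps its original allocation size,
 * so a freed 8-argument op can satisfy any later request for up to
 * 8 arguments, while fresh allocations round nargs up to 4 so the
 * common 3- and 4-operand ops are interchangeable on the free list.
 */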
3474 
3475 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3476 {
3477     TCGOp *op = tcg_op_alloc(opc, nargs);
3478 
3479     if (tcg_ctx->emit_before_op) {
3480         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3481     } else {
3482         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3483     }
3484     return op;
3485 }
3486 
3487 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3488                             TCGOpcode opc, TCGType type, unsigned nargs)
3489 {
3490     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3491 
3492     TCGOP_TYPE(new_op) = type;
3493     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3494     return new_op;
3495 }
3496 
3497 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3498                            TCGOpcode opc, TCGType type, unsigned nargs)
3499 {
3500     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3501 
3502     TCGOP_TYPE(new_op) = type;
3503     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3504     return new_op;
3505 }
3506 
3507 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3508 {
3509     TCGLabelUse *u;
3510 
3511     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3512         TCGOp *op = u->op;
3513         switch (op->opc) {
3514         case INDEX_op_br:
3515             op->args[0] = label_arg(to);
3516             break;
3517         case INDEX_op_brcond:
3518             op->args[3] = label_arg(to);
3519             break;
3520         case INDEX_op_brcond2_i32:
3521             op->args[5] = label_arg(to);
3522             break;
3523         default:
3524             g_assert_not_reached();
3525         }
3526     }
3527 
3528     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3529 }
3530 
3531 /* Reachability analysis: remove unreachable code.  */
3532 static void __attribute__((noinline))
3533 reachable_code_pass(TCGContext *s)
3534 {
3535     TCGOp *op, *op_next, *op_prev;
3536     bool dead = false;
3537 
3538     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3539         bool remove = dead;
3540         TCGLabel *label;
3541 
3542         switch (op->opc) {
3543         case INDEX_op_set_label:
3544             label = arg_label(op->args[0]);
3545 
3546             /*
3547              * Note that the first op in the TB is always a load,
3548              * so there is always something before a label.
3549              */
3550             op_prev = QTAILQ_PREV(op, link);
3551 
3552             /*
3553              * If we find two sequential labels, move all branches to
3554              * reference the second label and remove the first label.
3555              * Do this before branch to next optimization, so that the
3556              * middle label is out of the way.
3557              */
3558             if (op_prev->opc == INDEX_op_set_label) {
3559                 move_label_uses(label, arg_label(op_prev->args[0]));
3560                 tcg_op_remove(s, op_prev);
3561                 op_prev = QTAILQ_PREV(op, link);
3562             }
3563 
3564             /*
3565              * Optimization can fold conditional branches to unconditional.
3566              * If we find a label which is preceded by an unconditional
3567              * branch to next, remove the branch.  We couldn't do this when
3568              * processing the branch because any dead code between the branch
3569              * and label had not yet been removed.
3570              */
3571             if (op_prev->opc == INDEX_op_br &&
3572                 label == arg_label(op_prev->args[0])) {
3573                 tcg_op_remove(s, op_prev);
3574                 /* Fall through means insns become live again.  */
3575                 dead = false;
3576             }
3577 
3578             if (QSIMPLEQ_EMPTY(&label->branches)) {
3579                 /*
3580                  * While there is an occasional backward branch, virtually
3581                  * all branches generated by the translators are forward.
3582                  * Which means that generally we will have already removed
3583                  * all the references to the label that will ever exist,
3584                  * and there is little to be gained by iterating.
3585                  */
3586                 remove = true;
3587             } else {
3588                 /* Once we see a label, insns become live again.  */
3589                 dead = false;
3590                 remove = false;
3591             }
3592             break;
3593 
3594         case INDEX_op_br:
3595         case INDEX_op_exit_tb:
3596         case INDEX_op_goto_ptr:
3597             /* Unconditional branches; everything following is dead.  */
3598             dead = true;
3599             break;
3600 
3601         case INDEX_op_call:
3602             /* Notice noreturn helper calls, raising exceptions.  */
3603             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3604                 dead = true;
3605             }
3606             break;
3607 
3608         case INDEX_op_insn_start:
3609             /* Never remove -- we need to keep these for unwind.  */
3610             remove = false;
3611             break;
3612 
3613         default:
3614             break;
3615         }
3616 
3617         if (remove) {
3618             tcg_op_remove(s, op);
3619         }
3620     }
3621 }
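
/*
 * Worked example (editorial sketch): once the optimizer has folded a
 * conditional branch into an unconditional one, a sequence such as
 *
 *     br $L1
 *     mov_i32 t0, t1      <- dead, removed
 *     set_label $L1
 *
 * first loses the mov (everything after the br is dead), then the br
 * itself (a branch to the immediately following label), and finally
 * the label, provided no other branch still references it.
 */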
3622 
3623 #define TS_DEAD  1
3624 #define TS_MEM   2
3625 
3626 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3627 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3628 
3629 /* For liveness_pass_1, the register preferences for a given temp.  */
3630 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3631 {
3632     return ts->state_ptr;
3633 }
3634 
3635 /* For liveness_pass_1, reset the preferences for a given temp to the
3636  * maximal regset for its type.
3637  */
3638 static inline void la_reset_pref(TCGTemp *ts)
3639 {
3640     *la_temp_pref(ts)
3641         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3642 }
3643 
3644 /* liveness analysis: end of function: all temps are dead, and globals
3645    should be in memory. */
3646 static void la_func_end(TCGContext *s, int ng, int nt)
3647 {
3648     int i;
3649 
3650     for (i = 0; i < ng; ++i) {
3651         s->temps[i].state = TS_DEAD | TS_MEM;
3652         la_reset_pref(&s->temps[i]);
3653     }
3654     for (i = ng; i < nt; ++i) {
3655         s->temps[i].state = TS_DEAD;
3656         la_reset_pref(&s->temps[i]);
3657     }
3658 }
3659 
3660 /* liveness analysis: end of basic block: all temps are dead, globals
3661    and local temps should be in memory. */
3662 static void la_bb_end(TCGContext *s, int ng, int nt)
3663 {
3664     int i;
3665 
3666     for (i = 0; i < nt; ++i) {
3667         TCGTemp *ts = &s->temps[i];
3668         int state;
3669 
3670         switch (ts->kind) {
3671         case TEMP_FIXED:
3672         case TEMP_GLOBAL:
3673         case TEMP_TB:
3674             state = TS_DEAD | TS_MEM;
3675             break;
3676         case TEMP_EBB:
3677         case TEMP_CONST:
3678             state = TS_DEAD;
3679             break;
3680         default:
3681             g_assert_not_reached();
3682         }
3683         ts->state = state;
3684         la_reset_pref(ts);
3685     }
3686 }
3687 
3688 /* liveness analysis: sync globals back to memory.  */
3689 static void la_global_sync(TCGContext *s, int ng)
3690 {
3691     int i;
3692 
3693     for (i = 0; i < ng; ++i) {
3694         int state = s->temps[i].state;
3695         s->temps[i].state = state | TS_MEM;
3696         if (state == TS_DEAD) {
3697             /* If the global was previously dead, reset prefs.  */
3698             la_reset_pref(&s->temps[i]);
3699         }
3700     }
3701 }
3702 
3703 /*
3704  * liveness analysis: conditional branch: all temps are dead unless
3705  * explicitly live-across-conditional-branch; globals and local temps
3706  * should be synced.
3707  */
3708 static void la_bb_sync(TCGContext *s, int ng, int nt)
3709 {
3710     la_global_sync(s, ng);
3711 
3712     for (int i = ng; i < nt; ++i) {
3713         TCGTemp *ts = &s->temps[i];
3714         int state;
3715 
3716         switch (ts->kind) {
3717         case TEMP_TB:
3718             state = ts->state;
3719             ts->state = state | TS_MEM;
3720             if (state != TS_DEAD) {
3721                 continue;
3722             }
3723             break;
3724         case TEMP_EBB:
3725         case TEMP_CONST:
3726             continue;
3727         default:
3728             g_assert_not_reached();
3729         }
3730         la_reset_pref(&s->temps[i]);
3731     }
3732 }
3733 
3734 /* liveness analysis: sync globals back to memory and kill.  */
3735 static void la_global_kill(TCGContext *s, int ng)
3736 {
3737     int i;
3738 
3739     for (i = 0; i < ng; i++) {
3740         s->temps[i].state = TS_DEAD | TS_MEM;
3741         la_reset_pref(&s->temps[i]);
3742     }
3743 }
3744 
3745 /* liveness analysis: note live temporaries crossing calls.  */
3746 static void la_cross_call(TCGContext *s, int nt)
3747 {
3748     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3749     int i;
3750 
3751     for (i = 0; i < nt; i++) {
3752         TCGTemp *ts = &s->temps[i];
3753         if (!(ts->state & TS_DEAD)) {
3754             TCGRegSet *pset = la_temp_pref(ts);
3755             TCGRegSet set = *pset;
3756 
3757             set &= mask;
3758             /* If the combination is not possible, restart.  */
3759             if (set == 0) {
3760                 set = tcg_target_available_regs[ts->type] & mask;
3761             }
3762             *pset = set;
3763         }
3764     }
3765 }
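
/*
 * Editorial sketch: if a live temp's preference set contains only
 * call-clobbered registers, the masked set goes empty and is restarted
 * from all available non-clobbered registers of its type, steering
 * the temp toward a call-saved register across the call.
 */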
3766 
3767 /*
3768  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3769  * to TEMP_EBB, if possible.
3770  */
3771 static void __attribute__((noinline))
3772 liveness_pass_0(TCGContext *s)
3773 {
3774     void * const multiple_ebb = (void *)(uintptr_t)-1;
3775     int nb_temps = s->nb_temps;
3776     TCGOp *op, *ebb;
3777 
3778     for (int i = s->nb_globals; i < nb_temps; ++i) {
3779         s->temps[i].state_ptr = NULL;
3780     }
3781 
3782     /*
3783      * Represent each EBB by the op at which it begins.  In the case of
3784      * the first EBB, this is the first op, otherwise it is a label.
3785      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3786      * within a single EBB, else MULTIPLE_EBB.
3787      */
3788     ebb = QTAILQ_FIRST(&s->ops);
3789     QTAILQ_FOREACH(op, &s->ops, link) {
3790         const TCGOpDef *def;
3791         int nb_oargs, nb_iargs;
3792 
3793         switch (op->opc) {
3794         case INDEX_op_set_label:
3795             ebb = op;
3796             continue;
3797         case INDEX_op_discard:
3798             continue;
3799         case INDEX_op_call:
3800             nb_oargs = TCGOP_CALLO(op);
3801             nb_iargs = TCGOP_CALLI(op);
3802             break;
3803         default:
3804             def = &tcg_op_defs[op->opc];
3805             nb_oargs = def->nb_oargs;
3806             nb_iargs = def->nb_iargs;
3807             break;
3808         }
3809 
3810         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3811             TCGTemp *ts = arg_temp(op->args[i]);
3812 
3813             if (ts->kind != TEMP_TB) {
3814                 continue;
3815             }
3816             if (ts->state_ptr == NULL) {
3817                 ts->state_ptr = ebb;
3818             } else if (ts->state_ptr != ebb) {
3819                 ts->state_ptr = multiple_ebb;
3820             }
3821         }
3822     }
3823 
3824     /*
3825      * For TEMP_TB that turned out not to be used beyond one EBB,
3826      * reduce the liveness to TEMP_EBB.
3827      */
3828     for (int i = s->nb_globals; i < nb_temps; ++i) {
3829         TCGTemp *ts = &s->temps[i];
3830         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3831             ts->kind = TEMP_EBB;
3832         }
3833     }
3834 }
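
/*
 * Editorial sketch: a TEMP_TB temp whose every use falls between the
 * same two labels is tagged with that single EBB and demoted to
 * TEMP_EBB, so later passes may kill it at the EBB boundary instead
 * of syncing it back to memory.
 */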
3835 
3836 /* Liveness analysis: update op->life to tell whether a given
3837    input argument is dead. Instructions updating dead
3838    temporaries are removed. */
3839 static void __attribute__((noinline))
3840 liveness_pass_1(TCGContext *s)
3841 {
3842     int nb_globals = s->nb_globals;
3843     int nb_temps = s->nb_temps;
3844     TCGOp *op, *op_prev;
3845     TCGRegSet *prefs;
3846     int i;
3847 
3848     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3849     for (i = 0; i < nb_temps; ++i) {
3850         s->temps[i].state_ptr = prefs + i;
3851     }
3852 
3853     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3854     la_func_end(s, nb_globals, nb_temps);
3855 
3856     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3857         int nb_iargs, nb_oargs;
3858         TCGOpcode opc_new, opc_new2;
3859         TCGLifeData arg_life = 0;
3860         TCGTemp *ts;
3861         TCGOpcode opc = op->opc;
3862         const TCGOpDef *def = &tcg_op_defs[opc];
3863         const TCGArgConstraint *args_ct;
3864 
3865         switch (opc) {
3866         case INDEX_op_call:
3867             {
3868                 const TCGHelperInfo *info = tcg_call_info(op);
3869                 int call_flags = tcg_call_flags(op);
3870 
3871                 nb_oargs = TCGOP_CALLO(op);
3872                 nb_iargs = TCGOP_CALLI(op);
3873 
3874                 /* pure functions can be removed if their result is unused */
3875                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3876                     for (i = 0; i < nb_oargs; i++) {
3877                         ts = arg_temp(op->args[i]);
3878                         if (ts->state != TS_DEAD) {
3879                             goto do_not_remove_call;
3880                         }
3881                     }
3882                     goto do_remove;
3883                 }
3884             do_not_remove_call:
3885 
3886                 /* Output args are dead.  */
3887                 for (i = 0; i < nb_oargs; i++) {
3888                     ts = arg_temp(op->args[i]);
3889                     if (ts->state & TS_DEAD) {
3890                         arg_life |= DEAD_ARG << i;
3891                     }
3892                     if (ts->state & TS_MEM) {
3893                         arg_life |= SYNC_ARG << i;
3894                     }
3895                     ts->state = TS_DEAD;
3896                     la_reset_pref(ts);
3897                 }
3898 
3899                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3900                 memset(op->output_pref, 0, sizeof(op->output_pref));
3901 
3902                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3903                                     TCG_CALL_NO_READ_GLOBALS))) {
3904                     la_global_kill(s, nb_globals);
3905                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3906                     la_global_sync(s, nb_globals);
3907                 }
3908 
3909                 /* Record arguments that die in this helper.  */
3910                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3911                     ts = arg_temp(op->args[i]);
3912                     if (ts->state & TS_DEAD) {
3913                         arg_life |= DEAD_ARG << i;
3914                     }
3915                 }
3916 
3917                 /* For all live registers, remove call-clobbered prefs.  */
3918                 la_cross_call(s, nb_temps);
3919 
3920                 /*
3921                  * Input arguments are live for preceding opcodes.
3922                  *
3923                  * For those arguments that die, and will be allocated in
3924                  * registers, clear the register set for that arg, to be
3925                  * filled in below.  For args that will be on the stack,
3926                  * reset to any available reg.  Process arguments in reverse
3927                  * order so that if a temp is used more than once, the stack
3928                  * reset to max happens before the register reset to 0.
3929                  */
3930                 for (i = nb_iargs - 1; i >= 0; i--) {
3931                     const TCGCallArgumentLoc *loc = &info->in[i];
3932                     ts = arg_temp(op->args[nb_oargs + i]);
3933 
3934                     if (ts->state & TS_DEAD) {
3935                         switch (loc->kind) {
3936                         case TCG_CALL_ARG_NORMAL:
3937                         case TCG_CALL_ARG_EXTEND_U:
3938                         case TCG_CALL_ARG_EXTEND_S:
3939                             if (arg_slot_reg_p(loc->arg_slot)) {
3940                                 *la_temp_pref(ts) = 0;
3941                                 break;
3942                             }
3943                             /* fall through */
3944                         default:
3945                             *la_temp_pref(ts) =
3946                                 tcg_target_available_regs[ts->type];
3947                             break;
3948                         }
3949                         ts->state &= ~TS_DEAD;
3950                     }
3951                 }
3952 
3953                 /*
3954                  * For each input argument, add its input register to prefs.
3955                  * If a temp is used once, this produces a single set bit;
3956                  * if a temp is used multiple times, this produces a set.
3957                  */
3958                 for (i = 0; i < nb_iargs; i++) {
3959                     const TCGCallArgumentLoc *loc = &info->in[i];
3960                     ts = arg_temp(op->args[nb_oargs + i]);
3961 
3962                     switch (loc->kind) {
3963                     case TCG_CALL_ARG_NORMAL:
3964                     case TCG_CALL_ARG_EXTEND_U:
3965                     case TCG_CALL_ARG_EXTEND_S:
3966                         if (arg_slot_reg_p(loc->arg_slot)) {
3967                             tcg_regset_set_reg(*la_temp_pref(ts),
3968                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3969                         }
3970                         break;
3971                     default:
3972                         break;
3973                     }
3974                 }
3975             }
3976             break;
3977         case INDEX_op_insn_start:
3978             break;
3979         case INDEX_op_discard:
3980             /* mark the temporary as dead */
3981             ts = arg_temp(op->args[0]);
3982             ts->state = TS_DEAD;
3983             la_reset_pref(ts);
3984             break;
3985 
3986         case INDEX_op_add2_i32:
3987         case INDEX_op_add2_i64:
3988             opc_new = INDEX_op_add;
3989             goto do_addsub2;
3990         case INDEX_op_sub2_i32:
3991         case INDEX_op_sub2_i64:
3992             opc_new = INDEX_op_sub;
3993         do_addsub2:
3994             nb_iargs = 4;
3995             nb_oargs = 2;
3996             /* Test if the high part of the operation is dead, but not
3997                the low part.  The result can be optimized to a simple
3998                add or sub.  This happens often for an x86_64 guest when the
3999                cpu mode is set to 32 bit.  */
4000             if (arg_temp(op->args[1])->state == TS_DEAD) {
4001                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4002                     goto do_remove;
4003                 }
4004                 /* Replace the opcode and adjust the args in place,
4005                    leaving 3 unused args at the end.  */
4006                 op->opc = opc = opc_new;
4007                 op->args[1] = op->args[2];
4008                 op->args[2] = op->args[4];
4009                 /* Fall through and mark the single-word operation live.  */
4010                 nb_iargs = 2;
4011                 nb_oargs = 1;
4012             }
4013             goto do_not_remove;
4014 
4015         case INDEX_op_muls2:
4016             opc_new = INDEX_op_mul;
4017             opc_new2 = INDEX_op_mulsh;
4018             goto do_mul2;
4019         case INDEX_op_mulu2:
4020             opc_new = INDEX_op_mul;
4021             opc_new2 = INDEX_op_muluh;
4022         do_mul2:
4023             nb_iargs = 2;
4024             nb_oargs = 2;
4025             if (arg_temp(op->args[1])->state == TS_DEAD) {
4026                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4027                     /* Both parts of the operation are dead.  */
4028                     goto do_remove;
4029                 }
4030                 /* The high part of the operation is dead; generate the low. */
4031                 op->opc = opc = opc_new;
4032                 op->args[1] = op->args[2];
4033                 op->args[2] = op->args[3];
4034             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4035                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4036                 /* The low part of the operation is dead; generate the high. */
4037                 op->opc = opc = opc_new2;
4038                 op->args[0] = op->args[1];
4039                 op->args[1] = op->args[2];
4040                 op->args[2] = op->args[3];
4041             } else {
4042                 goto do_not_remove;
4043             }
4044             /* Mark the single-word operation live.  */
4045             nb_oargs = 1;
4046             goto do_not_remove;
4047 
4048         default:
4049             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4050             nb_iargs = def->nb_iargs;
4051             nb_oargs = def->nb_oargs;
4052 
4053             /* Test if the operation can be removed because all
4054                its outputs are dead. We assume that nb_oargs == 0
4055                implies side effects */
4056             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4057                 for (i = 0; i < nb_oargs; i++) {
4058                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4059                         goto do_not_remove;
4060                     }
4061                 }
4062                 goto do_remove;
4063             }
4064             goto do_not_remove;
4065 
4066         do_remove:
4067             tcg_op_remove(s, op);
4068             break;
4069 
4070         do_not_remove:
4071             for (i = 0; i < nb_oargs; i++) {
4072                 ts = arg_temp(op->args[i]);
4073 
4074                 /* Remember the preference of the uses that followed.  */
4075                 if (i < ARRAY_SIZE(op->output_pref)) {
4076                     op->output_pref[i] = *la_temp_pref(ts);
4077                 }
4078 
4079                 /* Output args are dead.  */
4080                 if (ts->state & TS_DEAD) {
4081                     arg_life |= DEAD_ARG << i;
4082                 }
4083                 if (ts->state & TS_MEM) {
4084                     arg_life |= SYNC_ARG << i;
4085                 }
4086                 ts->state = TS_DEAD;
4087                 la_reset_pref(ts);
4088             }
4089 
4090             /* If end of basic block, update.  */
4091             if (def->flags & TCG_OPF_BB_EXIT) {
4092                 la_func_end(s, nb_globals, nb_temps);
4093             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4094                 la_bb_sync(s, nb_globals, nb_temps);
4095             } else if (def->flags & TCG_OPF_BB_END) {
4096                 la_bb_end(s, nb_globals, nb_temps);
4097             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4098                 la_global_sync(s, nb_globals);
4099                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4100                     la_cross_call(s, nb_temps);
4101                 }
4102             }
4103 
4104             /* Record arguments that die in this opcode.  */
4105             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4106                 ts = arg_temp(op->args[i]);
4107                 if (ts->state & TS_DEAD) {
4108                     arg_life |= DEAD_ARG << i;
4109                 }
4110             }
4111 
4112             /* Input arguments are live for preceding opcodes.  */
4113             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4114                 ts = arg_temp(op->args[i]);
4115                 if (ts->state & TS_DEAD) {
4116                     /* For operands that were dead, initially allow
4117                        all regs for the type.  */
4118                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4119                     ts->state &= ~TS_DEAD;
4120                 }
4121             }
4122 
4123             /* Incorporate constraints for this operand.  */
4124             switch (opc) {
4125             case INDEX_op_mov:
4126                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4127                    have proper constraints.  That said, special case
4128                    moves to propagate preferences backward.  */
4129                 if (IS_DEAD_ARG(1)) {
4130                     *la_temp_pref(arg_temp(op->args[0]))
4131                         = *la_temp_pref(arg_temp(op->args[1]));
4132                 }
4133                 break;
4134 
4135             default:
4136                 args_ct = opcode_args_ct(op);
4137                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4138                     const TCGArgConstraint *ct = &args_ct[i];
4139                     TCGRegSet set, *pset;
4140 
4141                     ts = arg_temp(op->args[i]);
4142                     pset = la_temp_pref(ts);
4143                     set = *pset;
4144 
4145                     set &= ct->regs;
4146                     if (ct->ialias) {
4147                         set &= output_pref(op, ct->alias_index);
4148                     }
4149                     /* If the combination is not possible, restart.  */
4150                     if (set == 0) {
4151                         set = ct->regs;
4152                     }
4153                     *pset = set;
4154                 }
4155                 break;
4156             }
4157             break;
4158         }
4159         op->life = arg_life;
4160     }
4161 }
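
/*
 * Worked example for the in-place rewrites above (editorial sketch):
 * add2_i32 {lo, hi, al, ah, bl, bh} with only hi dead becomes
 * add {lo, al, bl}, shifting args[2] -> args[1] and args[4] ->
 * args[2]; likewise mulu2 {lo, hi, a, b} with hi dead becomes
 * mul {lo, a, b}, or muluh {hi, a, b} when lo is dead and the
 * high-part opcode is supported.
 */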
4162 
4163 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4164 static bool __attribute__((noinline))
4165 liveness_pass_2(TCGContext *s)
4166 {
4167     int nb_globals = s->nb_globals;
4168     int nb_temps, i;
4169     bool changes = false;
4170     TCGOp *op, *op_next;
4171 
4172     /* Create a temporary for each indirect global.  */
4173     for (i = 0; i < nb_globals; ++i) {
4174         TCGTemp *its = &s->temps[i];
4175         if (its->indirect_reg) {
4176             TCGTemp *dts = tcg_temp_alloc(s);
4177             dts->type = its->type;
4178             dts->base_type = its->base_type;
4179             dts->temp_subindex = its->temp_subindex;
4180             dts->kind = TEMP_EBB;
4181             its->state_ptr = dts;
4182         } else {
4183             its->state_ptr = NULL;
4184         }
4185         /* All globals begin dead.  */
4186         its->state = TS_DEAD;
4187     }
4188     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4189         TCGTemp *its = &s->temps[i];
4190         its->state_ptr = NULL;
4191         its->state = TS_DEAD;
4192     }
4193 
4194     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4195         TCGOpcode opc = op->opc;
4196         const TCGOpDef *def = &tcg_op_defs[opc];
4197         TCGLifeData arg_life = op->life;
4198         int nb_iargs, nb_oargs, call_flags;
4199         TCGTemp *arg_ts, *dir_ts;
4200 
4201         if (opc == INDEX_op_call) {
4202             nb_oargs = TCGOP_CALLO(op);
4203             nb_iargs = TCGOP_CALLI(op);
4204             call_flags = tcg_call_flags(op);
4205         } else {
4206             nb_iargs = def->nb_iargs;
4207             nb_oargs = def->nb_oargs;
4208 
4209             /* Set flags similar to how calls require.  */
4210             if (def->flags & TCG_OPF_COND_BRANCH) {
4211                 /* Like reading globals: sync_globals */
4212                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4213             } else if (def->flags & TCG_OPF_BB_END) {
4214                 /* Like writing globals: save_globals */
4215                 call_flags = 0;
4216             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4217                 /* Like reading globals: sync_globals */
4218                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4219             } else {
4220                 /* No effect on globals.  */
4221                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4222                               TCG_CALL_NO_WRITE_GLOBALS);
4223             }
4224         }
4225 
4226         /* Make sure that input arguments are available.  */
4227         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4228             arg_ts = arg_temp(op->args[i]);
4229             dir_ts = arg_ts->state_ptr;
4230             if (dir_ts && arg_ts->state == TS_DEAD) {
4231                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4232                                   ? INDEX_op_ld_i32
4233                                   : INDEX_op_ld_i64);
4234                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4235                                                   arg_ts->type, 3);
4236 
4237                 lop->args[0] = temp_arg(dir_ts);
4238                 lop->args[1] = temp_arg(arg_ts->mem_base);
4239                 lop->args[2] = arg_ts->mem_offset;
4240 
4241                 /* Loaded, but synced with memory.  */
4242                 arg_ts->state = TS_MEM;
4243             }
4244         }
4245 
4246         /* Perform input replacement, and mark inputs that became dead.
4247            No action is required except keeping each temp's state up to date
4248            so that we reload when needed.  */
4249         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4250             arg_ts = arg_temp(op->args[i]);
4251             dir_ts = arg_ts->state_ptr;
4252             if (dir_ts) {
4253                 op->args[i] = temp_arg(dir_ts);
4254                 changes = true;
4255                 if (IS_DEAD_ARG(i)) {
4256                     arg_ts->state = TS_DEAD;
4257                 }
4258             }
4259         }
4260 
4261         /* Liveness analysis should ensure that the following are
4262            all correct, for call sites and basic block end points.  */
4263         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4264             /* Nothing to do */
4265         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4266             for (i = 0; i < nb_globals; ++i) {
4267                 /* Liveness should see that globals are synced back,
4268                    that is, either TS_DEAD or TS_MEM.  */
4269                 arg_ts = &s->temps[i];
4270                 tcg_debug_assert(arg_ts->state_ptr == 0
4271                                  || arg_ts->state != 0);
4272             }
4273         } else {
4274             for (i = 0; i < nb_globals; ++i) {
4275                 /* Liveness should see that globals are saved back,
4276                    that is, TS_DEAD, waiting to be reloaded.  */
4277                 arg_ts = &s->temps[i];
4278                 tcg_debug_assert(arg_ts->state_ptr == 0
4279                                  || arg_ts->state == TS_DEAD);
4280             }
4281         }
4282 
4283         /* Outputs become available.  */
4284         if (opc == INDEX_op_mov) {
4285             arg_ts = arg_temp(op->args[0]);
4286             dir_ts = arg_ts->state_ptr;
4287             if (dir_ts) {
4288                 op->args[0] = temp_arg(dir_ts);
4289                 changes = true;
4290 
4291                 /* The output is now live and modified.  */
4292                 arg_ts->state = 0;
4293 
4294                 if (NEED_SYNC_ARG(0)) {
4295                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4296                                       ? INDEX_op_st_i32
4297                                       : INDEX_op_st_i64);
4298                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4299                                                      arg_ts->type, 3);
4300                     TCGTemp *out_ts = dir_ts;
4301 
4302                     if (IS_DEAD_ARG(0)) {
4303                         out_ts = arg_temp(op->args[1]);
4304                         arg_ts->state = TS_DEAD;
4305                         tcg_op_remove(s, op);
4306                     } else {
4307                         arg_ts->state = TS_MEM;
4308                     }
4309 
4310                     sop->args[0] = temp_arg(out_ts);
4311                     sop->args[1] = temp_arg(arg_ts->mem_base);
4312                     sop->args[2] = arg_ts->mem_offset;
4313                 } else {
4314                     tcg_debug_assert(!IS_DEAD_ARG(0));
4315                 }
4316             }
4317         } else {
4318             for (i = 0; i < nb_oargs; i++) {
4319                 arg_ts = arg_temp(op->args[i]);
4320                 dir_ts = arg_ts->state_ptr;
4321                 if (!dir_ts) {
4322                     continue;
4323                 }
4324                 op->args[i] = temp_arg(dir_ts);
4325                 changes = true;
4326 
4327                 /* The output is now live and modified.  */
4328                 arg_ts->state = 0;
4329 
4330                 /* Sync outputs upon their last write.  */
4331                 if (NEED_SYNC_ARG(i)) {
4332                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4333                                       ? INDEX_op_st_i32
4334                                       : INDEX_op_st_i64);
4335                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4336                                                      arg_ts->type, 3);
4337 
4338                     sop->args[0] = temp_arg(dir_ts);
4339                     sop->args[1] = temp_arg(arg_ts->mem_base);
4340                     sop->args[2] = arg_ts->mem_offset;
4341 
4342                     arg_ts->state = TS_MEM;
4343                 }
4344                 /* Drop outputs that are dead.  */
4345                 if (IS_DEAD_ARG(i)) {
4346                     arg_ts->state = TS_DEAD;
4347                 }
4348             }
4349         }
4350     }
4351 
4352     return changes;
4353 }
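
/*
 * Worked example (editorial sketch): for an indirect global G with
 * direct temp D, an op reading G while D is dead gains a preceding
 * ld_i32/ld_i64 of D from G's canonical slot (mem_base + mem_offset)
 * and the op's argument is rewritten to D; an op whose output G
 * needs syncing gains a following st of D back to that slot.
 */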
4354 
4355 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4356 {
4357     intptr_t off;
4358     int size, align;
4359 
4360     /* When allocating an object, look at the full type. */
4361     size = tcg_type_size(ts->base_type);
4362     switch (ts->base_type) {
4363     case TCG_TYPE_I32:
4364         align = 4;
4365         break;
4366     case TCG_TYPE_I64:
4367     case TCG_TYPE_V64:
4368         align = 8;
4369         break;
4370     case TCG_TYPE_I128:
4371     case TCG_TYPE_V128:
4372     case TCG_TYPE_V256:
4373         /*
4374          * Note that we do not require aligned storage for V256,
4375          * and that we provide alignment for I128 to match V128,
4376          * even if that's above what the host ABI requires.
4377          */
4378         align = 16;
4379         break;
4380     default:
4381         g_assert_not_reached();
4382     }
4383 
4384     /*
4385      * Assume the stack is sufficiently aligned.
4386      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4387      * and do not require 16 byte vector alignment.  This seems slightly
4388      * easier than fully parameterizing the above switch statement.
4389      */
4390     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4391     off = ROUND_UP(s->current_frame_offset, align);
4392 
4393     /* If we've exhausted the stack frame, restart with a smaller TB. */
4394     if (off + size > s->frame_end) {
4395         tcg_raise_tb_overflow(s);
4396     }
4397     s->current_frame_offset = off + size;
4398 #if defined(__sparc__)
4399     off += TCG_TARGET_STACK_BIAS;
4400 #endif
4401 
4402     /* If the object was subdivided, assign memory to all the parts. */
4403     if (ts->base_type != ts->type) {
4404         int part_size = tcg_type_size(ts->type);
4405         int part_count = size / part_size;
4406 
4407         /*
4408          * Each part is allocated sequentially in tcg_temp_new_internal.
4409          * Jump back to the first part by subtracting the current index.
4410          */
4411         ts -= ts->temp_subindex;
4412         for (int i = 0; i < part_count; ++i) {
4413             ts[i].mem_offset = off + i * part_size;
4414             ts[i].mem_base = s->frame_temp;
4415             ts[i].mem_allocated = 1;
4416         }
4417     } else {
4418         ts->mem_offset = off;
4419         ts->mem_base = s->frame_temp;
4420         ts->mem_allocated = 1;
4421     }
4422 }
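
/*
 * Worked example (editorial sketch, hypothetical offsets): with
 * current_frame_offset = 0x14, an I64 temp (size 8, align 8) rounds
 * up to off = 0x18 and advances the frame to 0x20.  An I128 temp
 * subdivided into two I64 parts places them at off and off + 8,
 * both based on frame_temp, with align = 16 subject to the MIN()
 * clamp above.
 */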
4423 
4424 /* Assign @reg to @ts, and update reg_to_temp[]. */
4425 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4426 {
4427     if (ts->val_type == TEMP_VAL_REG) {
4428         TCGReg old = ts->reg;
4429         tcg_debug_assert(s->reg_to_temp[old] == ts);
4430         if (old == reg) {
4431             return;
4432         }
4433         s->reg_to_temp[old] = NULL;
4434     }
4435     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4436     s->reg_to_temp[reg] = ts;
4437     ts->val_type = TEMP_VAL_REG;
4438     ts->reg = reg;
4439 }
4440 
4441 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4442 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4443 {
4444     tcg_debug_assert(type != TEMP_VAL_REG);
4445     if (ts->val_type == TEMP_VAL_REG) {
4446         TCGReg reg = ts->reg;
4447         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4448         s->reg_to_temp[reg] = NULL;
4449     }
4450     ts->val_type = type;
4451 }
4452 
4453 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4454 
4455 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4456    mark it free; otherwise mark it dead.  */
4457 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4458 {
4459     TCGTempVal new_type;
4460 
4461     switch (ts->kind) {
4462     case TEMP_FIXED:
4463         return;
4464     case TEMP_GLOBAL:
4465     case TEMP_TB:
4466         new_type = TEMP_VAL_MEM;
4467         break;
4468     case TEMP_EBB:
4469         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4470         break;
4471     case TEMP_CONST:
4472         new_type = TEMP_VAL_CONST;
4473         break;
4474     default:
4475         g_assert_not_reached();
4476     }
4477     set_temp_val_nonreg(s, ts, new_type);
4478 }
4479 
4480 /* Mark a temporary as dead.  */
4481 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4482 {
4483     temp_free_or_dead(s, ts, 1);
4484 }
4485 
4486 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4487    register needs to be allocated to store a constant.  If 'free_or_dead'
4488    is non-zero, subsequently release the temporary; if it is positive, the
4489    temp is dead; if it is negative, the temp is free.  */
4490 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4491                       TCGRegSet preferred_regs, int free_or_dead)
4492 {
4493     if (!temp_readonly(ts) && !ts->mem_coherent) {
4494         if (!ts->mem_allocated) {
4495             temp_allocate_frame(s, ts);
4496         }
4497         switch (ts->val_type) {
4498         case TEMP_VAL_CONST:
4499             /* If we're going to free the temp immediately, then we won't
4500                require it later in a register, so attempt to store the
4501                constant to memory directly.  */
4502             if (free_or_dead
4503                 && tcg_out_sti(s, ts->type, ts->val,
4504                                ts->mem_base->reg, ts->mem_offset)) {
4505                 break;
4506             }
4507             temp_load(s, ts, tcg_target_available_regs[ts->type],
4508                       allocated_regs, preferred_regs);
4509             /* fallthrough */
4510 
4511         case TEMP_VAL_REG:
4512             tcg_out_st(s, ts->type, ts->reg,
4513                        ts->mem_base->reg, ts->mem_offset);
4514             break;
4515 
4516         case TEMP_VAL_MEM:
4517             break;
4518 
4519         case TEMP_VAL_DEAD:
4520         default:
4521             g_assert_not_reached();
4522         }
4523         ts->mem_coherent = 1;
4524     }
4525     if (free_or_dead) {
4526         temp_free_or_dead(s, ts, free_or_dead);
4527     }
4528 }
4529 
4530 /* free register 'reg' by spilling the corresponding temporary if necessary */
4531 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4532 {
4533     TCGTemp *ts = s->reg_to_temp[reg];
4534     if (ts != NULL) {
4535         temp_sync(s, ts, allocated_regs, 0, -1);
4536     }
4537 }
4538 
4539 /**
4540  * tcg_reg_alloc:
4541  * @required_regs: Set of registers in which we must allocate.
4542  * @allocated_regs: Set of registers which must be avoided.
4543  * @preferred_regs: Set of registers we should prefer.
4544  * @rev: True if we search the registers in "indirect" order.
4545  *
4546  * The allocated register must be in @required_regs & ~@allocated_regs,
4547  * but if we can put it in @preferred_regs we may save a move later.
4548  */
4549 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4550                             TCGRegSet allocated_regs,
4551                             TCGRegSet preferred_regs, bool rev)
4552 {
4553     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4554     TCGRegSet reg_ct[2];
4555     const int *order;
4556 
4557     reg_ct[1] = required_regs & ~allocated_regs;
4558     tcg_debug_assert(reg_ct[1] != 0);
4559     reg_ct[0] = reg_ct[1] & preferred_regs;
4560 
4561     /* Skip the preferred_regs option if it cannot be satisfied,
4562        or if the preference made no difference.  */
4563     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4564 
4565     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4566 
4567     /* Try free registers, preferences first.  */
4568     for (j = f; j < 2; j++) {
4569         TCGRegSet set = reg_ct[j];
4570 
4571         if (tcg_regset_single(set)) {
4572             /* One register in the set.  */
4573             TCGReg reg = tcg_regset_first(set);
4574             if (s->reg_to_temp[reg] == NULL) {
4575                 return reg;
4576             }
4577         } else {
4578             for (i = 0; i < n; i++) {
4579                 TCGReg reg = order[i];
4580                 if (s->reg_to_temp[reg] == NULL &&
4581                     tcg_regset_test_reg(set, reg)) {
4582                     return reg;
4583                 }
4584             }
4585         }
4586     }
4587 
4588     /* We must spill something.  */
4589     for (j = f; j < 2; j++) {
4590         TCGRegSet set = reg_ct[j];
4591 
4592         if (tcg_regset_single(set)) {
4593             /* One register in the set.  */
4594             TCGReg reg = tcg_regset_first(set);
4595             tcg_reg_free(s, reg, allocated_regs);
4596             return reg;
4597         } else {
4598             for (i = 0; i < n; i++) {
4599                 TCGReg reg = order[i];
4600                 if (tcg_regset_test_reg(set, reg)) {
4601                     tcg_reg_free(s, reg, allocated_regs);
4602                     return reg;
4603                 }
4604             }
4605         }
4606     }
4607 
4608     g_assert_not_reached();
4609 }
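
/*
 * Worked example (editorial sketch): with required = {r0..r3},
 * allocated = {r0} and preferred = {r1}, reg_ct[1] = {r1,r2,r3} and
 * reg_ct[0] = {r1}.  The first loop returns r1 if it is free, then
 * any free register from reg_ct[1]; only if everything is occupied
 * does the second loop spill, again trying the preferred set first.
 */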
4610 
4611 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4612                                  TCGRegSet allocated_regs,
4613                                  TCGRegSet preferred_regs, bool rev)
4614 {
4615     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4616     TCGRegSet reg_ct[2];
4617     const int *order;
4618 
4619     /* Keep only registers I for which neither I nor I+1 is in allocated_regs. */
4620     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4621     tcg_debug_assert(reg_ct[1] != 0);
4622     reg_ct[0] = reg_ct[1] & preferred_regs;
4623 
4624     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4625 
4626     /*
4627      * Skip the preferred_regs option if it cannot be satisfied,
4628      * or if the preference made no difference.
4629      */
4630     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4631 
4632     /*
4633      * Minimize the number of flushes by looking for 2 free registers first,
4634      * then a single flush, then two flushes.
4635      */
4636     for (fmin = 2; fmin >= 0; fmin--) {
4637         for (j = k; j < 2; j++) {
4638             TCGRegSet set = reg_ct[j];
4639 
4640             for (i = 0; i < n; i++) {
4641                 TCGReg reg = order[i];
4642 
4643                 if (tcg_regset_test_reg(set, reg)) {
4644                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4645                     if (f >= fmin) {
4646                         tcg_reg_free(s, reg, allocated_regs);
4647                         tcg_reg_free(s, reg + 1, allocated_regs);
4648                         return reg;
4649                     }
4650                 }
4651             }
4652         }
4653     }
4654     g_assert_not_reached();
4655 }
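
/*
 * Editorial note: fmin = 2 accepts only a pair with both reg and
 * reg + 1 already free (zero spills), fmin = 1 a pair needing one
 * spill, and fmin = 0 any eligible pair, spilling both halves.
 * tcg_reg_free() is a no-op on an already-free register, so the two
 * unconditional calls are safe.
 */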
4656 
4657 /* Make sure the temporary is in a register.  If needed, allocate the register
4658    from DESIRED while avoiding ALLOCATED.  */
4659 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4660                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4661 {
4662     TCGReg reg;
4663 
4664     switch (ts->val_type) {
4665     case TEMP_VAL_REG:
4666         return;
4667     case TEMP_VAL_CONST:
4668         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4669                             preferred_regs, ts->indirect_base);
4670         if (ts->type <= TCG_TYPE_I64) {
4671             tcg_out_movi(s, ts->type, reg, ts->val);
4672         } else {
4673             uint64_t val = ts->val;
4674             MemOp vece = MO_64;
4675 
4676             /*
4677              * Find the minimal vector element that matches the constant.
4678              * The targets will, in general, have to do this search anyway,
4679              * so do it generically here.
4680              */
4681             if (val == dup_const(MO_8, val)) {
4682                 vece = MO_8;
4683             } else if (val == dup_const(MO_16, val)) {
4684                 vece = MO_16;
4685             } else if (val == dup_const(MO_32, val)) {
4686                 vece = MO_32;
4687             }
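            /*
             * E.g. val == 0xdeadbeefdeadbeef matches dup_const(MO_32, val)
             * but not MO_16 or MO_8, so vece becomes MO_32 and the backend
             * can replicate a 32-bit immediate instead of loading a full
             * 64-bit one.
             */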
4688 
4689             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4690         }
4691         ts->mem_coherent = 0;
4692         break;
4693     case TEMP_VAL_MEM:
4694         if (!ts->mem_allocated) {
4695             temp_allocate_frame(s, ts);
4696         }
4697         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4698                             preferred_regs, ts->indirect_base);
4699         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4700         ts->mem_coherent = 1;
4701         break;
4702     case TEMP_VAL_DEAD:
4703     default:
4704         g_assert_not_reached();
4705     }
4706     set_temp_val_reg(s, ts, reg);
4707 }
4708 
4709 /* Save a temporary to memory. 'allocated_regs' is used in case a
4710    temporary register needs to be allocated to store a constant.  */
4711 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4712 {
4713     /* The liveness analysis already ensures that globals are back
4714        in memory. Keep a tcg_debug_assert for safety. */
4715     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4716 }
4717 
4718 /* save globals to their canonical location and assume they can be
4719    modified by the following code. 'allocated_regs' is used in case a
4720    temporary register needs to be allocated to store a constant. */
4721 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4722 {
4723     int i, n;
4724 
4725     for (i = 0, n = s->nb_globals; i < n; i++) {
4726         temp_save(s, &s->temps[i], allocated_regs);
4727     }
4728 }
4729 
4730 /* sync globals to their canonical location and assume they can be
4731    read by the following code. 'allocated_regs' is used in case a
4732    temporary register needs to be allocated to store a constant. */
4733 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4734 {
4735     int i, n;
4736 
4737     for (i = 0, n = s->nb_globals; i < n; i++) {
4738         TCGTemp *ts = &s->temps[i];
4739         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4740                          || ts->kind == TEMP_FIXED
4741                          || ts->mem_coherent);
4742     }
4743 }
4744 
4745 /* at the end of a basic block, we assume all temporaries are dead and
4746    all globals are stored at their canonical location. */
4747 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4748 {
4749     int i;
4750 
4751     for (i = s->nb_globals; i < s->nb_temps; i++) {
4752         TCGTemp *ts = &s->temps[i];
4753 
4754         switch (ts->kind) {
4755         case TEMP_TB:
4756             temp_save(s, ts, allocated_regs);
4757             break;
4758         case TEMP_EBB:
4759             /* The liveness analysis already ensures that temps are dead.
4760                Keep a tcg_debug_assert for safety. */
4761             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4762             break;
4763         case TEMP_CONST:
4764             /* Similarly, we should have freed any allocated register. */
4765             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4766             break;
4767         default:
4768             g_assert_not_reached();
4769         }
4770     }
4771 
4772     save_globals(s, allocated_regs);
4773 }
4774 
4775 /*
4776  * At a conditional branch, we assume all temporaries are dead unless
4777  * explicitly live-across-conditional-branch; all globals and local
4778  * temps are synced to their location.
4779  */
4780 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4781 {
4782     sync_globals(s, allocated_regs);
4783 
4784     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4785         TCGTemp *ts = &s->temps[i];
4786         /*
4787          * The liveness analysis already ensures that temps are dead.
4788          * Keep tcg_debug_asserts for safety.
4789          */
4790         switch (ts->kind) {
4791         case TEMP_TB:
4792             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4793             break;
4794         case TEMP_EBB:
4795         case TEMP_CONST:
4796             break;
4797         default:
4798             g_assert_not_reached();
4799         }
4800     }
4801 }
4802 
4803 /*
4804  * Specialized code generation for INDEX_op_mov_* with a constant.
4805  */
4806 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4807                                   tcg_target_ulong val, TCGLifeData arg_life,
4808                                   TCGRegSet preferred_regs)
4809 {
4810     /* ENV should not be modified.  */
4811     tcg_debug_assert(!temp_readonly(ots));
4812 
4813     /* The movi is not explicitly generated here.  */
4814     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4815     ots->val = val;
4816     ots->mem_coherent = 0;
4817     if (NEED_SYNC_ARG(0)) {
4818         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4819     } else if (IS_DEAD_ARG(0)) {
4820         temp_dead(s, ots);
4821     }
4822 }
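
/*
 * Note that no host instruction is emitted above unless a sync is
 * required: e.g. a "mov_i32 t0, $5" whose t0 is dead afterward costs
 * nothing, while a live t0 is only materialized later, by temp_load()
 * or temp_sync() acting on TEMP_VAL_CONST.
 */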
4823 
4824 /*
4825  * Specialized code generation for INDEX_op_mov_*.
4826  */
4827 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4828 {
4829     const TCGLifeData arg_life = op->life;
4830     TCGRegSet allocated_regs, preferred_regs;
4831     TCGTemp *ts, *ots;
4832     TCGType otype, itype;
4833     TCGReg oreg, ireg;
4834 
4835     allocated_regs = s->reserved_regs;
4836     preferred_regs = output_pref(op, 0);
4837     ots = arg_temp(op->args[0]);
4838     ts = arg_temp(op->args[1]);
4839 
4840     /* ENV should not be modified.  */
4841     tcg_debug_assert(!temp_readonly(ots));
4842 
4843     /* Note that otype != itype for no-op truncation.  */
4844     otype = ots->type;
4845     itype = ts->type;
4846 
4847     if (ts->val_type == TEMP_VAL_CONST) {
4848         /* propagate constant or generate sti */
4849         tcg_target_ulong val = ts->val;
4850         if (IS_DEAD_ARG(1)) {
4851             temp_dead(s, ts);
4852         }
4853         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4854         return;
4855     }
4856 
4857     /* If the source value is in memory we're going to be forced
4858        to have it in a register in order to perform the copy.  Copy
4859        the SOURCE value into its own register first, that way we
4860        don't have to reload SOURCE the next time it is used. */
4861     if (ts->val_type == TEMP_VAL_MEM) {
4862         temp_load(s, ts, tcg_target_available_regs[itype],
4863                   allocated_regs, preferred_regs);
4864     }
4865     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4866     ireg = ts->reg;
4867 
4868     if (IS_DEAD_ARG(0)) {
4869         /* mov to a non-saved dead register makes no sense (even with
4870            liveness analysis disabled). */
4871         tcg_debug_assert(NEED_SYNC_ARG(0));
4872         if (!ots->mem_allocated) {
4873             temp_allocate_frame(s, ots);
4874         }
4875         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4876         if (IS_DEAD_ARG(1)) {
4877             temp_dead(s, ts);
4878         }
4879         temp_dead(s, ots);
4880         return;
4881     }
4882 
4883     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4884         /*
4885          * The mov can be suppressed.  Kill input first, so that it
4886          * is unlinked from reg_to_temp, then set the output to the
4887          * reg that we saved from the input.
4888          */
4889         temp_dead(s, ts);
4890         oreg = ireg;
4891     } else {
4892         if (ots->val_type == TEMP_VAL_REG) {
4893             oreg = ots->reg;
4894         } else {
4895             /* Make sure to not spill the input register during allocation. */
4896             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4897                                  allocated_regs | ((TCGRegSet)1 << ireg),
4898                                  preferred_regs, ots->indirect_base);
4899         }
4900         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4901             /*
4902              * Cross register class move not supported.
4903              * Store the source register into the destination slot
4904              * and leave the destination temp as TEMP_VAL_MEM.
4905              */
4906             assert(!temp_readonly(ots));
4907             if (!ts->mem_allocated) {
4908                 temp_allocate_frame(s, ots);
4909             }
4910             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4911             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4912             ots->mem_coherent = 1;
4913             return;
4914         }
4915     }
4916     set_temp_val_reg(s, ots, oreg);
4917     ots->mem_coherent = 0;
4918 
4919     if (NEED_SYNC_ARG(0)) {
4920         temp_sync(s, ots, allocated_regs, 0, 0);
4921     }
4922 }
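
/*
 * E.g. for "mov t1, t0" where t0 is register-resident and dies here,
 * the path above emits no host instruction for the copy: t0 is
 * unlinked from reg_to_temp and t1 simply takes over its register.
 * An actual host mov is needed only when the input stays live, is
 * TEMP_FIXED, or a cross-class copy is required.
 */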
4923 
4924 /*
4925  * Specialized code generation for INDEX_op_dup_vec.
4926  */
4927 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4928 {
4929     const TCGLifeData arg_life = op->life;
4930     TCGRegSet dup_out_regs, dup_in_regs;
4931     const TCGArgConstraint *dup_args_ct;
4932     TCGTemp *its, *ots;
4933     TCGType itype, vtype;
4934     unsigned vece;
4935     int lowpart_ofs;
4936     bool ok;
4937 
4938     ots = arg_temp(op->args[0]);
4939     its = arg_temp(op->args[1]);
4940 
4941     /* ENV should not be modified.  */
4942     tcg_debug_assert(!temp_readonly(ots));
4943 
4944     itype = its->type;
4945     vece = TCGOP_VECE(op);
4946     vtype = TCGOP_TYPE(op);
4947 
4948     if (its->val_type == TEMP_VAL_CONST) {
4949         /* Propagate constant via movi -> dupi.  */
4950         tcg_target_ulong val = its->val;
4951         if (IS_DEAD_ARG(1)) {
4952             temp_dead(s, its);
4953         }
4954         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4955         return;
4956     }
4957 
4958     dup_args_ct = opcode_args_ct(op);
4959     dup_out_regs = dup_args_ct[0].regs;
4960     dup_in_regs = dup_args_ct[1].regs;
4961 
4962     /* Allocate the output register now.  */
4963     if (ots->val_type != TEMP_VAL_REG) {
4964         TCGRegSet allocated_regs = s->reserved_regs;
4965         TCGReg oreg;
4966 
4967         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4968             /* Make sure to not spill the input register. */
4969             tcg_regset_set_reg(allocated_regs, its->reg);
4970         }
4971         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4972                              output_pref(op, 0), ots->indirect_base);
4973         set_temp_val_reg(s, ots, oreg);
4974     }
4975 
4976     switch (its->val_type) {
4977     case TEMP_VAL_REG:
4978         /*
4979          * The dup constraints must be broad, covering all possible VECE.
4980          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4981          * to fail, indicating that extra moves are required for that case.
4982          */
4983         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4984             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4985                 goto done;
4986             }
4987             /* Try again from memory or a vector input register.  */
4988         }
4989         if (!its->mem_coherent) {
4990             /*
4991              * The input register is not synced, and so an extra store
4992              * would be required to use memory.  Attempt an integer-vector
4993              * register move first.  We do not have a TCGRegSet for this.
4994              */
4995             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4996                 break;
4997             }
4998             /* Sync the temp back to its slot and load from there.  */
4999             temp_sync(s, its, s->reserved_regs, 0, 0);
5000         }
5001         /* fall through */
5002 
5003     case TEMP_VAL_MEM:
5004         lowpart_ofs = 0;
5005         if (HOST_BIG_ENDIAN) {
5006             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5007         }
5008         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5009                              its->mem_offset + lowpart_ofs)) {
5010             goto done;
5011         }
5012         /* Load the input into the destination vector register. */
5013         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5014         break;
5015 
5016     default:
5017         g_assert_not_reached();
5018     }
5019 
5020     /* We now have a vector input register, so dup must succeed. */
5021     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5022     tcg_debug_assert(ok);
5023 
5024  done:
5025     ots->mem_coherent = 0;
5026     if (IS_DEAD_ARG(1)) {
5027         temp_dead(s, its);
5028     }
5029     if (NEED_SYNC_ARG(0)) {
5030         temp_sync(s, ots, s->reserved_regs, 0, 0);
5031     }
5032     if (IS_DEAD_ARG(0)) {
5033         temp_dead(s, ots);
5034     }
5035 }
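
/*
 * In summary, the dup above tries strategies from cheapest to most
 * expensive: a direct register-to-register dup, an integer-to-vector
 * move (to avoid storing an unsynced input), a dup from the low part
 * of the memory slot, and finally a plain load followed by a self-dup,
 * which must succeed.
 */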
5036 
5037 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5038 {
5039     const TCGLifeData arg_life = op->life;
5040     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5041     TCGRegSet i_allocated_regs;
5042     TCGRegSet o_allocated_regs;
5043     int i, k, nb_iargs, nb_oargs;
5044     TCGReg reg;
5045     TCGArg arg;
5046     const TCGArgConstraint *args_ct;
5047     const TCGArgConstraint *arg_ct;
5048     TCGTemp *ts;
5049     TCGArg new_args[TCG_MAX_OP_ARGS];
5050     int const_args[TCG_MAX_OP_ARGS];
5051     TCGCond op_cond;
5052 
5053     nb_oargs = def->nb_oargs;
5054     nb_iargs = def->nb_iargs;
5055 
5056     /* copy constants */
5057     memcpy(new_args + nb_oargs + nb_iargs,
5058            op->args + nb_oargs + nb_iargs,
5059            sizeof(TCGArg) * def->nb_cargs);
5060 
5061     i_allocated_regs = s->reserved_regs;
5062     o_allocated_regs = s->reserved_regs;
5063 
5064     switch (op->opc) {
5065     case INDEX_op_brcond:
5066         op_cond = op->args[2];
5067         break;
5068     case INDEX_op_setcond:
5069     case INDEX_op_negsetcond:
5070     case INDEX_op_cmp_vec:
5071         op_cond = op->args[3];
5072         break;
5073     case INDEX_op_brcond2_i32:
5074         op_cond = op->args[4];
5075         break;
5076     case INDEX_op_movcond_i32:
5077     case INDEX_op_movcond_i64:
5078     case INDEX_op_setcond2_i32:
5079     case INDEX_op_cmpsel_vec:
5080         op_cond = op->args[5];
5081         break;
5082     default:
5083         /* No condition within opcode. */
5084         op_cond = TCG_COND_ALWAYS;
5085         break;
5086     }
5087 
5088     args_ct = opcode_args_ct(op);
5089 
5090     /* satisfy input constraints */
5091     for (k = 0; k < nb_iargs; k++) {
5092         TCGRegSet i_preferred_regs, i_required_regs;
5093         bool allocate_new_reg, copyto_new_reg;
5094         TCGTemp *ts2;
5095         int i1, i2;
5096 
5097         i = args_ct[nb_oargs + k].sort_index;
5098         arg = op->args[i];
5099         arg_ct = &args_ct[i];
5100         ts = arg_temp(arg);
5101 
5102         if (ts->val_type == TEMP_VAL_CONST) {
5103 #ifdef TCG_REG_ZERO
5104             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5105                 /* Hardware zero register: indicate register via non-const. */
5106                 const_args[i] = 0;
5107                 new_args[i] = TCG_REG_ZERO;
5108                 continue;
5109             }
5110 #endif
5111 
5112             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5113                                        op_cond, TCGOP_VECE(op))) {
5114                 /* constant is OK for instruction */
5115                 const_args[i] = 1;
5116                 new_args[i] = ts->val;
5117                 continue;
5118             }
5119         }
5120 
5121         reg = ts->reg;
5122         i_preferred_regs = 0;
5123         i_required_regs = arg_ct->regs;
5124         allocate_new_reg = false;
5125         copyto_new_reg = false;
5126 
5127         switch (arg_ct->pair) {
5128         case 0: /* not paired */
5129             if (arg_ct->ialias) {
5130                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5131 
5132                 /*
5133                  * If the input is readonly, then it cannot also be an
5134                  * output and aliased to itself.  If the input is not
5135                  * dead after the instruction, we must allocate a new
5136                  * register and move it.
5137                  */
5138                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5139                     || args_ct[arg_ct->alias_index].newreg) {
5140                     allocate_new_reg = true;
5141                 } else if (ts->val_type == TEMP_VAL_REG) {
5142                     /*
5143                      * Check if the current register has already been
5144                      * allocated for another input.
5145                      */
5146                     allocate_new_reg =
5147                         tcg_regset_test_reg(i_allocated_regs, reg);
5148                 }
5149             }
5150             if (!allocate_new_reg) {
5151                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5152                           i_preferred_regs);
5153                 reg = ts->reg;
5154                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5155             }
5156             if (allocate_new_reg) {
5157                 /*
5158                  * Allocate a new register matching the constraint
5159                  * and move the temporary register into it.
5160                  */
5161                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5162                           i_allocated_regs, 0);
5163                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5164                                     i_preferred_regs, ts->indirect_base);
5165                 copyto_new_reg = true;
5166             }
5167             break;
5168 
5169         case 1:
5170             /* First of an input pair; if i1 == i2, the second is an output. */
5171             i1 = i;
5172             i2 = arg_ct->pair_index;
5173             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5174 
5175             /*
5176              * It is easier to default to allocating a new pair
5177              * and to identify a few cases where it's not required.
5178              */
5179             if (arg_ct->ialias) {
5180                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5181                 if (IS_DEAD_ARG(i1) &&
5182                     IS_DEAD_ARG(i2) &&
5183                     !temp_readonly(ts) &&
5184                     ts->val_type == TEMP_VAL_REG &&
5185                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5186                     tcg_regset_test_reg(i_required_regs, reg) &&
5187                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5188                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5189                     (ts2
5190                      ? ts2->val_type == TEMP_VAL_REG &&
5191                        ts2->reg == reg + 1 &&
5192                        !temp_readonly(ts2)
5193                      : s->reg_to_temp[reg + 1] == NULL)) {
5194                     break;
5195                 }
5196             } else {
5197                 /* Without aliasing, the pair must also be an input. */
5198                 tcg_debug_assert(ts2);
5199                 if (ts->val_type == TEMP_VAL_REG &&
5200                     ts2->val_type == TEMP_VAL_REG &&
5201                     ts2->reg == reg + 1 &&
5202                     tcg_regset_test_reg(i_required_regs, reg)) {
5203                     break;
5204                 }
5205             }
5206             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5207                                      0, ts->indirect_base);
5208             goto do_pair;
5209 
5210         case 2: /* pair second */
5211             reg = new_args[arg_ct->pair_index] + 1;
5212             goto do_pair;
5213 
5214         case 3: /* ialias with second output, no first input */
5215             tcg_debug_assert(arg_ct->ialias);
5216             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5217 
5218             if (IS_DEAD_ARG(i) &&
5219                 !temp_readonly(ts) &&
5220                 ts->val_type == TEMP_VAL_REG &&
5221                 reg > 0 &&
5222                 s->reg_to_temp[reg - 1] == NULL &&
5223                 tcg_regset_test_reg(i_required_regs, reg) &&
5224                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5225                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5226                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5227                 break;
5228             }
5229             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5230                                      i_allocated_regs, 0,
5231                                      ts->indirect_base);
5232             tcg_regset_set_reg(i_allocated_regs, reg);
5233             reg += 1;
5234             goto do_pair;
5235 
5236         do_pair:
5237             /*
5238              * If an aliased input is not dead after the instruction,
5239              * we must allocate a new register and move it.
5240              */
5241             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5242                 TCGRegSet t_allocated_regs = i_allocated_regs;
5243 
5244                 /*
5245                  * Because of the alias, and the continued life, make sure
5246                  * that the temp is somewhere *other* than the reg pair,
5247                  * and we get a copy in reg.
5248                  */
5249                 tcg_regset_set_reg(t_allocated_regs, reg);
5250                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5251                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5252                     /* If ts was already in reg, copy it somewhere else. */
5253                     TCGReg nr;
5254                     bool ok;
5255 
5256                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5257                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5258                                        t_allocated_regs, 0, ts->indirect_base);
5259                     ok = tcg_out_mov(s, ts->type, nr, reg);
5260                     tcg_debug_assert(ok);
5261 
5262                     set_temp_val_reg(s, ts, nr);
5263                 } else {
5264                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5265                               t_allocated_regs, 0);
5266                     copyto_new_reg = true;
5267                 }
5268             } else {
5269                 /* Preferably allocate to reg, otherwise copy. */
5270                 i_required_regs = (TCGRegSet)1 << reg;
5271                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5272                           i_preferred_regs);
5273                 copyto_new_reg = ts->reg != reg;
5274             }
5275             break;
5276 
5277         default:
5278             g_assert_not_reached();
5279         }
5280 
5281         if (copyto_new_reg) {
5282             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5283                 /*
5284                  * Cross register class move not supported.  Sync the
5285                  * temp back to its slot and load from there.
5286                  */
5287                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5288                 tcg_out_ld(s, ts->type, reg,
5289                            ts->mem_base->reg, ts->mem_offset);
5290             }
5291         }
5292         new_args[i] = reg;
5293         const_args[i] = 0;
5294         tcg_regset_set_reg(i_allocated_regs, reg);
5295     }
5296 
5297     /* mark dead temporaries and free the associated registers */
5298     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5299         if (IS_DEAD_ARG(i)) {
5300             temp_dead(s, arg_temp(op->args[i]));
5301         }
5302     }
5303 
5304     if (def->flags & TCG_OPF_COND_BRANCH) {
5305         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5306     } else if (def->flags & TCG_OPF_BB_END) {
5307         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5308     } else {
5309         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5310             /* XXX: permit generic clobber register list? */
5311             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5312                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5313                     tcg_reg_free(s, i, i_allocated_regs);
5314                 }
5315             }
5316         }
5317         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5318             /* sync globals if the op has side effects and might trigger
5319                an exception. */
5320             sync_globals(s, i_allocated_regs);
5321         }
5322 
5323         /* satisfy the output constraints */
5324         for (k = 0; k < nb_oargs; k++) {
5325             i = args_ct[k].sort_index;
5326             arg = op->args[i];
5327             arg_ct = &args_ct[i];
5328             ts = arg_temp(arg);
5329 
5330             /* ENV should not be modified.  */
5331             tcg_debug_assert(!temp_readonly(ts));
5332 
5333             switch (arg_ct->pair) {
5334             case 0: /* not paired */
5335                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5336                     reg = new_args[arg_ct->alias_index];
5337                 } else if (arg_ct->newreg) {
5338                     reg = tcg_reg_alloc(s, arg_ct->regs,
5339                                         i_allocated_regs | o_allocated_regs,
5340                                         output_pref(op, k), ts->indirect_base);
5341                 } else {
5342                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5343                                         output_pref(op, k), ts->indirect_base);
5344                 }
5345                 break;
5346 
5347             case 1: /* first of pair */
5348                 if (arg_ct->oalias) {
5349                     reg = new_args[arg_ct->alias_index];
5350                 } else if (arg_ct->newreg) {
5351                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5352                                              i_allocated_regs | o_allocated_regs,
5353                                              output_pref(op, k),
5354                                              ts->indirect_base);
5355                 } else {
5356                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5357                                              output_pref(op, k),
5358                                              ts->indirect_base);
5359                 }
5360                 break;
5361 
5362             case 2: /* second of pair */
5363                 if (arg_ct->oalias) {
5364                     reg = new_args[arg_ct->alias_index];
5365                 } else {
5366                     reg = new_args[arg_ct->pair_index] + 1;
5367                 }
5368                 break;
5369 
5370             case 3: /* first of pair, aliasing with a second input */
5371                 tcg_debug_assert(!arg_ct->newreg);
5372                 reg = new_args[arg_ct->pair_index] - 1;
5373                 break;
5374 
5375             default:
5376                 g_assert_not_reached();
5377             }
5378             tcg_regset_set_reg(o_allocated_regs, reg);
5379             set_temp_val_reg(s, ts, reg);
5380             ts->mem_coherent = 0;
5381             new_args[i] = reg;
5382         }
5383     }
5384 
5385     /* emit instruction */
5386     TCGType type = TCGOP_TYPE(op);
5387     switch (op->opc) {
5388     case INDEX_op_ext_i32_i64:
5389         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5390         break;
5391     case INDEX_op_extu_i32_i64:
5392         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5393         break;
5394     case INDEX_op_extrl_i64_i32:
5395         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5396         break;
5397 
5398     case INDEX_op_add:
5399     case INDEX_op_and:
5400     case INDEX_op_andc:
5401     case INDEX_op_clz:
5402     case INDEX_op_ctz:
5403     case INDEX_op_divs:
5404     case INDEX_op_divu:
5405     case INDEX_op_eqv:
5406     case INDEX_op_mul:
5407     case INDEX_op_mulsh:
5408     case INDEX_op_muluh:
5409     case INDEX_op_nand:
5410     case INDEX_op_nor:
5411     case INDEX_op_or:
5412     case INDEX_op_orc:
5413     case INDEX_op_rems:
5414     case INDEX_op_remu:
5415     case INDEX_op_rotl:
5416     case INDEX_op_rotr:
5417     case INDEX_op_sar:
5418     case INDEX_op_shl:
5419     case INDEX_op_shr:
5420     case INDEX_op_xor:
5421         {
5422             const TCGOutOpBinary *out =
5423                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5424 
5425             /* Constants should never appear in the first source operand. */
5426             tcg_debug_assert(!const_args[1]);
5427             if (const_args[2]) {
5428                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5429             } else {
5430                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5431             }
5432         }
5433         break;
5434 
5435     case INDEX_op_sub:
5436         {
5437             const TCGOutOpSubtract *out = &outop_sub;
5438 
5439             /*
5440              * Constants should never appear in the second source operand.
5441              * These are folded into an add with a negative constant.
5442              */
5443             tcg_debug_assert(!const_args[2]);
5444             if (const_args[1]) {
5445                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5446             } else {
5447                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5448             }
5449         }
5450         break;
5451 
5452     case INDEX_op_ctpop:
5453     case INDEX_op_neg:
5454     case INDEX_op_not:
5455         {
5456             const TCGOutOpUnary *out =
5457                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5458 
5459             /* Constants should have been folded. */
5460             tcg_debug_assert(!const_args[1]);
5461             out->out_rr(s, type, new_args[0], new_args[1]);
5462         }
5463         break;
5464 
5465     case INDEX_op_divs2:
5466     case INDEX_op_divu2:
5467         {
5468             const TCGOutOpDivRem *out =
5469                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5470 
5471             /* Only used by x86 and s390x, which use matching constraints. */
5472             tcg_debug_assert(new_args[0] == new_args[2]);
5473             tcg_debug_assert(new_args[1] == new_args[3]);
5474             tcg_debug_assert(!const_args[4]);
5475             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5476         }
5477         break;
5478 
5479     case INDEX_op_muls2:
5480     case INDEX_op_mulu2:
5481         {
5482             const TCGOutOpMul2 *out =
5483                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5484 
5485             tcg_debug_assert(!const_args[2]);
5486             tcg_debug_assert(!const_args[3]);
5487             out->out_rrrr(s, type, new_args[0], new_args[1],
5488                           new_args[2], new_args[3]);
5489         }
5490         break;
5491 
5492     case INDEX_op_brcond:
5493         {
5494             const TCGOutOpBrcond *out = &outop_brcond;
5495             TCGCond cond = new_args[2];
5496             TCGLabel *label = arg_label(new_args[3]);
5497 
5498             tcg_debug_assert(!const_args[0]);
5499             if (const_args[1]) {
5500                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5501             } else {
5502                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5503             }
5504         }
5505         break;
5506 
5507     case INDEX_op_setcond:
5508     case INDEX_op_negsetcond:
5509         {
5510             const TCGOutOpSetcond *out =
5511                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5512             TCGCond cond = new_args[3];
5513 
5514             tcg_debug_assert(!const_args[1]);
5515             if (const_args[2]) {
5516                 out->out_rri(s, type, cond,
5517                              new_args[0], new_args[1], new_args[2]);
5518             } else {
5519                 out->out_rrr(s, type, cond,
5520                              new_args[0], new_args[1], new_args[2]);
5521             }
5522         }
5523         break;
5524 
5525     default:
5526         if (def->flags & TCG_OPF_VECTOR) {
5527             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5528                            TCGOP_VECE(op), new_args, const_args);
5529         } else {
5530             tcg_out_op(s, op->opc, type, new_args, const_args);
5531         }
5532         break;
5533     }
5534 
5535     /* move the outputs in the correct register if needed */
5536     for (i = 0; i < nb_oargs; i++) {
5537         ts = arg_temp(op->args[i]);
5538 
5539         /* ENV should not be modified.  */
5540         tcg_debug_assert(!temp_readonly(ts));
5541 
5542         if (NEED_SYNC_ARG(i)) {
5543             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5544         } else if (IS_DEAD_ARG(i)) {
5545             temp_dead(s, ts);
5546         }
5547     }
5548 }
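
/*
 * E.g. for "add t0, t1, $0x1000" the constant is first tested against
 * the backend's immediate constraints via tcg_target_const_match(); if
 * it matches, const_args[2] is set and out->out_rri() emits a reg+imm
 * form, otherwise the constant is materialized in a register and
 * out->out_rrr() is used.
 */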
5549 
5550 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5551 {
5552     const TCGLifeData arg_life = op->life;
5553     TCGTemp *ots, *itsl, *itsh;
5554     TCGType vtype = TCGOP_TYPE(op);
5555 
5556     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5557     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5558     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5559 
5560     ots = arg_temp(op->args[0]);
5561     itsl = arg_temp(op->args[1]);
5562     itsh = arg_temp(op->args[2]);
5563 
5564     /* ENV should not be modified.  */
5565     tcg_debug_assert(!temp_readonly(ots));
5566 
5567     /* Allocate the output register now.  */
5568     if (ots->val_type != TEMP_VAL_REG) {
5569         TCGRegSet allocated_regs = s->reserved_regs;
5570         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5571         TCGReg oreg;
5572 
5573         /* Make sure to not spill the input registers. */
5574         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5575             tcg_regset_set_reg(allocated_regs, itsl->reg);
5576         }
5577         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5578             tcg_regset_set_reg(allocated_regs, itsh->reg);
5579         }
5580 
5581         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5582                              output_pref(op, 0), ots->indirect_base);
5583         set_temp_val_reg(s, ots, oreg);
5584     }
5585 
5586     /* Promote dup2 of immediates to dupi_vec. */
5587     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5588         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5589         MemOp vece = MO_64;
5590 
5591         if (val == dup_const(MO_8, val)) {
5592             vece = MO_8;
5593         } else if (val == dup_const(MO_16, val)) {
5594             vece = MO_16;
5595         } else if (val == dup_const(MO_32, val)) {
5596             vece = MO_32;
5597         }
5598 
5599         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5600         goto done;
5601     }
5602 
5603     /* If the two inputs form one 64-bit value, try dupm_vec. */
5604     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5605         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5606         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5607         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5608 
5609         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5610         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5611 
5612         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5613                              its->mem_base->reg, its->mem_offset)) {
5614             goto done;
5615         }
5616     }
5617 
5618     /* Fall back to generic expansion. */
5619     return false;
5620 
5621  done:
5622     ots->mem_coherent = 0;
5623     if (IS_DEAD_ARG(1)) {
5624         temp_dead(s, itsl);
5625     }
5626     if (IS_DEAD_ARG(2)) {
5627         temp_dead(s, itsh);
5628     }
5629     if (NEED_SYNC_ARG(0)) {
5630         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5631     } else if (IS_DEAD_ARG(0)) {
5632         temp_dead(s, ots);
5633     }
5634     return true;
5635 }
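
/*
 * E.g. with constant inputs itsl->val == 0x11111111 and
 * itsh->val == 0x22222222, deposit64() above forms 0x2222222211111111;
 * dup_const() matches no element size smaller than MO_64, so
 * tcg_out_dupi_vec() replicates the full 64-bit immediate.
 */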
5636 
5637 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5638                          TCGRegSet allocated_regs)
5639 {
5640     if (ts->val_type == TEMP_VAL_REG) {
5641         if (ts->reg != reg) {
5642             tcg_reg_free(s, reg, allocated_regs);
5643             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5644                 /*
5645                  * Cross register class move not supported.  Sync the
5646                  * temp back to its slot and load from there.
5647                  */
5648                 temp_sync(s, ts, allocated_regs, 0, 0);
5649                 tcg_out_ld(s, ts->type, reg,
5650                            ts->mem_base->reg, ts->mem_offset);
5651             }
5652         }
5653     } else {
5654         TCGRegSet arg_set = 0;
5655 
5656         tcg_reg_free(s, reg, allocated_regs);
5657         tcg_regset_set_reg(arg_set, reg);
5658         temp_load(s, ts, arg_set, allocated_regs, 0);
5659     }
5660 }
5661 
5662 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5663                          TCGRegSet allocated_regs)
5664 {
5665     /*
5666      * When the destination is on the stack, load up the temp and store.
5667      * If there are many call-saved registers, the temp might live to
5668      * see another use; otherwise it'll be discarded.
5669      */
5670     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5671     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5672                arg_slot_stk_ofs(arg_slot));
5673 }
5674 
5675 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5676                             TCGTemp *ts, TCGRegSet *allocated_regs)
5677 {
5678     if (arg_slot_reg_p(l->arg_slot)) {
5679         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5680         load_arg_reg(s, reg, ts, *allocated_regs);
5681         tcg_regset_set_reg(*allocated_regs, reg);
5682     } else {
5683         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5684     }
5685 }
5686 
5687 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5688                          intptr_t ref_off, TCGRegSet *allocated_regs)
5689 {
5690     TCGReg reg;
5691 
5692     if (arg_slot_reg_p(arg_slot)) {
5693         reg = tcg_target_call_iarg_regs[arg_slot];
5694         tcg_reg_free(s, reg, *allocated_regs);
5695         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5696         tcg_regset_set_reg(*allocated_regs, reg);
5697     } else {
5698         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5699                             *allocated_regs, 0, false);
5700         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5701         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5702                    arg_slot_stk_ofs(arg_slot));
5703     }
5704 }
5705 
5706 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5707 {
5708     const int nb_oargs = TCGOP_CALLO(op);
5709     const int nb_iargs = TCGOP_CALLI(op);
5710     const TCGLifeData arg_life = op->life;
5711     const TCGHelperInfo *info = tcg_call_info(op);
5712     TCGRegSet allocated_regs = s->reserved_regs;
5713     int i;
5714 
5715     /*
5716      * Move inputs into place in reverse order,
5717      * so that we place stacked arguments first.
5718      */
5719     for (i = nb_iargs - 1; i >= 0; --i) {
5720         const TCGCallArgumentLoc *loc = &info->in[i];
5721         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5722 
5723         switch (loc->kind) {
5724         case TCG_CALL_ARG_NORMAL:
5725         case TCG_CALL_ARG_EXTEND_U:
5726         case TCG_CALL_ARG_EXTEND_S:
5727             load_arg_normal(s, loc, ts, &allocated_regs);
5728             break;
5729         case TCG_CALL_ARG_BY_REF:
5730             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5731             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5732                          arg_slot_stk_ofs(loc->ref_slot),
5733                          &allocated_regs);
5734             break;
5735         case TCG_CALL_ARG_BY_REF_N:
5736             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5737             break;
5738         default:
5739             g_assert_not_reached();
5740         }
5741     }
5742 
5743     /* Mark dead temporaries and free the associated registers.  */
5744     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5745         if (IS_DEAD_ARG(i)) {
5746             temp_dead(s, arg_temp(op->args[i]));
5747         }
5748     }
5749 
5750     /* Clobber call registers.  */
5751     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5752         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5753             tcg_reg_free(s, i, allocated_regs);
5754         }
5755     }
5756 
5757     /*
5758      * Save globals if they might be written by the helper,
5759      * sync them if they might be read.
5760      */
5761     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5762         /* Nothing to do */
5763     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5764         sync_globals(s, allocated_regs);
5765     } else {
5766         save_globals(s, allocated_regs);
5767     }
5768 
5769     /*
5770      * If the ABI passes a pointer to the returned struct as the first
5771      * argument, load that now.  Pass a pointer to the output home slot.
5772      */
5773     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5774         TCGTemp *ts = arg_temp(op->args[0]);
5775 
5776         if (!ts->mem_allocated) {
5777             temp_allocate_frame(s, ts);
5778         }
5779         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5780     }
5781 
5782     tcg_out_call(s, tcg_call_func(op), info);
5783 
5784     /* Assign output registers and emit moves if needed.  */
5785     switch (info->out_kind) {
5786     case TCG_CALL_RET_NORMAL:
5787         for (i = 0; i < nb_oargs; i++) {
5788             TCGTemp *ts = arg_temp(op->args[i]);
5789             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5790 
5791             /* ENV should not be modified.  */
5792             tcg_debug_assert(!temp_readonly(ts));
5793 
5794             set_temp_val_reg(s, ts, reg);
5795             ts->mem_coherent = 0;
5796         }
5797         break;
5798 
5799     case TCG_CALL_RET_BY_VEC:
5800         {
5801             TCGTemp *ts = arg_temp(op->args[0]);
5802 
5803             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5804             tcg_debug_assert(ts->temp_subindex == 0);
5805             if (!ts->mem_allocated) {
5806                 temp_allocate_frame(s, ts);
5807             }
5808             tcg_out_st(s, TCG_TYPE_V128,
5809                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5810                        ts->mem_base->reg, ts->mem_offset);
5811         }
5812         /* fall through to mark all parts in memory */
5813 
5814     case TCG_CALL_RET_BY_REF:
5815         /* The callee has performed a write through the reference. */
5816         for (i = 0; i < nb_oargs; i++) {
5817             TCGTemp *ts = arg_temp(op->args[i]);
5818             ts->val_type = TEMP_VAL_MEM;
5819         }
5820         break;
5821 
5822     default:
5823         g_assert_not_reached();
5824     }
5825 
5826     /* Flush or discard output registers as needed. */
5827     for (i = 0; i < nb_oargs; i++) {
5828         TCGTemp *ts = arg_temp(op->args[i]);
5829         if (NEED_SYNC_ARG(i)) {
5830             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5831         } else if (IS_DEAD_ARG(i)) {
5832             temp_dead(s, ts);
5833         }
5834     }
5835 }
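
/*
 * E.g. a helper returning TCG_TYPE_I128 on a host without 128-bit
 * return registers uses TCG_CALL_RET_BY_REF: a pointer to the output's
 * stack slot is loaded as the first argument before the call, and the
 * output parts are simply marked TEMP_VAL_MEM afterward.
 */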
5836 
5837 /**
5838  * atom_and_align_for_opc:
5839  * @s: tcg context
5840  * @opc: memory operation code
5841  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5842  * @allow_two_ops: true if we are prepared to issue two operations
5843  *
5844  * Return the alignment and atomicity to use for the inline fast path
5845  * for the given memory operation.  The alignment may be larger than
5846  * that specified in @opc, and the correct alignment will be diagnosed
5847  * by the slow path helper.
5848  *
5849  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5850  * and issue two loads or stores for subalignment.
5851  */
5852 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5853                                            MemOp host_atom, bool allow_two_ops)
5854 {
5855     MemOp align = memop_alignment_bits(opc);
5856     MemOp size = opc & MO_SIZE;
5857     MemOp half = size ? size - 1 : 0;
5858     MemOp atom = opc & MO_ATOM_MASK;
5859     MemOp atmax;
5860 
5861     switch (atom) {
5862     case MO_ATOM_NONE:
5863         /* The operation requires no specific atomicity. */
5864         atmax = MO_8;
5865         break;
5866 
5867     case MO_ATOM_IFALIGN:
5868         atmax = size;
5869         break;
5870 
5871     case MO_ATOM_IFALIGN_PAIR:
5872         atmax = half;
5873         break;
5874 
5875     case MO_ATOM_WITHIN16:
5876         atmax = size;
5877         if (size == MO_128) {
5878             /* Misalignment implies !within16, and therefore no atomicity. */
5879         } else if (host_atom != MO_ATOM_WITHIN16) {
5880             /* The host does not implement within16, so require alignment. */
5881             align = MAX(align, size);
5882         }
5883         break;
5884 
5885     case MO_ATOM_WITHIN16_PAIR:
5886         atmax = size;
5887         /*
5888          * Misalignment implies !within16, and therefore half atomicity.
5889          * Any host prepared for two operations can implement this with
5890          * half alignment.
5891          */
5892         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5893             align = MAX(align, half);
5894         }
5895         break;
5896 
5897     case MO_ATOM_SUBALIGN:
5898         atmax = size;
5899         if (host_atom != MO_ATOM_SUBALIGN) {
5900             /* If unaligned but not odd, there are subobjects up to half. */
5901             if (allow_two_ops) {
5902                 align = MAX(align, half);
5903             } else {
5904                 align = MAX(align, size);
5905             }
5906         }
5907         break;
5908 
5909     default:
5910         g_assert_not_reached();
5911     }
5912 
5913     return (TCGAtomAlign){ .atom = atmax, .align = align };
5914 }
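
/*
 * E.g. an MO_64 load with MO_ATOM_WITHIN16 on a host that only
 * implements MO_ATOM_IFALIGN raises align to MO_64: an 8-byte aligned
 * access can never cross a 16-byte boundary, so the within-16
 * guarantee holds trivially.
 */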
5915 
5916 /*
5917  * Similarly for qemu_ld/st slow path helpers.
5918  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5919  * using only the provided backend tcg_out_* functions.
5920  */
5921 
5922 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5923 {
5924     int ofs = arg_slot_stk_ofs(slot);
5925 
5926     /*
5927      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5928      * require extension to uint64_t, adjust the address for uint32_t.
5929      */
5930     if (HOST_BIG_ENDIAN &&
5931         TCG_TARGET_REG_BITS == 64 &&
5932         type == TCG_TYPE_I32) {
5933         ofs += 4;
5934     }
5935     return ofs;
5936 }
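
/*
 * E.g. on a big-endian host with 64-bit registers, a TCG_TYPE_I32
 * argument occupies the high-addressed half of its 8-byte stack slot,
 * so a slot offset of 16 is adjusted to 20 to address the 32-bit value
 * directly.
 */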
5937 
5938 static void tcg_out_helper_load_slots(TCGContext *s,
5939                                       unsigned nmov, TCGMovExtend *mov,
5940                                       const TCGLdstHelperParam *parm)
5941 {
5942     unsigned i;
5943     TCGReg dst3;
5944 
5945     /*
5946      * Start from the end, storing to the stack first.
5947      * This frees those registers, so we need not consider overlap.
5948      */
5949     for (i = nmov; i-- > 0; ) {
5950         unsigned slot = mov[i].dst;
5951 
5952         if (arg_slot_reg_p(slot)) {
5953             goto found_reg;
5954         }
5955 
5956         TCGReg src = mov[i].src;
5957         TCGType dst_type = mov[i].dst_type;
5958         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5959 
5960         /* The argument is going onto the stack; extend into scratch. */
5961         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5962             tcg_debug_assert(parm->ntmp != 0);
5963             mov[i].dst = src = parm->tmp[0];
5964             tcg_out_movext1(s, &mov[i]);
5965         }
5966 
5967         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5968                    tcg_out_helper_stk_ofs(dst_type, slot));
5969     }
5970     return;
5971 
5972  found_reg:
5973     /*
5974      * The remaining arguments are in registers.
5975      * Convert slot numbers to argument registers.
5976      */
5977     nmov = i + 1;
5978     for (i = 0; i < nmov; ++i) {
5979         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5980     }
5981 
5982     switch (nmov) {
5983     case 4:
5984         /* The backend must have provided enough temps for the worst case. */
5985         tcg_debug_assert(parm->ntmp >= 2);
5986 
5987         dst3 = mov[3].dst;
5988         for (unsigned j = 0; j < 3; ++j) {
5989             if (dst3 == mov[j].src) {
5990                 /*
5991                  * Conflict. Copy the source to a temporary, perform the
5992                  * remaining moves, then the extension from our scratch
5993                  * on the way out.
5994                  */
5995                 TCGReg scratch = parm->tmp[1];
5996 
5997                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5998                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5999                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6000                 break;
6001             }
6002         }
6003 
6004         /* No conflicts: perform this move and continue. */
6005         tcg_out_movext1(s, &mov[3]);
6006         /* fall through */
6007 
6008     case 3:
6009         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6010                         parm->ntmp ? parm->tmp[0] : -1);
6011         break;
6012     case 2:
6013         tcg_out_movext2(s, mov, mov + 1,
6014                         parm->ntmp ? parm->tmp[0] : -1);
6015         break;
6016     case 1:
6017         tcg_out_movext1(s, mov);
6018         break;
6019     default:
6020         g_assert_not_reached();
6021     }
6022 }
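
/*
 * The four-move case above is a small parallel-move problem: when
 * mov[3]'s destination is still needed as a source by an earlier move,
 * mov[3] must be performed last, so its source is stashed in a scratch
 * register first in case the earlier moves overwrite it.
 */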
6023 
6024 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6025                                     TCGType type, tcg_target_long imm,
6026                                     const TCGLdstHelperParam *parm)
6027 {
6028     if (arg_slot_reg_p(slot)) {
6029         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6030     } else {
6031         int ofs = tcg_out_helper_stk_ofs(type, slot);
6032         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6033             tcg_debug_assert(parm->ntmp != 0);
6034             tcg_out_movi(s, type, parm->tmp[0], imm);
6035             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6036         }
6037     }
6038 }
6039 
6040 static void tcg_out_helper_load_common_args(TCGContext *s,
6041                                             const TCGLabelQemuLdst *ldst,
6042                                             const TCGLdstHelperParam *parm,
6043                                             const TCGHelperInfo *info,
6044                                             unsigned next_arg)
6045 {
6046     TCGMovExtend ptr_mov = {
6047         .dst_type = TCG_TYPE_PTR,
6048         .src_type = TCG_TYPE_PTR,
6049         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6050     };
6051     const TCGCallArgumentLoc *loc = &info->in[0];
6052     TCGType type;
6053     unsigned slot;
6054     tcg_target_ulong imm;
6055 
6056     /*
6057      * Handle env, which is always first.
6058      */
6059     ptr_mov.dst = loc->arg_slot;
6060     ptr_mov.src = TCG_AREG0;
6061     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6062 
6063     /*
6064      * Handle oi.
6065      */
6066     imm = ldst->oi;
6067     loc = &info->in[next_arg];
6068     type = TCG_TYPE_I32;
6069     switch (loc->kind) {
6070     case TCG_CALL_ARG_NORMAL:
6071         break;
6072     case TCG_CALL_ARG_EXTEND_U:
6073     case TCG_CALL_ARG_EXTEND_S:
6074         /* No extension required for MemOpIdx. */
6075         tcg_debug_assert(imm <= INT32_MAX);
6076         type = TCG_TYPE_REG;
6077         break;
6078     default:
6079         g_assert_not_reached();
6080     }
6081     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6082     next_arg++;
6083 
6084     /*
6085      * Handle ra.
6086      */
6087     loc = &info->in[next_arg];
6088     slot = loc->arg_slot;
6089     if (parm->ra_gen) {
6090         int arg_reg = -1;
6091         TCGReg ra_reg;
6092 
6093         if (arg_slot_reg_p(slot)) {
6094             arg_reg = tcg_target_call_iarg_regs[slot];
6095         }
6096         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6097 
6098         ptr_mov.dst = slot;
6099         ptr_mov.src = ra_reg;
6100         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6101     } else {
6102         imm = (uintptr_t)ldst->raddr;
6103         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6104     }
6105 }
6106 
6107 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6108                                        const TCGCallArgumentLoc *loc,
6109                                        TCGType dst_type, TCGType src_type,
6110                                        TCGReg lo, TCGReg hi)
6111 {
6112     MemOp reg_mo;
6113 
6114     if (dst_type <= TCG_TYPE_REG) {
6115         MemOp src_ext;
6116 
6117         switch (loc->kind) {
6118         case TCG_CALL_ARG_NORMAL:
6119             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6120             break;
6121         case TCG_CALL_ARG_EXTEND_U:
6122             dst_type = TCG_TYPE_REG;
6123             src_ext = MO_UL;
6124             break;
6125         case TCG_CALL_ARG_EXTEND_S:
6126             dst_type = TCG_TYPE_REG;
6127             src_ext = MO_SL;
6128             break;
6129         default:
6130             g_assert_not_reached();
6131         }
6132 
6133         mov[0].dst = loc->arg_slot;
6134         mov[0].dst_type = dst_type;
6135         mov[0].src = lo;
6136         mov[0].src_type = src_type;
6137         mov[0].src_ext = src_ext;
6138         return 1;
6139     }
6140 
6141     if (TCG_TARGET_REG_BITS == 32) {
6142         assert(dst_type == TCG_TYPE_I64);
6143         reg_mo = MO_32;
6144     } else {
6145         assert(dst_type == TCG_TYPE_I128);
6146         reg_mo = MO_64;
6147     }
6148 
6149     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6150     mov[0].src = lo;
6151     mov[0].dst_type = TCG_TYPE_REG;
6152     mov[0].src_type = TCG_TYPE_REG;
6153     mov[0].src_ext = reg_mo;
6154 
6155     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6156     mov[1].src = hi;
6157     mov[1].dst_type = TCG_TYPE_REG;
6158     mov[1].src_type = TCG_TYPE_REG;
6159     mov[1].src_ext = reg_mo;
6160 
6161     return 2;
6162 }
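
/*
 * E.g. passing a TCG_TYPE_I64 argument on a 32-bit host expands to two
 * register-sized moves; loc[HOST_BIG_ENDIAN] receives the low part, so
 * the pair lands in the host's natural word order.
 */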
6163 
6164 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6165                                    const TCGLdstHelperParam *parm)
6166 {
6167     const TCGHelperInfo *info;
6168     const TCGCallArgumentLoc *loc;
6169     TCGMovExtend mov[2];
6170     unsigned next_arg, nmov;
6171     MemOp mop = get_memop(ldst->oi);
6172 
6173     switch (mop & MO_SIZE) {
6174     case MO_8:
6175     case MO_16:
6176     case MO_32:
6177         info = &info_helper_ld32_mmu;
6178         break;
6179     case MO_64:
6180         info = &info_helper_ld64_mmu;
6181         break;
6182     case MO_128:
6183         info = &info_helper_ld128_mmu;
6184         break;
6185     default:
6186         g_assert_not_reached();
6187     }
6188 
6189     /* Defer env argument. */
6190     next_arg = 1;
6191 
6192     loc = &info->in[next_arg];
6193     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6194         /*
6195          * 32-bit host with 32-bit guest: zero-extend the guest address
6196          * to 64 bits for the helper by storing the low part, then
6197          * load a zero for the high part.
6198          */
6199         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6200                                TCG_TYPE_I32, TCG_TYPE_I32,
6201                                ldst->addr_reg, -1);
6202         tcg_out_helper_load_slots(s, 1, mov, parm);
6203 
6204         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6205                                 TCG_TYPE_I32, 0, parm);
6206         next_arg += 2;
6207     } else {
6208         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6209                                       ldst->addr_reg, -1);
6210         tcg_out_helper_load_slots(s, nmov, mov, parm);
6211         next_arg += nmov;
6212     }
6213 
6214     switch (info->out_kind) {
6215     case TCG_CALL_RET_NORMAL:
6216     case TCG_CALL_RET_BY_VEC:
6217         break;
6218     case TCG_CALL_RET_BY_REF:
6219         /*
6220          * The return reference is in the first argument slot.
6221          * We need memory in which to return: re-use the top of stack.
6222          */
6223         {
6224             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6225 
6226             if (arg_slot_reg_p(0)) {
6227                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6228                                  TCG_REG_CALL_STACK, ofs_slot0);
6229             } else {
6230                 tcg_debug_assert(parm->ntmp != 0);
6231                 tcg_out_addi_ptr(s, parm->tmp[0],
6232                                  TCG_REG_CALL_STACK, ofs_slot0);
6233                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6234                            TCG_REG_CALL_STACK, ofs_slot0);
6235             }
6236         }
6237         break;
6238     default:
6239         g_assert_not_reached();
6240     }
6241 
6242     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6243 }
6244 
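/*
 * Emit code to move the return value of a qemu_ld helper into the
 * data register(s), applying any required extension along the way.
 */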
6245 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6246                                   bool load_sign,
6247                                   const TCGLdstHelperParam *parm)
6248 {
6249     MemOp mop = get_memop(ldst->oi);
6250     TCGMovExtend mov[2];
6251     int ofs_slot0;
6252 
6253     switch (ldst->type) {
6254     case TCG_TYPE_I64:
6255         if (TCG_TARGET_REG_BITS == 32) {
6256             break;
6257         }
6258         /* fall through */
6259 
6260     case TCG_TYPE_I32:
6261         mov[0].dst = ldst->datalo_reg;
6262         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6263         mov[0].dst_type = ldst->type;
6264         mov[0].src_type = TCG_TYPE_REG;
6265 
6266         /*
6267          * If load_sign, then we allowed the helper to perform the
6268          * appropriate sign extension to tcg_target_ulong, and all
6269          * we need now is a plain move.
6270          *
6271          * If not, then we expect the relevant extension
6272          * instruction to be no more expensive than a move, and
6273          * we thus save icache space etc. by using only one of the
6274          * two helper functions.
6275          */
6276         if (load_sign || !(mop & MO_SIGN)) {
6277             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6278                 mov[0].src_ext = MO_32;
6279             } else {
6280                 mov[0].src_ext = MO_64;
6281             }
6282         } else {
6283             mov[0].src_ext = mop & MO_SSIZE;
6284         }
6285         tcg_out_movext1(s, mov);
6286         return;
6287 
6288     case TCG_TYPE_I128:
6289         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6290         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6291         switch (TCG_TARGET_CALL_RET_I128) {
6292         case TCG_CALL_RET_NORMAL:
6293             break;
6294         case TCG_CALL_RET_BY_VEC:
6295             tcg_out_st(s, TCG_TYPE_V128,
6296                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6297                        TCG_REG_CALL_STACK, ofs_slot0);
6298             /* fall through */
6299         case TCG_CALL_RET_BY_REF:
6300             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6301                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6302             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6303                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6304             return;
6305         default:
6306             g_assert_not_reached();
6307         }
6308         break;
6309 
6310     default:
6311         g_assert_not_reached();
6312     }
6313 
6314     mov[0].dst = ldst->datalo_reg;
6315     mov[0].src =
6316         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6317     mov[0].dst_type = TCG_TYPE_REG;
6318     mov[0].src_type = TCG_TYPE_REG;
6319     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6320 
6321     mov[1].dst = ldst->datahi_reg;
6322     mov[1].src =
6323         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6324     mov[1].dst_type = TCG_TYPE_REG;
6325     mov[1].src_type = TCG_TYPE_REG;
6326     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6327 
6328     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6329 }
6330 
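/*
 * Emit code to load the arguments for a qemu_st helper call: env,
 * the guest address, the data to store, the MemOpIdx, and the
 * return address.
 */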
6331 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6332                                    const TCGLdstHelperParam *parm)
6333 {
6334     const TCGHelperInfo *info;
6335     const TCGCallArgumentLoc *loc;
6336     TCGMovExtend mov[4];
6337     TCGType data_type;
6338     unsigned next_arg, nmov, n;
6339     MemOp mop = get_memop(ldst->oi);
6340 
6341     switch (mop & MO_SIZE) {
6342     case MO_8:
6343     case MO_16:
6344     case MO_32:
6345         info = &info_helper_st32_mmu;
6346         data_type = TCG_TYPE_I32;
6347         break;
6348     case MO_64:
6349         info = &info_helper_st64_mmu;
6350         data_type = TCG_TYPE_I64;
6351         break;
6352     case MO_128:
6353         info = &info_helper_st128_mmu;
6354         data_type = TCG_TYPE_I128;
6355         break;
6356     default:
6357         g_assert_not_reached();
6358     }
6359 
6360     /* Defer env argument. */
6361     next_arg = 1;
6362     nmov = 0;
6363 
6364     /* Handle addr argument. */
6365     loc = &info->in[next_arg];
6366     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6367     if (TCG_TARGET_REG_BITS == 32) {
6368         /*
6369          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6370          * to 64 bits for the helper by storing the low part.  Later,
6371          * after we have processed the register inputs, we will load a
6372          * zero for the high part.
6373          */
6374         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6375                                TCG_TYPE_I32, TCG_TYPE_I32,
6376                                ldst->addr_reg, -1);
6377         next_arg += 2;
6378         nmov += 1;
6379     } else {
6380         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6381                                    ldst->addr_reg, -1);
6382         next_arg += n;
6383         nmov += n;
6384     }
6385 
6386     /* Handle data argument. */
6387     loc = &info->in[next_arg];
6388     switch (loc->kind) {
6389     case TCG_CALL_ARG_NORMAL:
6390     case TCG_CALL_ARG_EXTEND_U:
6391     case TCG_CALL_ARG_EXTEND_S:
6392         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6393                                    ldst->datalo_reg, ldst->datahi_reg);
6394         next_arg += n;
6395         nmov += n;
6396         tcg_out_helper_load_slots(s, nmov, mov, parm);
6397         break;
6398 
6399     case TCG_CALL_ARG_BY_REF:
6400         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6401         tcg_debug_assert(data_type == TCG_TYPE_I128);
6402         tcg_out_st(s, TCG_TYPE_I64,
6403                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6404                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6405         tcg_out_st(s, TCG_TYPE_I64,
6406                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6407                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6408 
6409         tcg_out_helper_load_slots(s, nmov, mov, parm);
6410 
6411         if (arg_slot_reg_p(loc->arg_slot)) {
6412             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6413                              TCG_REG_CALL_STACK,
6414                              arg_slot_stk_ofs(loc->ref_slot));
6415         } else {
6416             tcg_debug_assert(parm->ntmp != 0);
6417             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6418                              arg_slot_stk_ofs(loc->ref_slot));
6419             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6420                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6421         }
6422         next_arg += 2;
6423         break;
6424 
6425     default:
6426         g_assert_not_reached();
6427     }
6428 
6429     if (TCG_TARGET_REG_BITS == 32) {
6430         /* Zero extend the address by loading a zero for the high part. */
6431         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6432         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6433     }
6434 
6435     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6436 }
6437 
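/*
 * Generate host code for the ops queued in s->ops, writing into tb.
 * Returns the number of bytes of code emitted, or a negative value
 * if code generation must be restarted (e.g. on buffer overflow).
 */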
6438 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6439 {
6440     int i, start_words, num_insns;
6441     TCGOp *op;
6442 
6443     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6444                  && qemu_log_in_addr_range(pc_start))) {
6445         FILE *logfile = qemu_log_trylock();
6446         if (logfile) {
6447             fprintf(logfile, "OP:\n");
6448             tcg_dump_ops(s, logfile, false);
6449             fprintf(logfile, "\n");
6450             qemu_log_unlock(logfile);
6451         }
6452     }
6453 
6454 #ifdef CONFIG_DEBUG_TCG
6455     /* Ensure all labels referenced have been emitted.  */
6456     {
6457         TCGLabel *l;
6458         bool error = false;
6459 
6460         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6461             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6462                 qemu_log_mask(CPU_LOG_TB_OP,
6463                               "$L%d referenced but not present.\n", l->id);
6464                 error = true;
6465             }
6466         }
6467         assert(!error);
6468     }
6469 #endif
6470 
6471     /* Do not reuse any EBB that may be allocated within the TB. */
6472     tcg_temp_ebb_reset_freed(s);
6473 
6474     tcg_optimize(s);
6475 
6476     reachable_code_pass(s);
6477     liveness_pass_0(s);
6478     liveness_pass_1(s);
6479 
6480     if (s->nb_indirects > 0) {
6481         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6482                      && qemu_log_in_addr_range(pc_start))) {
6483             FILE *logfile = qemu_log_trylock();
6484             if (logfile) {
6485                 fprintf(logfile, "OP before indirect lowering:\n");
6486                 tcg_dump_ops(s, logfile, false);
6487                 fprintf(logfile, "\n");
6488                 qemu_log_unlock(logfile);
6489             }
6490         }
6491 
6492         /* Replace indirect temps with direct temps.  */
6493         if (liveness_pass_2(s)) {
6494             /* If changes were made, re-run liveness.  */
6495             liveness_pass_1(s);
6496         }
6497     }
6498 
6499     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6500                  && qemu_log_in_addr_range(pc_start))) {
6501         FILE *logfile = qemu_log_trylock();
6502         if (logfile) {
6503             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6504             tcg_dump_ops(s, logfile, true);
6505             fprintf(logfile, "\n");
6506             qemu_log_unlock(logfile);
6507         }
6508     }
6509 
6510     /* Initialize goto_tb jump offsets. */
6511     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6512     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6513     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6514     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6515 
6516     tcg_reg_alloc_start(s);
6517 
6518     /*
6519      * Reset the buffer pointers when restarting after overflow.
6520      * TODO: Move this into translate-all.c with the rest of the
6521      * buffer management.  Having only this done here is confusing.
6522      */
6523     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6524     s->code_ptr = s->code_buf;
6525     s->data_gen_ptr = NULL;
6526 
6527     QSIMPLEQ_INIT(&s->ldst_labels);
6528     s->pool_labels = NULL;
6529 
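    /*
     * gen_insn_data is a flat [icount][insn_start_words] array that
     * records the target state at each INDEX_op_insn_start; it is
     * consulted when restoring state after an exception.
     */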
6530     start_words = s->insn_start_words;
6531     s->gen_insn_data =
6532         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6533 
6534     tcg_out_tb_start(s);
6535 
6536     num_insns = -1;
6537     QTAILQ_FOREACH(op, &s->ops, link) {
6538         TCGOpcode opc = op->opc;
6539 
6540         switch (opc) {
6541         case INDEX_op_mov:
6542         case INDEX_op_mov_vec:
6543             tcg_reg_alloc_mov(s, op);
6544             break;
6545         case INDEX_op_dup_vec:
6546             tcg_reg_alloc_dup(s, op);
6547             break;
6548         case INDEX_op_insn_start:
6549             if (num_insns >= 0) {
6550                 size_t off = tcg_current_code_size(s);
6551                 s->gen_insn_end_off[num_insns] = off;
6552                 /* Assert that we do not overflow our stored offset.  */
6553                 assert(s->gen_insn_end_off[num_insns] == off);
6554             }
6555             num_insns++;
6556             for (i = 0; i < start_words; ++i) {
6557                 s->gen_insn_data[num_insns * start_words + i] =
6558                     tcg_get_insn_start_param(op, i);
6559             }
6560             break;
6561         case INDEX_op_discard:
6562             temp_dead(s, arg_temp(op->args[0]));
6563             break;
6564         case INDEX_op_set_label:
6565             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6566             tcg_out_label(s, arg_label(op->args[0]));
6567             break;
6568         case INDEX_op_call:
6569             tcg_reg_alloc_call(s, op);
6570             break;
6571         case INDEX_op_exit_tb:
6572             tcg_out_exit_tb(s, op->args[0]);
6573             break;
6574         case INDEX_op_goto_tb:
6575             tcg_out_goto_tb(s, op->args[0]);
6576             break;
6577         case INDEX_op_dup2_vec:
6578             if (tcg_reg_alloc_dup2(s, op)) {
6579                 break;
6580             }
6581             /* fall through */
6582         default:
6583             /* Sanity check that we've not introduced any unhandled opcodes. */
6584             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6585                                               TCGOP_FLAGS(op)));
6586             /* Note: in order to speed up the code, it would be much
6587                faster to have specialized register allocator functions for
6588                some common argument patterns */
6589             tcg_reg_alloc_op(s, op);
6590             break;
6591         }
6592         /* Test for (pending) buffer overflow.  The assumption is that any
6593            one operation beginning below the high water mark cannot overrun
6594            the buffer completely.  Thus we can test for overflow after
6595            generating code without having to check during generation.  */
6596         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6597             return -1;
6598         }
6599         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6600         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6601             return -2;
6602         }
6603     }
6604     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6605     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6606 
6607     /* Generate TB finalization at the end of block */
6608     i = tcg_out_ldst_finalize(s);
6609     if (i < 0) {
6610         return i;
6611     }
6612     i = tcg_out_pool_finalize(s);
6613     if (i < 0) {
6614         return i;
6615     }
6616     if (!tcg_resolve_relocs(s)) {
6617         return -2;
6618     }
6619 
6620 #ifndef CONFIG_TCG_INTERPRETER
6621     /* flush instruction cache */
6622     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6623                         (uintptr_t)s->code_buf,
6624                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6625 #endif
6626 
6627     return tcg_current_code_size(s);
6628 }
6629 
6630 #ifdef ELF_HOST_MACHINE
6631 /* In order to use this feature, the backend needs to do three things:
6632 
6633    (1) Define ELF_HOST_MACHINE, both to provide the value to place
6634        in the ELF image and to indicate support for the feature.
6635 
6636    (2) Define tcg_register_jit.  This should create a buffer containing
6637        the contents of a .debug_frame section that describes the post-
6638        prologue unwind info for the tcg machine.
6639 
6640    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6641 */
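/*
 * As a sketch only (assuming a hypothetical per-host DebugFrame
 * layout; the actual contents vary by backend), tcg_register_jit
 * might look like:
 *
 *     static const DebugFrame debug_frame = {
 *         ... CIE and FDE describing the prologue's frame setup ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */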
6642 
6643 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6644 typedef enum {
6645     JIT_NOACTION = 0,
6646     JIT_REGISTER_FN,
6647     JIT_UNREGISTER_FN
6648 } jit_actions_t;
6649 
6650 struct jit_code_entry {
6651     struct jit_code_entry *next_entry;
6652     struct jit_code_entry *prev_entry;
6653     const void *symfile_addr;
6654     uint64_t symfile_size;
6655 };
6656 
6657 struct jit_descriptor {
6658     uint32_t version;
6659     uint32_t action_flag;
6660     struct jit_code_entry *relevant_entry;
6661     struct jit_code_entry *first_entry;
6662 };
6663 
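/*
 * GDB sets a breakpoint in this function when the JIT interface is
 * in use; the empty asm keeps the call from being optimized away.
 */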
6664 void __jit_debug_register_code(void) __attribute__((noinline));
6665 void __jit_debug_register_code(void)
6666 {
6667     asm("");
6668 }
6669 
6670 /* Must statically initialize the version, because GDB may check
6671    the version before we can set it.  */
6672 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6673 
6674 /* End GDB interface.  */
6675 
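/*
 * Return the offset of str within strtab.  Note that str must be
 * present in the table; there is no check for the terminator.
 */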
6676 static int find_string(const char *strtab, const char *str)
6677 {
6678     const char *p = strtab + 1;
6679 
6680     while (1) {
6681         if (strcmp(p, str) == 0) {
6682             return p - strtab;
6683         }
6684         p += strlen(p) + 1;
6685     }
6686 }
6687 
6688 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6689                                  const void *debug_frame,
6690                                  size_t debug_frame_size)
6691 {
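    /*
     * A minimal DWARF .debug_info section: one compile-unit DIE and
     * one subprogram DIE, matching the abbreviation table in "da".
     */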
6692     struct __attribute__((packed)) DebugInfo {
6693         uint32_t  len;
6694         uint16_t  version;
6695         uint32_t  abbrev;
6696         uint8_t   ptr_size;
6697         uint8_t   cu_die;
6698         uint16_t  cu_lang;
6699         uintptr_t cu_low_pc;
6700         uintptr_t cu_high_pc;
6701         uint8_t   fn_die;
6702         char      fn_name[16];
6703         uintptr_t fn_low_pc;
6704         uintptr_t fn_high_pc;
6705         uint8_t   cu_eoc;
6706     };
6707 
6708     struct ElfImage {
6709         ElfW(Ehdr) ehdr;
6710         ElfW(Phdr) phdr;
6711         ElfW(Shdr) shdr[7];
6712         ElfW(Sym)  sym[2];
6713         struct DebugInfo di;
6714         uint8_t    da[24];
6715         char       str[80];
6716     };
6717 
6718     struct ElfImage *img;
6719 
6720     static const struct ElfImage img_template = {
6721         .ehdr = {
6722             .e_ident[EI_MAG0] = ELFMAG0,
6723             .e_ident[EI_MAG1] = ELFMAG1,
6724             .e_ident[EI_MAG2] = ELFMAG2,
6725             .e_ident[EI_MAG3] = ELFMAG3,
6726             .e_ident[EI_CLASS] = ELF_CLASS,
6727             .e_ident[EI_DATA] = ELF_DATA,
6728             .e_ident[EI_VERSION] = EV_CURRENT,
6729             .e_type = ET_EXEC,
6730             .e_machine = ELF_HOST_MACHINE,
6731             .e_version = EV_CURRENT,
6732             .e_phoff = offsetof(struct ElfImage, phdr),
6733             .e_shoff = offsetof(struct ElfImage, shdr),
6734             .e_ehsize = sizeof(ElfW(Ehdr)),
6735             .e_phentsize = sizeof(ElfW(Phdr)),
6736             .e_phnum = 1,
6737             .e_shentsize = sizeof(ElfW(Shdr)),
6738             .e_shnum = ARRAY_SIZE(img->shdr),
6739             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6740 #ifdef ELF_HOST_FLAGS
6741             .e_flags = ELF_HOST_FLAGS,
6742 #endif
6743 #ifdef ELF_OSABI
6744             .e_ident[EI_OSABI] = ELF_OSABI,
6745 #endif
6746         },
6747         .phdr = {
6748             .p_type = PT_LOAD,
6749             .p_flags = PF_X,
6750         },
6751         .shdr = {
6752             [0] = { .sh_type = SHT_NULL },
6753             /* Trick: The contents of code_gen_buffer are not present in
6754                this fake ELF file; that got allocated elsewhere.  Therefore
6755                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6756                will not look for contents.  We can record any address.  */
6757             [1] = { /* .text */
6758                 .sh_type = SHT_NOBITS,
6759                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6760             },
6761             [2] = { /* .debug_info */
6762                 .sh_type = SHT_PROGBITS,
6763                 .sh_offset = offsetof(struct ElfImage, di),
6764                 .sh_size = sizeof(struct DebugInfo),
6765             },
6766             [3] = { /* .debug_abbrev */
6767                 .sh_type = SHT_PROGBITS,
6768                 .sh_offset = offsetof(struct ElfImage, da),
6769                 .sh_size = sizeof(img->da),
6770             },
6771             [4] = { /* .debug_frame */
6772                 .sh_type = SHT_PROGBITS,
6773                 .sh_offset = sizeof(struct ElfImage),
6774             },
6775             [5] = { /* .symtab */
6776                 .sh_type = SHT_SYMTAB,
6777                 .sh_offset = offsetof(struct ElfImage, sym),
6778                 .sh_size = sizeof(img->sym),
6779                 .sh_info = 1,
6780                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6781                 .sh_entsize = sizeof(ElfW(Sym)),
6782             },
6783             [6] = { /* .strtab */
6784                 .sh_type = SHT_STRTAB,
6785                 .sh_offset = offsetof(struct ElfImage, str),
6786                 .sh_size = sizeof(img->str),
6787             }
6788         },
6789         .sym = {
6790             [1] = { /* code_gen_buffer */
6791                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6792                 .st_shndx = 1,
6793             }
6794         },
6795         .di = {
6796             .len = sizeof(struct DebugInfo) - 4,
6797             .version = 2,
6798             .ptr_size = sizeof(void *),
6799             .cu_die = 1,
6800             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6801             .fn_die = 2,
6802             .fn_name = "code_gen_buffer"
6803         },
6804         .da = {
6805             1,          /* abbrev number (the cu) */
6806             0x11, 1,    /* DW_TAG_compile_unit, has children */
6807             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6808             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6809             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6810             0, 0,       /* end of abbrev */
6811             2,          /* abbrev number (the fn) */
6812             0x2e, 0,    /* DW_TAG_subprogram, no children */
6813             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6814             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6815             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6816             0, 0,       /* end of abbrev */
6817             0           /* no more abbrev */
6818         },
6819         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6820                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6821     };
6822 
6823     /* We only need a single jit entry; statically allocate it.  */
6824     static struct jit_code_entry one_entry;
6825 
6826     uintptr_t buf = (uintptr_t)buf_ptr;
6827     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6828     DebugFrameHeader *dfh;
6829 
6830     img = g_malloc(img_size);
6831     *img = img_template;
6832 
6833     img->phdr.p_vaddr = buf;
6834     img->phdr.p_paddr = buf;
6835     img->phdr.p_memsz = buf_size;
6836 
6837     img->shdr[1].sh_name = find_string(img->str, ".text");
6838     img->shdr[1].sh_addr = buf;
6839     img->shdr[1].sh_size = buf_size;
6840 
6841     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6842     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6843 
6844     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6845     img->shdr[4].sh_size = debug_frame_size;
6846 
6847     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6848     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6849 
6850     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6851     img->sym[1].st_value = buf;
6852     img->sym[1].st_size = buf_size;
6853 
6854     img->di.cu_low_pc = buf;
6855     img->di.cu_high_pc = buf + buf_size;
6856     img->di.fn_low_pc = buf;
6857     img->di.fn_high_pc = buf + buf_size;
6858 
6859     dfh = (DebugFrameHeader *)(img + 1);
6860     memcpy(dfh, debug_frame, debug_frame_size);
6861     dfh->fde.func_start = buf;
6862     dfh->fde.func_len = buf_size;
6863 
6864 #ifdef DEBUG_JIT
6865     /* Enable this block to be able to debug the ELF image file creation.
6866        One can use readelf, objdump, or other inspection utilities.  */
6867     {
6868         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6869         FILE *f = fopen(jit, "w+b");
6870         if (f) {
6871             if (fwrite(img, img_size, 1, f) != 1) {
6872                 /* Avoid stupid unused return value warning for fwrite.  */
6873             }
6874             fclose(f);
6875         }
6876     }
6877 #endif
6878 
6879     one_entry.symfile_addr = img;
6880     one_entry.symfile_size = img_size;
6881 
6882     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6883     __jit_debug_descriptor.relevant_entry = &one_entry;
6884     __jit_debug_descriptor.first_entry = &one_entry;
6885     __jit_debug_register_code();
6886 }
6887 #else
6888 /* No support for the feature.  Provide the entry point expected by exec.c,
6889    and implement the internal function we declared earlier.  */
6890 
6891 static void tcg_register_jit_int(const void *buf, size_t size,
6892                                  const void *debug_frame,
6893                                  size_t debug_frame_size)
6894 {
6895 }
6896 
6897 void tcg_register_jit(const void *buf, size_t buf_size)
6898 {
6899 }
6900 #endif /* ELF_HOST_MACHINE */
6901 
6902 #if !TCG_TARGET_MAYBE_vec
6903 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6904 {
6905     g_assert_not_reached();
6906 }
6907 #endif
6908