xref: /openbmc/qemu/tcg/tcg.c (revision 0cadc1eda1a3120c37c713ab6d6b7a02da0d2e6f)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to jump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 #include "qemu/timer.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg/tcg-temp-internal.h"
64 #include "tcg-internal.h"
65 #include "accel/tcg/perf.h"
66 
67 /* Forward declarations for functions declared in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
74 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Minimal DWARF .debug_frame CIE: length/id header plus the encoded
 * alignment factors and return-address column.  The length word is
 * aligned to the pointer size so the host-specific tail that follows
 * in tcg-target.c.inc lines up.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* FDE header: backreference to the CIE plus the covered code range. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;   /* start of the generated code region */
    uintptr_t func_len;     /* length of the generated code region */
} DebugFrameFDEHeader;

/* CIE immediately followed by the FDE header, as emitted for GDB. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
96 
97 typedef struct TCGLabelQemuLdst {
98     bool is_ld;             /* qemu_ld: true, qemu_st: false */
99     MemOpIdx oi;
100     TCGType type;           /* result type of a load */
101     TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
102     TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
103     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
104     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
105     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
106     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
107     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
108 } TCGLabelQemuLdst;
109 
110 static void tcg_register_jit_int(const void *buf, size_t size,
111                                  const void *debug_frame,
112                                  size_t debug_frame_size)
113     __attribute__((unused));
114 
115 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
116 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
117                        intptr_t arg2);
118 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_movi(TCGContext *s, TCGType type,
120                          TCGReg ret, tcg_target_long arg);
121 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
131 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
132 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
133 static void tcg_out_goto_tb(TCGContext *s, int which);
134 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
135                        const TCGArg args[TCG_MAX_OP_ARGS],
136                        const int const_args[TCG_MAX_OP_ARGS]);
137 #if TCG_TARGET_MAYBE_vec
138 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
139                             TCGReg dst, TCGReg src);
140 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
141                              TCGReg dst, TCGReg base, intptr_t offset);
142 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, int64_t arg);
144 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
145                            unsigned vecl, unsigned vece,
146                            const TCGArg args[TCG_MAX_OP_ARGS],
147                            const int const_args[TCG_MAX_OP_ARGS]);
148 #else
149 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
150                                    TCGReg dst, TCGReg src)
151 {
152     g_assert_not_reached();
153 }
154 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
155                                     TCGReg dst, TCGReg base, intptr_t offset)
156 {
157     g_assert_not_reached();
158 }
159 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
160                                     TCGReg dst, int64_t arg)
161 {
162     g_assert_not_reached();
163 }
164 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
165                                   unsigned vecl, unsigned vece,
166                                   const TCGArg args[TCG_MAX_OP_ARGS],
167                                   const int const_args[TCG_MAX_OP_ARGS])
168 {
169     g_assert_not_reached();
170 }
171 #endif
172 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
173                        intptr_t arg2);
174 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
175                         TCGReg base, intptr_t ofs);
176 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
177                          const TCGHelperInfo *info);
178 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
179 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
180 #ifdef TCG_TARGET_NEED_LDST_LABELS
181 static int tcg_out_ldst_finalize(TCGContext *s);
182 #endif
183 
/*
 * Per-backend parameters for building qemu_ld/st slow-path helper calls.
 */
typedef struct TCGLdstHelperParam {
    /* Optional hook to materialize the return address argument;
       NULL presumably means use l->raddr directly — confirm in callers. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers available to the backend */
} TCGLdstHelperParam;
189 
190 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
191                                    const TCGLdstHelperParam *p)
192     __attribute__((unused));
193 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
194                                   bool load_sign, const TCGLdstHelperParam *p)
195     __attribute__((unused));
196 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
197                                    const TCGLdstHelperParam *p)
198     __attribute__((unused));
199 
200 #ifdef CONFIG_SOFTMMU
201 static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
202     [MO_UB] = helper_ldub_mmu,
203     [MO_SB] = helper_ldsb_mmu,
204     [MO_UW] = helper_lduw_mmu,
205     [MO_SW] = helper_ldsw_mmu,
206     [MO_UL] = helper_ldul_mmu,
207     [MO_UQ] = helper_ldq_mmu,
208 #if TCG_TARGET_REG_BITS == 64
209     [MO_SL] = helper_ldsl_mmu,
210 #endif
211 };
212 
213 static void * const qemu_st_helpers[MO_SIZE + 1] = {
214     [MO_8]  = helper_stb_mmu,
215     [MO_16] = helper_stw_mmu,
216     [MO_32] = helper_stl_mmu,
217     [MO_64] = helper_stq_mmu,
218 };
219 #endif
220 
221 TCGContext tcg_init_ctx;
222 __thread TCGContext *tcg_ctx;
223 
224 TCGContext **tcg_ctxs;
225 unsigned int tcg_cur_ctxs;
226 unsigned int tcg_max_ctxs;
227 TCGv_env cpu_env = 0;
228 const void *tcg_code_gen_epilogue;
229 uintptr_t tcg_splitwx_diff;
230 
231 #ifndef CONFIG_TCG_INTERPRETER
232 tcg_prologue_fn *tcg_qemu_tb_exec;
233 #endif
234 
235 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
236 static TCGRegSet tcg_target_call_clobber_regs;
237 
238 #if TCG_TARGET_INSN_UNIT_SIZE == 1
239 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
240 {
241     *s->code_ptr++ = v;
242 }
243 
244 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
245                                                       uint8_t v)
246 {
247     *p = v;
248 }
249 #endif
250 
251 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
252 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
255         *s->code_ptr++ = v;
256     } else {
257         tcg_insn_unit *p = s->code_ptr;
258         memcpy(p, &v, sizeof(v));
259         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
260     }
261 }
262 
263 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
264                                                        uint16_t v)
265 {
266     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
267         *p = v;
268     } else {
269         memcpy(p, &v, sizeof(v));
270     }
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
/*
 * Append a 32-bit value to the code stream.  Direct store when the insn
 * unit is exactly 4 bytes, otherwise an unaligned-safe memcpy followed
 * by advancing the pointer by the equivalent number of units.
 */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}
285 
286 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
287                                                        uint32_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
/*
 * Append a 64-bit value to the code stream.  Direct store when the insn
 * unit is exactly 8 bytes, otherwise an unaligned-safe memcpy followed
 * by advancing the pointer by the equivalent number of units.
 */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}
308 
309 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
310                                                        uint64_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 /* label relocation processing */
321 
/*
 * Queue a relocation of host-specific @type (plus @addend) at @code_ptr
 * against label @l.  It is applied by patch_reloc() when the label's
 * final address is known, in tcg_resolve_relocs().  The record lives in
 * the per-translation pool, so no explicit free is needed.
 */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}
332 
/*
 * Bind label @l to the current output position.  The stored value is
 * the read-execute alias of the write pointer, so that relocations
 * resolve against the executable mapping under split-wx.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);   /* a label may be bound only once */
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
339 
/*
 * Allocate a fresh, unbound label in the current translation context.
 * The label is pool-allocated (freed with the translation), given a
 * unique id, and tracked in s->labels so unresolved labels can be
 * detected and fixed up at the end of code generation.
 */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
354 
/*
 * Apply every queued relocation against every label's final value.
 * Returns false as soon as one patch_reloc() fails — presumably a
 * displacement out of range for the host encoding, in which case the
 * caller retries code generation.  Assumes all labels have been bound
 * by this point (l->u.value is read unconditionally).
 */
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
371 
/* Record the current code offset as the TB's jump-reset point @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
380 
/* Record the current code offset as the TB's patchable jump insn @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
389 
/* Address of the TB's indirect jump target slot @which. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
398 
399 /* Signal overflow, starting over with fewer guest insns. */
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit back to the sigsetjmp in the translation loop;
       -2 distinguishes buffer overflow from other restart reasons. */
    siglongjmp(s->jmp_trans, -2);
}
405 
406 /*
407  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
408  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
409  *
410  * However, tcg_out_helper_load_slots reuses this field to hold an
411  * argument slot number (which may designate a argument register or an
412  * argument stack slot), converting to TCGReg once all arguments that
413  * are destined for the stack are processed.
414  */
415 typedef struct TCGMovExtend {
416     unsigned dst;
417     TCGReg src;
418     TCGType dst_type;
419     TCGType src_type;
420     MemOp src_ext;
421 } TCGMovExtend;
422 
423 /**
424  * tcg_out_movext -- move and extend
425  * @s: tcg context
426  * @dst_type: integral type for destination
427  * @dst: destination register
428  * @src_type: integral type for source
429  * @src_ext: extension to apply to source
430  * @src: source register
431  *
432  * Move or extend @src into @dst, depending on @src_ext and the types.
433  */
/*
 * See the header comment above: move @src into @dst, applying the
 * extension @src_ext, with @src_type/@dst_type selecting between the
 * 32-bit and 64-bit forms of the backend primitives.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: a plain move or a truncation suffices;
               the sign of the extension is irrelevant. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen i32 -> i64, honoring the requested signedness. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* i64 -> i64: extend the low 32 bits in place. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit data only exists on 64-bit hosts. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
484 
485 /* Minor variations on a theme, using a structure. */
/* Minor variations on a theme, using a structure. */
/* As tcg_out_movext, described by @i, but with the source overridden. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, fully described by @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
496 
497 /**
498  * tcg_out_movext2 -- move and extend two pair
499  * @s: tcg context
500  * @i1: first move description
501  * @i2: second move description
502  * @scratch: temporary register, or -1 for none
503  *
504  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
505  * between the sources and destinations.
506  */
507 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* No overlap in the hazardous direction: emit in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* i1->dst == src2.  If additionally i2->dst == src1, the two moves
       form a cycle: swap via xchg if the backend can, else spill src1
       to the scratch register. */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 first so that i1 cannot clobber i2's source. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
536 
537 #define C_PFX1(P, A)                    P##A
538 #define C_PFX2(P, A, B)                 P##A##_##B
539 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
540 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
541 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
542 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
543 
544 /* Define an enumeration for the various combinations. */
545 
546 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
547 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
548 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
549 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
550 
551 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
552 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
553 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
554 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
555 
556 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
557 
558 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
559 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
560 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
561 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
562 
563 typedef enum {
564 #include "tcg-target-con-set.h"
565 } TCGConstraintSetIndex;
566 
567 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
568 
569 #undef C_O0_I1
570 #undef C_O0_I2
571 #undef C_O0_I3
572 #undef C_O0_I4
573 #undef C_O1_I1
574 #undef C_O1_I2
575 #undef C_O1_I3
576 #undef C_O1_I4
577 #undef C_N1_I2
578 #undef C_O2_I1
579 #undef C_O2_I2
580 #undef C_O2_I3
581 #undef C_O2_I4
582 
583 /* Put all of the constraint sets into an array, indexed by the enum. */
584 
585 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
586 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
587 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
588 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
589 
590 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
591 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
592 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
593 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
594 
595 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
596 
597 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
598 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
599 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
600 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
601 
602 static const TCGTargetOpDef constraint_sets[] = {
603 #include "tcg-target-con-set.h"
604 };
605 
606 
607 #undef C_O0_I1
608 #undef C_O0_I2
609 #undef C_O0_I3
610 #undef C_O0_I4
611 #undef C_O1_I1
612 #undef C_O1_I2
613 #undef C_O1_I3
614 #undef C_O1_I4
615 #undef C_N1_I2
616 #undef C_O2_I1
617 #undef C_O2_I2
618 #undef C_O2_I3
619 #undef C_O2_I4
620 
621 /* Expand the enumerator to be returned from tcg_target_op_def(). */
622 
623 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
624 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
625 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
626 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
627 
628 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
629 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
630 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
631 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
632 
633 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
634 
635 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
636 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
637 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
638 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
639 
640 #include "tcg-target.c.inc"
641 
/*
 * Allocate the per-context plugin TB descriptor and its instruction
 * array.  A no-op when QEMU is built without plugin support.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
650 
651 /*
652  * All TCG threads except the parent (i.e. the one that called tcg_context_init
653  * and registered the target's TCG globals) must register with this function
654  * before initiating translation.
655  *
656  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
657  * of tcg_region_init() for the reasoning behind this.
658  *
659  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
660  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
661  * is not used anymore for translation once this function is called.
662  *
663  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
664  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
665  */
666 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: all threads share the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
671 #else
void tcg_register_thread(void)
{
    /* Softmmu: clone the initial context for this vCPU thread. */
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base: the copied pointers still reference the
       initial context's temps array, so rebase them onto our copy. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs (atomic: threads register concurrently). */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Slot 0 is the parent, which already did this setup. */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
700 #endif /* !CONFIG_USER_ONLY */
701 
702 /* pool based memory allocation */
/* pool based memory allocation */
/*
 * Slow path of tcg_malloc(): @size did not fit in the current pool
 * chunk.  Oversized requests get a dedicated chunk on the "large" list
 * (freed by tcg_pool_reset); otherwise we advance to (or allocate) the
 * next fixed-size chunk and serve the allocation from its start.
 * Note: chunks on the small list are reused across tcg_pool_reset.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;  /* very first allocation ever */
        } else {
            if (!p->next) {
            new_pool:
                /* Append a fresh fixed-size chunk to the small list. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Reuse the next chunk left over from a previous run. */
                p = p->next;
            }
        }
    }
    /* Serve this request from the start of chunk @p. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
743 
744 void tcg_pool_reset(TCGContext *s)
745 {
746     TCGPool *p, *t;
747     for (p = s->pool_first_large; p; p = t) {
748         t = p->next;
749         g_free(p);
750     }
751     s->pool_first_large = NULL;
752     s->pool_cur = s->pool_end = NULL;
753     s->pool_current = NULL;
754 }
755 
756 #include "exec/helper-proto.h"
757 
758 static TCGHelperInfo all_helpers[] = {
759 #include "exec/helper-tcg.h"
760 };
761 static GHashTable *helper_table;
762 
763 /*
764  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
765  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
766  * We only use these for layout in tcg_out_ld_helper_ret and
767  * tcg_out_st_helper_args, and share them between several of
768  * the helpers, with the end result that it's easier to build manually.
769  */
770 
771 #if TCG_TARGET_REG_BITS == 32
772 # define dh_typecode_ttl  dh_typecode_i32
773 #else
774 # define dh_typecode_ttl  dh_typecode_i64
775 #endif
776 
777 static TCGHelperInfo info_helper_ld32_mmu = {
778     .flags = TCG_CALL_NO_WG,
779     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
780               | dh_typemask(env, 1)
781               | dh_typemask(tl, 2)   /* target_ulong addr */
782               | dh_typemask(i32, 3)  /* unsigned oi */
783               | dh_typemask(ptr, 4)  /* uintptr_t ra */
784 };
785 
786 static TCGHelperInfo info_helper_ld64_mmu = {
787     .flags = TCG_CALL_NO_WG,
788     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
789               | dh_typemask(env, 1)
790               | dh_typemask(tl, 2)   /* target_ulong addr */
791               | dh_typemask(i32, 3)  /* unsigned oi */
792               | dh_typemask(ptr, 4)  /* uintptr_t ra */
793 };
794 
795 static TCGHelperInfo info_helper_st32_mmu = {
796     .flags = TCG_CALL_NO_WG,
797     .typemask = dh_typemask(void, 0)
798               | dh_typemask(env, 1)
799               | dh_typemask(tl, 2)   /* target_ulong addr */
800               | dh_typemask(i32, 3)  /* uint32_t data */
801               | dh_typemask(i32, 4)  /* unsigned oi */
802               | dh_typemask(ptr, 5)  /* uintptr_t ra */
803 };
804 
805 static TCGHelperInfo info_helper_st64_mmu = {
806     .flags = TCG_CALL_NO_WG,
807     .typemask = dh_typemask(void, 0)
808               | dh_typemask(env, 1)
809               | dh_typemask(tl, 2)   /* target_ulong addr */
810               | dh_typemask(i64, 3)  /* uint64_t data */
811               | dh_typemask(i32, 4)  /* unsigned oi */
812               | dh_typemask(ptr, 5)  /* uintptr_t ra */
813 };
814 
815 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map a single dh_typecode_* value to the corresponding libffi type
 * descriptor.  Aborts on an unknown typecode.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                 /* libffi element lists are NULL-terminated */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
852 
/*
 * Build an ffi_cif call descriptor for every helper, deduplicated by
 * typemask.  The cif allocations are intentionally leaked: they live
 * for the lifetime of the process and are referenced from info->cif.
 * Only the temporary dedup hash table is destroyed.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];   /* arg_types storage, allocated inline */
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a cif already built for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);   /* 3 bits of typemask per arg */
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                /* Field 0 is the return type; args start at field 1. */
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
904 #endif /* CONFIG_TCG_INTERPRETER */
905 
906 static inline bool arg_slot_reg_p(unsigned arg_slot)
907 {
908     /*
909      * Split the sizeof away from the comparison to avoid Werror from
910      * "unsigned < 0 is always false", when iarg_regs is empty.
911      */
912     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
913     return arg_slot < nreg;
914 }
915 
916 static inline int arg_slot_stk_ofs(unsigned arg_slot)
917 {
918     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
919     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
920 
921     tcg_debug_assert(stk_slot < max);
922     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
923 }
924 
/* Running totals while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
931 
932 static void layout_arg_even(TCGCumulativeArgs *cum)
933 {
934     cum->arg_slot += cum->arg_slot & 1;
935 }
936 
937 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
938                          TCGCallArgumentKind kind)
939 {
940     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
941 
942     *loc = (TCGCallArgumentLoc){
943         .kind = kind,
944         .arg_idx = cum->arg_idx,
945         .arg_slot = cum->arg_slot,
946     };
947     cum->info_in_idx++;
948     cum->arg_slot++;
949 }
950 
951 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
952                                 TCGHelperInfo *info, int n)
953 {
954     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
955 
956     for (int i = 0; i < n; ++i) {
957         /* Layout all using the same arg_idx, adjusting the subindex. */
958         loc[i] = (TCGCallArgumentLoc){
959             .kind = TCG_CALL_ARG_NORMAL,
960             .arg_idx = cum->arg_idx,
961             .tmp_subindex = i,
962             .arg_slot = cum->arg_slot + i,
963         };
964     }
965     cum->info_in_idx += n;
966     cum->arg_slot += n;
967 }
968 
/*
 * Record one 128-bit input argument passed by reference: one normal
 * slot for the pointer, plus @n words of "ref_slot" stack space for
 * the callee-visible copy of the value.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    /* NB: capture loc before layout_arg_1 advances info_in_idx. */
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
1000 
/*
 * Parse info->typemask and fill in the helper's call layout:
 * nr_out/out_kind for the return value, and one TCGCallArgumentLoc
 * per word of each input argument (in[] / nr_in).  The typemask
 * packs 3-bit typecodes: return type in bits 0-2, then one field
 * per argument.  Placement follows the TCG_TARGET_CALL_* ABI knobs.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Place the argument per the target ABI for its width. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low typecode bit distinguishes signed from unsigned. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1181 
1182 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1183 static void process_op_defs(TCGContext *s);
1184 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1185                                             TCGReg reg, const char *name);
1186 
/*
 * One-time initialization of global TCG state: opcode argument
 * constraint storage, the helper lookup table and call layouts,
 * backend setup, register allocation orders, per-thread context
 * bookkeeping, and the fixed "env" global.  @max_cpus bounds the
 * number of contexts allocated for softmmu builds.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    /* The qemu_ld/st slow-path helpers are not in all_helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n now counts the leading call-saved registers; reverse them. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1275 
/*
 * Public TCG startup entry point: initialize the shared context,
 * then set up the code_gen buffer regions (@tb_size bytes total,
 * split-wx mode per @splitwx, shared among up to @max_cpus threads).
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1281 
1282 /*
1283  * Allocate TBs right before their corresponding translated code, making
1284  * sure that TBs and code are on different cache lines.
1285  */
1286 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1287 {
1288     uintptr_t align = qemu_icache_linesize;
1289     TranslationBlock *tb;
1290     void *next;
1291 
1292  retry:
1293     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1294     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1295 
1296     if (unlikely(next > s->code_gen_highwater)) {
1297         if (tcg_region_alloc(s)) {
1298             return NULL;
1299         }
1300         goto retry;
1301     }
1302     qatomic_set(&s->code_gen_ptr, next);
1303     s->data_gen_ptr = NULL;
1304     return tb;
1305 }
1306 
/*
 * Emit the host prologue/epilogue at the start of the code buffer,
 * flush the instruction cache over it, optionally log a disassembly,
 * and mark the region so subsequent TBs are placed after it.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point into generated code is the prologue itself. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly-written prologue visible to the execution view. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble code, then dump the trailing data pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1388 
1389 void tcg_func_start(TCGContext *s)
1390 {
1391     tcg_pool_reset(s);
1392     s->nb_temps = s->nb_globals;
1393 
1394     /* No temps have been previously allocated for size or locality.  */
1395     memset(s->free_temps, 0, sizeof(s->free_temps));
1396 
1397     /* No constant temps have been previously allocated. */
1398     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1399         if (s->const_table[i]) {
1400             g_hash_table_remove_all(s->const_table[i]);
1401         }
1402     }
1403 
1404     s->nb_ops = 0;
1405     s->nb_labels = 0;
1406     s->current_frame_offset = s->frame_start;
1407 
1408 #ifdef CONFIG_DEBUG_TCG
1409     s->goto_tb_issue_mask = 0;
1410 #endif
1411 
1412     QTAILQ_INIT(&s->ops);
1413     QTAILQ_INIT(&s->free_ops);
1414     QSIMPLEQ_INIT(&s->labels);
1415 }
1416 
1417 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1418 {
1419     int n = s->nb_temps++;
1420 
1421     if (n >= TCG_MAX_TEMPS) {
1422         tcg_raise_tb_overflow(s);
1423     }
1424     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1425 }
1426 
1427 static TCGTemp *tcg_global_alloc(TCGContext *s)
1428 {
1429     TCGTemp *ts;
1430 
1431     tcg_debug_assert(s->nb_globals == s->nb_temps);
1432     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1433     s->nb_globals++;
1434     ts = tcg_temp_alloc(s);
1435     ts->kind = TEMP_GLOBAL;
1436 
1437     return ts;
1438 }
1439 
1440 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1441                                             TCGReg reg, const char *name)
1442 {
1443     TCGTemp *ts;
1444 
1445     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1446 
1447     ts = tcg_global_alloc(s);
1448     ts->base_type = type;
1449     ts->type = type;
1450     ts->kind = TEMP_FIXED;
1451     ts->reg = reg;
1452     ts->name = name;
1453     tcg_regset_set_reg(s->reserved_regs, reg);
1454 
1455     return ts;
1456 }
1457 
/*
 * Record the TB stack frame [start, start+size) relative to @reg,
 * and create the fixed "_frame" global used as its base pointer.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1465 
/*
 * Create a global temp backed by memory at @base + @offset.
 * If @base is itself a (non-fixed) global, the new temp is flagged
 * indirect_reg and s->nb_indirects is bumped accordingly.  On 32-bit
 * hosts a 64-bit global is split into two consecutive 32-bit temps
 * named "<name>_0" and "<name>_1".
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into low/high halves at offset and offset + 4. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The two halves must occupy consecutive temp slots. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1525 
/*
 * Allocate a temporary of @type and @kind (TEMP_EBB or TEMP_TB).
 * Freed EBB temps are recycled via the free_temps bitmaps; types
 * wider than the host register are backed by multiple consecutive
 * TCGTemps of TCG_TYPE_REG, distinguished by temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Remaining pieces follow in consecutive temp slots. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1589 
/*
 * Allocate a new EBB vector temp of the given type.  Debug builds
 * verify that the backend actually supports that vector width.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1613 
1614 /* Create a new temp of the same type as an existing temp.  */
1615 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1616 {
1617     TCGTemp *t = tcgv_vec_temp(match);
1618 
1619     tcg_debug_assert(t->temp_allocated != 0);
1620 
1621     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1622     return temp_tcgv_vec(t);
1623 }
1624 
1625 void tcg_temp_free_internal(TCGTemp *ts)
1626 {
1627     TCGContext *s = tcg_ctx;
1628 
1629     switch (ts->kind) {
1630     case TEMP_CONST:
1631     case TEMP_TB:
1632         /* Silently ignore free. */
1633         break;
1634     case TEMP_EBB:
1635         tcg_debug_assert(ts->temp_allocated != 0);
1636         ts->temp_allocated = 0;
1637         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1638         break;
1639     default:
1640         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1641         g_assert_not_reached();
1642     }
1643 }
1644 
/*
 * Return the interned TEMP_CONST temp for (@type, @val), creating it
 * on first use.  Constants are deduplicated per-type through a hash
 * table keyed by the 64-bit value stored in the temp itself.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant split across two 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp; temps are never freed, so it stays valid. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1699 
1700 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1701 {
1702     val = dup_const(vece, val);
1703     return temp_tcgv_vec(tcg_constant_internal(type, val));
1704 }
1705 
1706 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1707 {
1708     TCGTemp *t = tcgv_vec_temp(match);
1709 
1710     tcg_debug_assert(t->temp_allocated != 0);
1711     return tcg_constant_vec(t->base_type, vece, val);
1712 }
1713 
1714 /* Return true if OP may appear in the opcode stream.
1715    Test the runtime variable that controls each opcode.  */
1716 bool tcg_op_supported(TCGOpcode op)
1717 {
1718     const bool have_vec
1719         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1720 
1721     switch (op) {
1722     case INDEX_op_discard:
1723     case INDEX_op_set_label:
1724     case INDEX_op_call:
1725     case INDEX_op_br:
1726     case INDEX_op_mb:
1727     case INDEX_op_insn_start:
1728     case INDEX_op_exit_tb:
1729     case INDEX_op_goto_tb:
1730     case INDEX_op_goto_ptr:
1731     case INDEX_op_qemu_ld_i32:
1732     case INDEX_op_qemu_st_i32:
1733     case INDEX_op_qemu_ld_i64:
1734     case INDEX_op_qemu_st_i64:
1735         return true;
1736 
1737     case INDEX_op_qemu_st8_i32:
1738         return TCG_TARGET_HAS_qemu_st8_i32;
1739 
1740     case INDEX_op_mov_i32:
1741     case INDEX_op_setcond_i32:
1742     case INDEX_op_brcond_i32:
1743     case INDEX_op_ld8u_i32:
1744     case INDEX_op_ld8s_i32:
1745     case INDEX_op_ld16u_i32:
1746     case INDEX_op_ld16s_i32:
1747     case INDEX_op_ld_i32:
1748     case INDEX_op_st8_i32:
1749     case INDEX_op_st16_i32:
1750     case INDEX_op_st_i32:
1751     case INDEX_op_add_i32:
1752     case INDEX_op_sub_i32:
1753     case INDEX_op_mul_i32:
1754     case INDEX_op_and_i32:
1755     case INDEX_op_or_i32:
1756     case INDEX_op_xor_i32:
1757     case INDEX_op_shl_i32:
1758     case INDEX_op_shr_i32:
1759     case INDEX_op_sar_i32:
1760         return true;
1761 
1762     case INDEX_op_movcond_i32:
1763         return TCG_TARGET_HAS_movcond_i32;
1764     case INDEX_op_div_i32:
1765     case INDEX_op_divu_i32:
1766         return TCG_TARGET_HAS_div_i32;
1767     case INDEX_op_rem_i32:
1768     case INDEX_op_remu_i32:
1769         return TCG_TARGET_HAS_rem_i32;
1770     case INDEX_op_div2_i32:
1771     case INDEX_op_divu2_i32:
1772         return TCG_TARGET_HAS_div2_i32;
1773     case INDEX_op_rotl_i32:
1774     case INDEX_op_rotr_i32:
1775         return TCG_TARGET_HAS_rot_i32;
1776     case INDEX_op_deposit_i32:
1777         return TCG_TARGET_HAS_deposit_i32;
1778     case INDEX_op_extract_i32:
1779         return TCG_TARGET_HAS_extract_i32;
1780     case INDEX_op_sextract_i32:
1781         return TCG_TARGET_HAS_sextract_i32;
1782     case INDEX_op_extract2_i32:
1783         return TCG_TARGET_HAS_extract2_i32;
1784     case INDEX_op_add2_i32:
1785         return TCG_TARGET_HAS_add2_i32;
1786     case INDEX_op_sub2_i32:
1787         return TCG_TARGET_HAS_sub2_i32;
1788     case INDEX_op_mulu2_i32:
1789         return TCG_TARGET_HAS_mulu2_i32;
1790     case INDEX_op_muls2_i32:
1791         return TCG_TARGET_HAS_muls2_i32;
1792     case INDEX_op_muluh_i32:
1793         return TCG_TARGET_HAS_muluh_i32;
1794     case INDEX_op_mulsh_i32:
1795         return TCG_TARGET_HAS_mulsh_i32;
1796     case INDEX_op_ext8s_i32:
1797         return TCG_TARGET_HAS_ext8s_i32;
1798     case INDEX_op_ext16s_i32:
1799         return TCG_TARGET_HAS_ext16s_i32;
1800     case INDEX_op_ext8u_i32:
1801         return TCG_TARGET_HAS_ext8u_i32;
1802     case INDEX_op_ext16u_i32:
1803         return TCG_TARGET_HAS_ext16u_i32;
1804     case INDEX_op_bswap16_i32:
1805         return TCG_TARGET_HAS_bswap16_i32;
1806     case INDEX_op_bswap32_i32:
1807         return TCG_TARGET_HAS_bswap32_i32;
1808     case INDEX_op_not_i32:
1809         return TCG_TARGET_HAS_not_i32;
1810     case INDEX_op_neg_i32:
1811         return TCG_TARGET_HAS_neg_i32;
1812     case INDEX_op_andc_i32:
1813         return TCG_TARGET_HAS_andc_i32;
1814     case INDEX_op_orc_i32:
1815         return TCG_TARGET_HAS_orc_i32;
1816     case INDEX_op_eqv_i32:
1817         return TCG_TARGET_HAS_eqv_i32;
1818     case INDEX_op_nand_i32:
1819         return TCG_TARGET_HAS_nand_i32;
1820     case INDEX_op_nor_i32:
1821         return TCG_TARGET_HAS_nor_i32;
1822     case INDEX_op_clz_i32:
1823         return TCG_TARGET_HAS_clz_i32;
1824     case INDEX_op_ctz_i32:
1825         return TCG_TARGET_HAS_ctz_i32;
1826     case INDEX_op_ctpop_i32:
1827         return TCG_TARGET_HAS_ctpop_i32;
1828 
1829     case INDEX_op_brcond2_i32:
1830     case INDEX_op_setcond2_i32:
1831         return TCG_TARGET_REG_BITS == 32;
1832 
1833     case INDEX_op_mov_i64:
1834     case INDEX_op_setcond_i64:
1835     case INDEX_op_brcond_i64:
1836     case INDEX_op_ld8u_i64:
1837     case INDEX_op_ld8s_i64:
1838     case INDEX_op_ld16u_i64:
1839     case INDEX_op_ld16s_i64:
1840     case INDEX_op_ld32u_i64:
1841     case INDEX_op_ld32s_i64:
1842     case INDEX_op_ld_i64:
1843     case INDEX_op_st8_i64:
1844     case INDEX_op_st16_i64:
1845     case INDEX_op_st32_i64:
1846     case INDEX_op_st_i64:
1847     case INDEX_op_add_i64:
1848     case INDEX_op_sub_i64:
1849     case INDEX_op_mul_i64:
1850     case INDEX_op_and_i64:
1851     case INDEX_op_or_i64:
1852     case INDEX_op_xor_i64:
1853     case INDEX_op_shl_i64:
1854     case INDEX_op_shr_i64:
1855     case INDEX_op_sar_i64:
1856     case INDEX_op_ext_i32_i64:
1857     case INDEX_op_extu_i32_i64:
1858         return TCG_TARGET_REG_BITS == 64;
1859 
1860     case INDEX_op_movcond_i64:
1861         return TCG_TARGET_HAS_movcond_i64;
1862     case INDEX_op_div_i64:
1863     case INDEX_op_divu_i64:
1864         return TCG_TARGET_HAS_div_i64;
1865     case INDEX_op_rem_i64:
1866     case INDEX_op_remu_i64:
1867         return TCG_TARGET_HAS_rem_i64;
1868     case INDEX_op_div2_i64:
1869     case INDEX_op_divu2_i64:
1870         return TCG_TARGET_HAS_div2_i64;
1871     case INDEX_op_rotl_i64:
1872     case INDEX_op_rotr_i64:
1873         return TCG_TARGET_HAS_rot_i64;
1874     case INDEX_op_deposit_i64:
1875         return TCG_TARGET_HAS_deposit_i64;
1876     case INDEX_op_extract_i64:
1877         return TCG_TARGET_HAS_extract_i64;
1878     case INDEX_op_sextract_i64:
1879         return TCG_TARGET_HAS_sextract_i64;
1880     case INDEX_op_extract2_i64:
1881         return TCG_TARGET_HAS_extract2_i64;
1882     case INDEX_op_extrl_i64_i32:
1883         return TCG_TARGET_HAS_extrl_i64_i32;
1884     case INDEX_op_extrh_i64_i32:
1885         return TCG_TARGET_HAS_extrh_i64_i32;
1886     case INDEX_op_ext8s_i64:
1887         return TCG_TARGET_HAS_ext8s_i64;
1888     case INDEX_op_ext16s_i64:
1889         return TCG_TARGET_HAS_ext16s_i64;
1890     case INDEX_op_ext32s_i64:
1891         return TCG_TARGET_HAS_ext32s_i64;
1892     case INDEX_op_ext8u_i64:
1893         return TCG_TARGET_HAS_ext8u_i64;
1894     case INDEX_op_ext16u_i64:
1895         return TCG_TARGET_HAS_ext16u_i64;
1896     case INDEX_op_ext32u_i64:
1897         return TCG_TARGET_HAS_ext32u_i64;
1898     case INDEX_op_bswap16_i64:
1899         return TCG_TARGET_HAS_bswap16_i64;
1900     case INDEX_op_bswap32_i64:
1901         return TCG_TARGET_HAS_bswap32_i64;
1902     case INDEX_op_bswap64_i64:
1903         return TCG_TARGET_HAS_bswap64_i64;
1904     case INDEX_op_not_i64:
1905         return TCG_TARGET_HAS_not_i64;
1906     case INDEX_op_neg_i64:
1907         return TCG_TARGET_HAS_neg_i64;
1908     case INDEX_op_andc_i64:
1909         return TCG_TARGET_HAS_andc_i64;
1910     case INDEX_op_orc_i64:
1911         return TCG_TARGET_HAS_orc_i64;
1912     case INDEX_op_eqv_i64:
1913         return TCG_TARGET_HAS_eqv_i64;
1914     case INDEX_op_nand_i64:
1915         return TCG_TARGET_HAS_nand_i64;
1916     case INDEX_op_nor_i64:
1917         return TCG_TARGET_HAS_nor_i64;
1918     case INDEX_op_clz_i64:
1919         return TCG_TARGET_HAS_clz_i64;
1920     case INDEX_op_ctz_i64:
1921         return TCG_TARGET_HAS_ctz_i64;
1922     case INDEX_op_ctpop_i64:
1923         return TCG_TARGET_HAS_ctpop_i64;
1924     case INDEX_op_add2_i64:
1925         return TCG_TARGET_HAS_add2_i64;
1926     case INDEX_op_sub2_i64:
1927         return TCG_TARGET_HAS_sub2_i64;
1928     case INDEX_op_mulu2_i64:
1929         return TCG_TARGET_HAS_mulu2_i64;
1930     case INDEX_op_muls2_i64:
1931         return TCG_TARGET_HAS_muls2_i64;
1932     case INDEX_op_muluh_i64:
1933         return TCG_TARGET_HAS_muluh_i64;
1934     case INDEX_op_mulsh_i64:
1935         return TCG_TARGET_HAS_mulsh_i64;
1936 
1937     case INDEX_op_mov_vec:
1938     case INDEX_op_dup_vec:
1939     case INDEX_op_dupm_vec:
1940     case INDEX_op_ld_vec:
1941     case INDEX_op_st_vec:
1942     case INDEX_op_add_vec:
1943     case INDEX_op_sub_vec:
1944     case INDEX_op_and_vec:
1945     case INDEX_op_or_vec:
1946     case INDEX_op_xor_vec:
1947     case INDEX_op_cmp_vec:
1948         return have_vec;
1949     case INDEX_op_dup2_vec:
1950         return have_vec && TCG_TARGET_REG_BITS == 32;
1951     case INDEX_op_not_vec:
1952         return have_vec && TCG_TARGET_HAS_not_vec;
1953     case INDEX_op_neg_vec:
1954         return have_vec && TCG_TARGET_HAS_neg_vec;
1955     case INDEX_op_abs_vec:
1956         return have_vec && TCG_TARGET_HAS_abs_vec;
1957     case INDEX_op_andc_vec:
1958         return have_vec && TCG_TARGET_HAS_andc_vec;
1959     case INDEX_op_orc_vec:
1960         return have_vec && TCG_TARGET_HAS_orc_vec;
1961     case INDEX_op_nand_vec:
1962         return have_vec && TCG_TARGET_HAS_nand_vec;
1963     case INDEX_op_nor_vec:
1964         return have_vec && TCG_TARGET_HAS_nor_vec;
1965     case INDEX_op_eqv_vec:
1966         return have_vec && TCG_TARGET_HAS_eqv_vec;
1967     case INDEX_op_mul_vec:
1968         return have_vec && TCG_TARGET_HAS_mul_vec;
1969     case INDEX_op_shli_vec:
1970     case INDEX_op_shri_vec:
1971     case INDEX_op_sari_vec:
1972         return have_vec && TCG_TARGET_HAS_shi_vec;
1973     case INDEX_op_shls_vec:
1974     case INDEX_op_shrs_vec:
1975     case INDEX_op_sars_vec:
1976         return have_vec && TCG_TARGET_HAS_shs_vec;
1977     case INDEX_op_shlv_vec:
1978     case INDEX_op_shrv_vec:
1979     case INDEX_op_sarv_vec:
1980         return have_vec && TCG_TARGET_HAS_shv_vec;
1981     case INDEX_op_rotli_vec:
1982         return have_vec && TCG_TARGET_HAS_roti_vec;
1983     case INDEX_op_rotls_vec:
1984         return have_vec && TCG_TARGET_HAS_rots_vec;
1985     case INDEX_op_rotlv_vec:
1986     case INDEX_op_rotrv_vec:
1987         return have_vec && TCG_TARGET_HAS_rotv_vec;
1988     case INDEX_op_ssadd_vec:
1989     case INDEX_op_usadd_vec:
1990     case INDEX_op_sssub_vec:
1991     case INDEX_op_ussub_vec:
1992         return have_vec && TCG_TARGET_HAS_sat_vec;
1993     case INDEX_op_smin_vec:
1994     case INDEX_op_umin_vec:
1995     case INDEX_op_smax_vec:
1996     case INDEX_op_umax_vec:
1997         return have_vec && TCG_TARGET_HAS_minmax_vec;
1998     case INDEX_op_bitsel_vec:
1999         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2000     case INDEX_op_cmpsel_vec:
2001         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2002 
2003     default:
2004         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2005         return true;
2006     }
2007 }
2008 
2009 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2010 
/*
 * Emit a call op invoking helper FUNC, with outputs into RET (NULL when
 * the helper returns void) and inputs taken from ARGS.
 *
 * The call ABI is described by the TCGHelperInfo registered for FUNC in
 * helper_table; note NARGS itself is not consulted here — the input
 * count comes from info->nr_in, and ARGS is indexed via loc->arg_idx.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    /* Temporaries created below for 32->64-bit argument extension. */
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + two trailing slots for func pointer and info. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word result: RET is the first of N consecutive temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /*
                 * The ABI requires this 32-bit input widened to 64 bits:
                 * extend into a fresh temp, remembered in extend_free so
                 * it can be released once the op has been queued.
                 */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* The two final slots carry the callee and its ABI description. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The op now references the extension temps; they may be freed. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2097 
2098 static void tcg_reg_alloc_start(TCGContext *s)
2099 {
2100     int i, n;
2101 
2102     for (i = 0, n = s->nb_temps; i < n; i++) {
2103         TCGTemp *ts = &s->temps[i];
2104         TCGTempVal val = TEMP_VAL_MEM;
2105 
2106         switch (ts->kind) {
2107         case TEMP_CONST:
2108             val = TEMP_VAL_CONST;
2109             break;
2110         case TEMP_FIXED:
2111             val = TEMP_VAL_REG;
2112             break;
2113         case TEMP_GLOBAL:
2114             break;
2115         case TEMP_EBB:
2116             val = TEMP_VAL_DEAD;
2117             /* fall through */
2118         case TEMP_TB:
2119             ts->mem_allocated = 0;
2120             break;
2121         default:
2122             g_assert_not_reached();
2123         }
2124         ts->val_type = val;
2125     }
2126 
2127     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2128 }
2129 
2130 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2131                                  TCGTemp *ts)
2132 {
2133     int idx = temp_idx(ts);
2134 
2135     switch (ts->kind) {
2136     case TEMP_FIXED:
2137     case TEMP_GLOBAL:
2138         pstrcpy(buf, buf_size, ts->name);
2139         break;
2140     case TEMP_TB:
2141         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2142         break;
2143     case TEMP_EBB:
2144         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2145         break;
2146     case TEMP_CONST:
2147         switch (ts->type) {
2148         case TCG_TYPE_I32:
2149             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2150             break;
2151 #if TCG_TARGET_REG_BITS > 32
2152         case TCG_TYPE_I64:
2153             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2154             break;
2155 #endif
2156         case TCG_TYPE_V64:
2157         case TCG_TYPE_V128:
2158         case TCG_TYPE_V256:
2159             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2160                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2161             break;
2162         default:
2163             g_assert_not_reached();
2164         }
2165         break;
2166     }
2167     return buf;
2168 }
2169 
/* Render op argument ARG (a temp reference) into BUF; returns BUF. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
2175 
/* Printable names for TCGCond comparison codes, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2191 
/*
 * Printable names for the MemOp size/sign/endianness bits, indexed by
 * (memop & (MO_BSWAP | MO_SSIZE)); unlisted combinations are NULL and
 * force tcg_dump_ops to fall back to numeric output.
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
2207 
/* Printable names for the MemOp alignment field, indexed by
   (memop & MO_AMASK) >> MO_ASHIFT. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2218 
/* Printable names for the MemOp atomicity field, indexed by
   (memop & MO_ATOM_MASK) >> MO_ATOM_SHIFT. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2227 
/* Printable names for TCG_BSWAP_* flag combinations; combinations not
   listed here print numerically in tcg_dump_ops. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2235 
2236 static inline bool tcg_regset_single(TCGRegSet d)
2237 {
2238     return (d & (d - 1)) == 0;
2239 }
2240 
2241 static inline TCGReg tcg_regset_first(TCGRegSet d)
2242 {
2243     if (TCG_TARGET_NB_REGS <= 32) {
2244         return ctz32(d);
2245     } else {
2246         return ctz64(d);
2247     }
2248 }
2249 
/*
 * fprintf wrapper that returns only the number of characters output;
 * a negative (error) return from fprintf is reported as 0 so callers
 * may accumulate column counts without per-call error checks.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2253 
2254 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2255 {
2256     char buf[128];
2257     TCGOp *op;
2258 
2259     QTAILQ_FOREACH(op, &s->ops, link) {
2260         int i, k, nb_oargs, nb_iargs, nb_cargs;
2261         const TCGOpDef *def;
2262         TCGOpcode c;
2263         int col = 0;
2264 
2265         c = op->opc;
2266         def = &tcg_op_defs[c];
2267 
2268         if (c == INDEX_op_insn_start) {
2269             nb_oargs = 0;
2270             col += ne_fprintf(f, "\n ----");
2271 
2272             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2273                 target_ulong a;
2274 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2275                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2276 #else
2277                 a = op->args[i];
2278 #endif
2279                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
2280             }
2281         } else if (c == INDEX_op_call) {
2282             const TCGHelperInfo *info = tcg_call_info(op);
2283             void *func = tcg_call_func(op);
2284 
2285             /* variable number of arguments */
2286             nb_oargs = TCGOP_CALLO(op);
2287             nb_iargs = TCGOP_CALLI(op);
2288             nb_cargs = def->nb_cargs;
2289 
2290             col += ne_fprintf(f, " %s ", def->name);
2291 
2292             /*
2293              * Print the function name from TCGHelperInfo, if available.
2294              * Note that plugins have a template function for the info,
2295              * but the actual function pointer comes from the plugin.
2296              */
2297             if (func == info->func) {
2298                 col += ne_fprintf(f, "%s", info->name);
2299             } else {
2300                 col += ne_fprintf(f, "plugin(%p)", func);
2301             }
2302 
2303             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2304             for (i = 0; i < nb_oargs; i++) {
2305                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2306                                                             op->args[i]));
2307             }
2308             for (i = 0; i < nb_iargs; i++) {
2309                 TCGArg arg = op->args[nb_oargs + i];
2310                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2311                 col += ne_fprintf(f, ",%s", t);
2312             }
2313         } else {
2314             col += ne_fprintf(f, " %s ", def->name);
2315 
2316             nb_oargs = def->nb_oargs;
2317             nb_iargs = def->nb_iargs;
2318             nb_cargs = def->nb_cargs;
2319 
2320             if (def->flags & TCG_OPF_VECTOR) {
2321                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2322                                   8 << TCGOP_VECE(op));
2323             }
2324 
2325             k = 0;
2326             for (i = 0; i < nb_oargs; i++) {
2327                 const char *sep =  k ? "," : "";
2328                 col += ne_fprintf(f, "%s%s", sep,
2329                                   tcg_get_arg_str(s, buf, sizeof(buf),
2330                                                   op->args[k++]));
2331             }
2332             for (i = 0; i < nb_iargs; i++) {
2333                 const char *sep =  k ? "," : "";
2334                 col += ne_fprintf(f, "%s%s", sep,
2335                                   tcg_get_arg_str(s, buf, sizeof(buf),
2336                                                   op->args[k++]));
2337             }
2338             switch (c) {
2339             case INDEX_op_brcond_i32:
2340             case INDEX_op_setcond_i32:
2341             case INDEX_op_movcond_i32:
2342             case INDEX_op_brcond2_i32:
2343             case INDEX_op_setcond2_i32:
2344             case INDEX_op_brcond_i64:
2345             case INDEX_op_setcond_i64:
2346             case INDEX_op_movcond_i64:
2347             case INDEX_op_cmp_vec:
2348             case INDEX_op_cmpsel_vec:
2349                 if (op->args[k] < ARRAY_SIZE(cond_name)
2350                     && cond_name[op->args[k]]) {
2351                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2352                 } else {
2353                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2354                 }
2355                 i = 1;
2356                 break;
2357             case INDEX_op_qemu_ld_i32:
2358             case INDEX_op_qemu_st_i32:
2359             case INDEX_op_qemu_st8_i32:
2360             case INDEX_op_qemu_ld_i64:
2361             case INDEX_op_qemu_st_i64:
2362                 {
2363                     const char *s_al, *s_op, *s_at;
2364                     MemOpIdx oi = op->args[k++];
2365                     MemOp op = get_memop(oi);
2366                     unsigned ix = get_mmuidx(oi);
2367 
2368                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2369                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2370                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2371                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2372 
2373                     /* If all fields are accounted for, print symbolically. */
2374                     if (!op && s_al && s_op && s_at) {
2375                         col += ne_fprintf(f, ",%s%s%s,%u",
2376                                           s_at, s_al, s_op, ix);
2377                     } else {
2378                         op = get_memop(oi);
2379                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2380                     }
2381                     i = 1;
2382                 }
2383                 break;
2384             case INDEX_op_bswap16_i32:
2385             case INDEX_op_bswap16_i64:
2386             case INDEX_op_bswap32_i32:
2387             case INDEX_op_bswap32_i64:
2388             case INDEX_op_bswap64_i64:
2389                 {
2390                     TCGArg flags = op->args[k];
2391                     const char *name = NULL;
2392 
2393                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2394                         name = bswap_flag_name[flags];
2395                     }
2396                     if (name) {
2397                         col += ne_fprintf(f, ",%s", name);
2398                     } else {
2399                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2400                     }
2401                     i = k = 1;
2402                 }
2403                 break;
2404             default:
2405                 i = 0;
2406                 break;
2407             }
2408             switch (c) {
2409             case INDEX_op_set_label:
2410             case INDEX_op_br:
2411             case INDEX_op_brcond_i32:
2412             case INDEX_op_brcond_i64:
2413             case INDEX_op_brcond2_i32:
2414                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2415                                   arg_label(op->args[k])->id);
2416                 i++, k++;
2417                 break;
2418             case INDEX_op_mb:
2419                 {
2420                     TCGBar membar = op->args[k];
2421                     const char *b_op, *m_op;
2422 
2423                     switch (membar & TCG_BAR_SC) {
2424                     case 0:
2425                         b_op = "none";
2426                         break;
2427                     case TCG_BAR_LDAQ:
2428                         b_op = "acq";
2429                         break;
2430                     case TCG_BAR_STRL:
2431                         b_op = "rel";
2432                         break;
2433                     case TCG_BAR_SC:
2434                         b_op = "seq";
2435                         break;
2436                     default:
2437                         g_assert_not_reached();
2438                     }
2439 
2440                     switch (membar & TCG_MO_ALL) {
2441                     case 0:
2442                         m_op = "none";
2443                         break;
2444                     case TCG_MO_LD_LD:
2445                         m_op = "rr";
2446                         break;
2447                     case TCG_MO_LD_ST:
2448                         m_op = "rw";
2449                         break;
2450                     case TCG_MO_ST_LD:
2451                         m_op = "wr";
2452                         break;
2453                     case TCG_MO_ST_ST:
2454                         m_op = "ww";
2455                         break;
2456                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2457                         m_op = "rr+rw";
2458                         break;
2459                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2460                         m_op = "rr+wr";
2461                         break;
2462                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2463                         m_op = "rr+ww";
2464                         break;
2465                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2466                         m_op = "rw+wr";
2467                         break;
2468                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2469                         m_op = "rw+ww";
2470                         break;
2471                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2472                         m_op = "wr+ww";
2473                         break;
2474                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2475                         m_op = "rr+rw+wr";
2476                         break;
2477                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2478                         m_op = "rr+rw+ww";
2479                         break;
2480                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2481                         m_op = "rr+wr+ww";
2482                         break;
2483                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2484                         m_op = "rw+wr+ww";
2485                         break;
2486                     case TCG_MO_ALL:
2487                         m_op = "all";
2488                         break;
2489                     default:
2490                         g_assert_not_reached();
2491                     }
2492 
2493                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2494                     i++, k++;
2495                 }
2496                 break;
2497             default:
2498                 break;
2499             }
2500             for (; i < nb_cargs; i++, k++) {
2501                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2502                                   op->args[k]);
2503             }
2504         }
2505 
2506         if (have_prefs || op->life) {
2507             for (; col < 40; ++col) {
2508                 putc(' ', f);
2509             }
2510         }
2511 
2512         if (op->life) {
2513             unsigned life = op->life;
2514 
2515             if (life & (SYNC_ARG * 3)) {
2516                 ne_fprintf(f, "  sync:");
2517                 for (i = 0; i < 2; ++i) {
2518                     if (life & (SYNC_ARG << i)) {
2519                         ne_fprintf(f, " %d", i);
2520                     }
2521                 }
2522             }
2523             life /= DEAD_ARG;
2524             if (life) {
2525                 ne_fprintf(f, "  dead:");
2526                 for (i = 0; life; ++i, life >>= 1) {
2527                     if (life & 1) {
2528                         ne_fprintf(f, " %d", i);
2529                     }
2530                 }
2531             }
2532         }
2533 
2534         if (have_prefs) {
2535             for (i = 0; i < nb_oargs; ++i) {
2536                 TCGRegSet set = output_pref(op, i);
2537 
2538                 if (i == 0) {
2539                     ne_fprintf(f, "  pref=");
2540                 } else {
2541                     ne_fprintf(f, ",");
2542                 }
2543                 if (set == 0) {
2544                     ne_fprintf(f, "none");
2545                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2546                     ne_fprintf(f, "all");
2547 #ifdef CONFIG_DEBUG_TCG
2548                 } else if (tcg_regset_single(set)) {
2549                     TCGReg reg = tcg_regset_first(set);
2550                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2551 #endif
2552                 } else if (TCG_TARGET_NB_REGS <= 32) {
2553                     ne_fprintf(f, "0x%x", (uint32_t)set);
2554                 } else {
2555                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2556                 }
2557             }
2558         }
2559 
2560         putc('\n', f);
2561     }
2562 }
2563 
2564 /* we give more priority to constraints with less registers */
2565 static int get_constraint_priority(const TCGOpDef *def, int k)
2566 {
2567     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2568     int n = ctpop64(arg_ct->regs);
2569 
2570     /*
2571      * Sort constraints of a single register first, which includes output
2572      * aliases (which must exactly match the input already allocated).
2573      */
2574     if (n == 1 || arg_ct->oalias) {
2575         return INT_MAX;
2576     }
2577 
2578     /*
2579      * Sort register pairs next, first then second immediately after.
2580      * Arbitrarily sort multiple pairs by the index of the first reg;
2581      * there shouldn't be many pairs.
2582      */
2583     switch (arg_ct->pair) {
2584     case 1:
2585     case 3:
2586         return (k + 1) * 2;
2587     case 2:
2588         return (arg_ct->pair_index + 1) * 2 - 1;
2589     }
2590 
2591     /* Finally, sort by decreasing register count. */
2592     assert(n > 1);
2593     return -n;
2594 }
2595 
2596 /* sort from highest priority to lowest */
2597 static void sort_constraints(TCGOpDef *def, int start, int n)
2598 {
2599     int i, j;
2600     TCGArgConstraint *a = def->args_ct;
2601 
2602     for (i = 0; i < n; i++) {
2603         a[start + i].sort_index = start + i;
2604     }
2605     if (n <= 1) {
2606         return;
2607     }
2608     for (i = 0; i < n - 1; i++) {
2609         for (j = i + 1; j < n; j++) {
2610             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2611             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2612             if (p1 < p2) {
2613                 int tmp = a[start + i].sort_index;
2614                 a[start + i].sort_index = a[start + j].sort_index;
2615                 a[start + j].sort_index = tmp;
2616             }
2617         }
2618     }
2619 }
2620 
2621 static void process_op_defs(TCGContext *s)
2622 {
2623     TCGOpcode op;
2624 
2625     for (op = 0; op < NB_OPS; op++) {
2626         TCGOpDef *def = &tcg_op_defs[op];
2627         const TCGTargetOpDef *tdefs;
2628         bool saw_alias_pair = false;
2629         int i, o, i2, o2, nb_args;
2630 
2631         if (def->flags & TCG_OPF_NOT_PRESENT) {
2632             continue;
2633         }
2634 
2635         nb_args = def->nb_iargs + def->nb_oargs;
2636         if (nb_args == 0) {
2637             continue;
2638         }
2639 
2640         /*
2641          * Macro magic should make it impossible, but double-check that
2642          * the array index is in range.  Since the signness of an enum
2643          * is implementation defined, force the result to unsigned.
2644          */
2645         unsigned con_set = tcg_target_op_def(op);
2646         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2647         tdefs = &constraint_sets[con_set];
2648 
2649         for (i = 0; i < nb_args; i++) {
2650             const char *ct_str = tdefs->args_ct_str[i];
2651             bool input_p = i >= def->nb_oargs;
2652 
2653             /* Incomplete TCGTargetOpDef entry. */
2654             tcg_debug_assert(ct_str != NULL);
2655 
2656             switch (*ct_str) {
2657             case '0' ... '9':
2658                 o = *ct_str - '0';
2659                 tcg_debug_assert(input_p);
2660                 tcg_debug_assert(o < def->nb_oargs);
2661                 tcg_debug_assert(def->args_ct[o].regs != 0);
2662                 tcg_debug_assert(!def->args_ct[o].oalias);
2663                 def->args_ct[i] = def->args_ct[o];
2664                 /* The output sets oalias.  */
2665                 def->args_ct[o].oalias = 1;
2666                 def->args_ct[o].alias_index = i;
2667                 /* The input sets ialias. */
2668                 def->args_ct[i].ialias = 1;
2669                 def->args_ct[i].alias_index = o;
2670                 if (def->args_ct[i].pair) {
2671                     saw_alias_pair = true;
2672                 }
2673                 tcg_debug_assert(ct_str[1] == '\0');
2674                 continue;
2675 
2676             case '&':
2677                 tcg_debug_assert(!input_p);
2678                 def->args_ct[i].newreg = true;
2679                 ct_str++;
2680                 break;
2681 
2682             case 'p': /* plus */
2683                 /* Allocate to the register after the previous. */
2684                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2685                 o = i - 1;
2686                 tcg_debug_assert(!def->args_ct[o].pair);
2687                 tcg_debug_assert(!def->args_ct[o].ct);
2688                 def->args_ct[i] = (TCGArgConstraint){
2689                     .pair = 2,
2690                     .pair_index = o,
2691                     .regs = def->args_ct[o].regs << 1,
2692                 };
2693                 def->args_ct[o].pair = 1;
2694                 def->args_ct[o].pair_index = i;
2695                 tcg_debug_assert(ct_str[1] == '\0');
2696                 continue;
2697 
2698             case 'm': /* minus */
2699                 /* Allocate to the register before the previous. */
2700                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2701                 o = i - 1;
2702                 tcg_debug_assert(!def->args_ct[o].pair);
2703                 tcg_debug_assert(!def->args_ct[o].ct);
2704                 def->args_ct[i] = (TCGArgConstraint){
2705                     .pair = 1,
2706                     .pair_index = o,
2707                     .regs = def->args_ct[o].regs >> 1,
2708                 };
2709                 def->args_ct[o].pair = 2;
2710                 def->args_ct[o].pair_index = i;
2711                 tcg_debug_assert(ct_str[1] == '\0');
2712                 continue;
2713             }
2714 
2715             do {
2716                 switch (*ct_str) {
2717                 case 'i':
2718                     def->args_ct[i].ct |= TCG_CT_CONST;
2719                     break;
2720 
2721                 /* Include all of the target-specific constraints. */
2722 
2723 #undef CONST
2724 #define CONST(CASE, MASK) \
2725     case CASE: def->args_ct[i].ct |= MASK; break;
2726 #define REGS(CASE, MASK) \
2727     case CASE: def->args_ct[i].regs |= MASK; break;
2728 
2729 #include "tcg-target-con-str.h"
2730 
2731 #undef REGS
2732 #undef CONST
2733                 default:
2734                 case '0' ... '9':
2735                 case '&':
2736                 case 'p':
2737                 case 'm':
2738                     /* Typo in TCGTargetOpDef constraint. */
2739                     g_assert_not_reached();
2740                 }
2741             } while (*++ct_str != '\0');
2742         }
2743 
2744         /* TCGTargetOpDef entry with too much information? */
2745         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2746 
2747         /*
2748          * Fix up output pairs that are aliased with inputs.
2749          * When we created the alias, we copied pair from the output.
2750          * There are three cases:
2751          *    (1a) Pairs of inputs alias pairs of outputs.
2752          *    (1b) One input aliases the first of a pair of outputs.
2753          *    (2)  One input aliases the second of a pair of outputs.
2754          *
2755          * Case 1a is handled by making sure that the pair_index'es are
2756          * properly updated so that they appear the same as a pair of inputs.
2757          *
2758          * Case 1b is handled by setting the pair_index of the input to
2759          * itself, simply so it doesn't point to an unrelated argument.
2760          * Since we don't encounter the "second" during the input allocation
2761          * phase, nothing happens with the second half of the input pair.
2762          *
2763          * Case 2 is handled by setting the second input to pair=3, the
2764          * first output to pair=3, and the pair_index'es to match.
2765          */
2766         if (saw_alias_pair) {
2767             for (i = def->nb_oargs; i < nb_args; i++) {
2768                 /*
2769                  * Since [0-9pm] must be alone in the constraint string,
2770                  * the only way they can both be set is if the pair comes
2771                  * from the output alias.
2772                  */
2773                 if (!def->args_ct[i].ialias) {
2774                     continue;
2775                 }
2776                 switch (def->args_ct[i].pair) {
2777                 case 0:
2778                     break;
2779                 case 1:
2780                     o = def->args_ct[i].alias_index;
2781                     o2 = def->args_ct[o].pair_index;
2782                     tcg_debug_assert(def->args_ct[o].pair == 1);
2783                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2784                     if (def->args_ct[o2].oalias) {
2785                         /* Case 1a */
2786                         i2 = def->args_ct[o2].alias_index;
2787                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2788                         def->args_ct[i2].pair_index = i;
2789                         def->args_ct[i].pair_index = i2;
2790                     } else {
2791                         /* Case 1b */
2792                         def->args_ct[i].pair_index = i;
2793                     }
2794                     break;
2795                 case 2:
2796                     o = def->args_ct[i].alias_index;
2797                     o2 = def->args_ct[o].pair_index;
2798                     tcg_debug_assert(def->args_ct[o].pair == 2);
2799                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2800                     if (def->args_ct[o2].oalias) {
2801                         /* Case 1a */
2802                         i2 = def->args_ct[o2].alias_index;
2803                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2804                         def->args_ct[i2].pair_index = i;
2805                         def->args_ct[i].pair_index = i2;
2806                     } else {
2807                         /* Case 2 */
2808                         def->args_ct[i].pair = 3;
2809                         def->args_ct[o2].pair = 3;
2810                         def->args_ct[i].pair_index = o2;
2811                         def->args_ct[o2].pair_index = i;
2812                     }
2813                     break;
2814                 default:
2815                     g_assert_not_reached();
2816                 }
2817             }
2818         }
2819 
2820         /* sort the constraints (XXX: this is just an heuristic) */
2821         sort_constraints(def, 0, def->nb_oargs);
2822         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2823     }
2824 }
2825 
2826 static void remove_label_use(TCGOp *op, int idx)
2827 {
2828     TCGLabel *label = arg_label(op->args[idx]);
2829     TCGLabelUse *use;
2830 
2831     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2832         if (use->op == op) {
2833             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2834             return;
2835         }
2836     }
2837     g_assert_not_reached();
2838 }
2839 
/*
 * Remove @op from the active op list of @s and park it on the free
 * list for reuse by tcg_op_alloc.  If @op branches to a label, its
 * use record is dropped from that label first.
 */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    switch (op->opc) {
    case INDEX_op_br:
        /* Unconditional branch: label is argument 0. */
        remove_label_use(op, 0);
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        /* Conditional branch: label is argument 3. */
        remove_label_use(op, 3);
        break;
    case INDEX_op_brcond2_i32:
        /* Double-word conditional branch: label is argument 5. */
        remove_label_use(op, 5);
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    /* Keep the storage around so tcg_op_alloc can recycle it. */
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}
2865 
2866 void tcg_remove_ops_after(TCGOp *op)
2867 {
2868     TCGContext *s = tcg_ctx;
2869 
2870     while (true) {
2871         TCGOp *last = tcg_last_op();
2872         if (last == op) {
2873             return;
2874         }
2875         tcg_op_remove(s, last);
2876     }
2877 }
2878 
/*
 * Allocate a TCGOp with space for at least @nargs arguments.
 * Prefer recycling an op from the context's free list; otherwise
 * carve a fresh one out of the arena via tcg_malloc.
 */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                /* Record the recycled op's full capacity, not the request. */
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    /* Zero all fields that precede the list linkage. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}
2909 
2910 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2911 {
2912     TCGOp *op = tcg_op_alloc(opc, nargs);
2913     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2914     return op;
2915 }
2916 
2917 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2918                             TCGOpcode opc, unsigned nargs)
2919 {
2920     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2921     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2922     return new_op;
2923 }
2924 
2925 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2926                            TCGOpcode opc, unsigned nargs)
2927 {
2928     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2929     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2930     return new_op;
2931 }
2932 
2933 static void move_label_uses(TCGLabel *to, TCGLabel *from)
2934 {
2935     TCGLabelUse *u;
2936 
2937     QSIMPLEQ_FOREACH(u, &from->branches, next) {
2938         TCGOp *op = u->op;
2939         switch (op->opc) {
2940         case INDEX_op_br:
2941             op->args[0] = label_arg(to);
2942             break;
2943         case INDEX_op_brcond_i32:
2944         case INDEX_op_brcond_i64:
2945             op->args[3] = label_arg(to);
2946             break;
2947         case INDEX_op_brcond2_i32:
2948             op->args[5] = label_arg(to);
2949             break;
2950         default:
2951             g_assert_not_reached();
2952         }
2953     }
2954 
2955     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2956 }
2957 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the op list forward.  After an unconditional control transfer
 * (br, exit_tb, goto_ptr, or a noreturn helper call), ops are dead
 * until the next label that still has incoming branches.  Along the
 * way, merge adjacent labels and fold away branch-to-next sequences.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3049 
/* Liveness state bits for a temp: value is dead / value resides in memory. */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the recorded life data for op argument @n; both macros expand
 * an "arg_life" local variable in the enclosing function.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3055 
3056 /* For liveness_pass_1, the register preferences for a given temp.  */
3057 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3058 {
3059     return ts->state_ptr;
3060 }
3061 
3062 /* For liveness_pass_1, reset the preferences for a given temp to the
3063  * maximal regset for its type.
3064  */
3065 static inline void la_reset_pref(TCGTemp *ts)
3066 {
3067     *la_temp_pref(ts)
3068         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3069 }
3070 
3071 /* liveness analysis: end of function: all temps are dead, and globals
3072    should be in memory. */
3073 static void la_func_end(TCGContext *s, int ng, int nt)
3074 {
3075     int i;
3076 
3077     for (i = 0; i < ng; ++i) {
3078         s->temps[i].state = TS_DEAD | TS_MEM;
3079         la_reset_pref(&s->temps[i]);
3080     }
3081     for (i = ng; i < nt; ++i) {
3082         s->temps[i].state = TS_DEAD;
3083         la_reset_pref(&s->temps[i]);
3084     }
3085 }
3086 
3087 /* liveness analysis: end of basic block: all temps are dead, globals
3088    and local temps should be in memory. */
3089 static void la_bb_end(TCGContext *s, int ng, int nt)
3090 {
3091     int i;
3092 
3093     for (i = 0; i < nt; ++i) {
3094         TCGTemp *ts = &s->temps[i];
3095         int state;
3096 
3097         switch (ts->kind) {
3098         case TEMP_FIXED:
3099         case TEMP_GLOBAL:
3100         case TEMP_TB:
3101             state = TS_DEAD | TS_MEM;
3102             break;
3103         case TEMP_EBB:
3104         case TEMP_CONST:
3105             state = TS_DEAD;
3106             break;
3107         default:
3108             g_assert_not_reached();
3109         }
3110         ts->state = state;
3111         la_reset_pref(ts);
3112     }
3113 }
3114 
3115 /* liveness analysis: sync globals back to memory.  */
3116 static void la_global_sync(TCGContext *s, int ng)
3117 {
3118     int i;
3119 
3120     for (i = 0; i < ng; ++i) {
3121         int state = s->temps[i].state;
3122         s->temps[i].state = state | TS_MEM;
3123         if (state == TS_DEAD) {
3124             /* If the global was previously dead, reset prefs.  */
3125             la_reset_pref(&s->temps[i]);
3126         }
3127     }
3128 }
3129 
3130 /*
3131  * liveness analysis: conditional branch: all temps are dead unless
3132  * explicitly live-across-conditional-branch, globals and local temps
3133  * should be synced.
3134  */
3135 static void la_bb_sync(TCGContext *s, int ng, int nt)
3136 {
3137     la_global_sync(s, ng);
3138 
3139     for (int i = ng; i < nt; ++i) {
3140         TCGTemp *ts = &s->temps[i];
3141         int state;
3142 
3143         switch (ts->kind) {
3144         case TEMP_TB:
3145             state = ts->state;
3146             ts->state = state | TS_MEM;
3147             if (state != TS_DEAD) {
3148                 continue;
3149             }
3150             break;
3151         case TEMP_EBB:
3152         case TEMP_CONST:
3153             continue;
3154         default:
3155             g_assert_not_reached();
3156         }
3157         la_reset_pref(&s->temps[i]);
3158     }
3159 }
3160 
3161 /* liveness analysis: sync globals back to memory and kill.  */
3162 static void la_global_kill(TCGContext *s, int ng)
3163 {
3164     int i;
3165 
3166     for (i = 0; i < ng; i++) {
3167         s->temps[i].state = TS_DEAD | TS_MEM;
3168         la_reset_pref(&s->temps[i]);
3169     }
3170 }
3171 
3172 /* liveness analysis: note live globals crossing calls.  */
3173 static void la_cross_call(TCGContext *s, int nt)
3174 {
3175     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3176     int i;
3177 
3178     for (i = 0; i < nt; i++) {
3179         TCGTemp *ts = &s->temps[i];
3180         if (!(ts->state & TS_DEAD)) {
3181             TCGRegSet *pset = la_temp_pref(ts);
3182             TCGRegSet set = *pset;
3183 
3184             set &= mask;
3185             /* If the combination is not possible, restart.  */
3186             if (set == 0) {
3187                 set = tcg_target_available_regs[ts->type] & mask;
3188             }
3189             *pset = set;
3190         }
3191     }
3192 }
3193 
3194 /*
3195  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3196  * to TEMP_EBB, if possible.
3197  */
3198 static void __attribute__((noinline))
3199 liveness_pass_0(TCGContext *s)
3200 {
3201     void * const multiple_ebb = (void *)(uintptr_t)-1;
3202     int nb_temps = s->nb_temps;
3203     TCGOp *op, *ebb;
3204 
3205     for (int i = s->nb_globals; i < nb_temps; ++i) {
3206         s->temps[i].state_ptr = NULL;
3207     }
3208 
3209     /*
3210      * Represent each EBB by the op at which it begins.  In the case of
3211      * the first EBB, this is the first op, otherwise it is a label.
3212      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3213      * within a single EBB, else MULTIPLE_EBB.
3214      */
3215     ebb = QTAILQ_FIRST(&s->ops);
3216     QTAILQ_FOREACH(op, &s->ops, link) {
3217         const TCGOpDef *def;
3218         int nb_oargs, nb_iargs;
3219 
3220         switch (op->opc) {
3221         case INDEX_op_set_label:
3222             ebb = op;
3223             continue;
3224         case INDEX_op_discard:
3225             continue;
3226         case INDEX_op_call:
3227             nb_oargs = TCGOP_CALLO(op);
3228             nb_iargs = TCGOP_CALLI(op);
3229             break;
3230         default:
3231             def = &tcg_op_defs[op->opc];
3232             nb_oargs = def->nb_oargs;
3233             nb_iargs = def->nb_iargs;
3234             break;
3235         }
3236 
3237         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3238             TCGTemp *ts = arg_temp(op->args[i]);
3239 
3240             if (ts->kind != TEMP_TB) {
3241                 continue;
3242             }
3243             if (ts->state_ptr == NULL) {
3244                 ts->state_ptr = ebb;
3245             } else if (ts->state_ptr != ebb) {
3246                 ts->state_ptr = multiple_ebb;
3247             }
3248         }
3249     }
3250 
3251     /*
3252      * For TEMP_TB that turned out not to be used beyond one EBB,
3253      * reduce the liveness to TEMP_EBB.
3254      */
3255     for (int i = s->nb_globals; i < nb_temps; ++i) {
3256         TCGTemp *ts = &s->temps[i];
3257         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3258             ts->kind = TEMP_EBB;
3259         }
3260     }
3261 }
3262 
3263 /* Liveness analysis : update the opc_arg_life array to tell if a
3264    given input arguments is dead. Instructions updating dead
3265    temporaries are removed. */
3266 static void __attribute__((noinline))
3267 liveness_pass_1(TCGContext *s)
3268 {
3269     int nb_globals = s->nb_globals;
3270     int nb_temps = s->nb_temps;
3271     TCGOp *op, *op_prev;
3272     TCGRegSet *prefs;
3273     int i;
3274 
3275     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3276     for (i = 0; i < nb_temps; ++i) {
3277         s->temps[i].state_ptr = prefs + i;
3278     }
3279 
3280     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3281     la_func_end(s, nb_globals, nb_temps);
3282 
3283     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3284         int nb_iargs, nb_oargs;
3285         TCGOpcode opc_new, opc_new2;
3286         bool have_opc_new2;
3287         TCGLifeData arg_life = 0;
3288         TCGTemp *ts;
3289         TCGOpcode opc = op->opc;
3290         const TCGOpDef *def = &tcg_op_defs[opc];
3291 
3292         switch (opc) {
3293         case INDEX_op_call:
3294             {
3295                 const TCGHelperInfo *info = tcg_call_info(op);
3296                 int call_flags = tcg_call_flags(op);
3297 
3298                 nb_oargs = TCGOP_CALLO(op);
3299                 nb_iargs = TCGOP_CALLI(op);
3300 
3301                 /* pure functions can be removed if their result is unused */
3302                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3303                     for (i = 0; i < nb_oargs; i++) {
3304                         ts = arg_temp(op->args[i]);
3305                         if (ts->state != TS_DEAD) {
3306                             goto do_not_remove_call;
3307                         }
3308                     }
3309                     goto do_remove;
3310                 }
3311             do_not_remove_call:
3312 
3313                 /* Output args are dead.  */
3314                 for (i = 0; i < nb_oargs; i++) {
3315                     ts = arg_temp(op->args[i]);
3316                     if (ts->state & TS_DEAD) {
3317                         arg_life |= DEAD_ARG << i;
3318                     }
3319                     if (ts->state & TS_MEM) {
3320                         arg_life |= SYNC_ARG << i;
3321                     }
3322                     ts->state = TS_DEAD;
3323                     la_reset_pref(ts);
3324                 }
3325 
3326                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3327                 memset(op->output_pref, 0, sizeof(op->output_pref));
3328 
3329                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3330                                     TCG_CALL_NO_READ_GLOBALS))) {
3331                     la_global_kill(s, nb_globals);
3332                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3333                     la_global_sync(s, nb_globals);
3334                 }
3335 
3336                 /* Record arguments that die in this helper.  */
3337                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3338                     ts = arg_temp(op->args[i]);
3339                     if (ts->state & TS_DEAD) {
3340                         arg_life |= DEAD_ARG << i;
3341                     }
3342                 }
3343 
3344                 /* For all live registers, remove call-clobbered prefs.  */
3345                 la_cross_call(s, nb_temps);
3346 
3347                 /*
3348                  * Input arguments are live for preceding opcodes.
3349                  *
3350                  * For those arguments that die, and will be allocated in
3351                  * registers, clear the register set for that arg, to be
3352                  * filled in below.  For args that will be on the stack,
3353                  * reset to any available reg.  Process arguments in reverse
3354                  * order so that if a temp is used more than once, the stack
3355                  * reset to max happens before the register reset to 0.
3356                  */
3357                 for (i = nb_iargs - 1; i >= 0; i--) {
3358                     const TCGCallArgumentLoc *loc = &info->in[i];
3359                     ts = arg_temp(op->args[nb_oargs + i]);
3360 
3361                     if (ts->state & TS_DEAD) {
3362                         switch (loc->kind) {
3363                         case TCG_CALL_ARG_NORMAL:
3364                         case TCG_CALL_ARG_EXTEND_U:
3365                         case TCG_CALL_ARG_EXTEND_S:
3366                             if (arg_slot_reg_p(loc->arg_slot)) {
3367                                 *la_temp_pref(ts) = 0;
3368                                 break;
3369                             }
3370                             /* fall through */
3371                         default:
3372                             *la_temp_pref(ts) =
3373                                 tcg_target_available_regs[ts->type];
3374                             break;
3375                         }
3376                         ts->state &= ~TS_DEAD;
3377                     }
3378                 }
3379 
3380                 /*
3381                  * For each input argument, add its input register to prefs.
3382                  * If a temp is used once, this produces a single set bit;
3383                  * if a temp is used multiple times, this produces a set.
3384                  */
3385                 for (i = 0; i < nb_iargs; i++) {
3386                     const TCGCallArgumentLoc *loc = &info->in[i];
3387                     ts = arg_temp(op->args[nb_oargs + i]);
3388 
3389                     switch (loc->kind) {
3390                     case TCG_CALL_ARG_NORMAL:
3391                     case TCG_CALL_ARG_EXTEND_U:
3392                     case TCG_CALL_ARG_EXTEND_S:
3393                         if (arg_slot_reg_p(loc->arg_slot)) {
3394                             tcg_regset_set_reg(*la_temp_pref(ts),
3395                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3396                         }
3397                         break;
3398                     default:
3399                         break;
3400                     }
3401                 }
3402             }
3403             break;
3404         case INDEX_op_insn_start:
3405             break;
3406         case INDEX_op_discard:
3407             /* mark the temporary as dead */
3408             ts = arg_temp(op->args[0]);
3409             ts->state = TS_DEAD;
3410             la_reset_pref(ts);
3411             break;
3412 
3413         case INDEX_op_add2_i32:
3414             opc_new = INDEX_op_add_i32;
3415             goto do_addsub2;
3416         case INDEX_op_sub2_i32:
3417             opc_new = INDEX_op_sub_i32;
3418             goto do_addsub2;
3419         case INDEX_op_add2_i64:
3420             opc_new = INDEX_op_add_i64;
3421             goto do_addsub2;
3422         case INDEX_op_sub2_i64:
3423             opc_new = INDEX_op_sub_i64;
3424         do_addsub2:
3425             nb_iargs = 4;
3426             nb_oargs = 2;
3427             /* Test if the high part of the operation is dead, but not
3428                the low part.  The result can be optimized to a simple
3429                add or sub.  This happens often for x86_64 guest when the
3430                cpu mode is set to 32 bit.  */
3431             if (arg_temp(op->args[1])->state == TS_DEAD) {
3432                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3433                     goto do_remove;
3434                 }
3435                 /* Replace the opcode and adjust the args in place,
3436                    leaving 3 unused args at the end.  */
3437                 op->opc = opc = opc_new;
3438                 op->args[1] = op->args[2];
3439                 op->args[2] = op->args[4];
3440                 /* Fall through and mark the single-word operation live.  */
3441                 nb_iargs = 2;
3442                 nb_oargs = 1;
3443             }
3444             goto do_not_remove;
3445 
3446         case INDEX_op_mulu2_i32:
3447             opc_new = INDEX_op_mul_i32;
3448             opc_new2 = INDEX_op_muluh_i32;
3449             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3450             goto do_mul2;
3451         case INDEX_op_muls2_i32:
3452             opc_new = INDEX_op_mul_i32;
3453             opc_new2 = INDEX_op_mulsh_i32;
3454             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3455             goto do_mul2;
3456         case INDEX_op_mulu2_i64:
3457             opc_new = INDEX_op_mul_i64;
3458             opc_new2 = INDEX_op_muluh_i64;
3459             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3460             goto do_mul2;
3461         case INDEX_op_muls2_i64:
3462             opc_new = INDEX_op_mul_i64;
3463             opc_new2 = INDEX_op_mulsh_i64;
3464             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3465             goto do_mul2;
3466         do_mul2:
3467             nb_iargs = 2;
3468             nb_oargs = 2;
3469             if (arg_temp(op->args[1])->state == TS_DEAD) {
3470                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3471                     /* Both parts of the operation are dead.  */
3472                     goto do_remove;
3473                 }
3474                 /* The high part of the operation is dead; generate the low. */
3475                 op->opc = opc = opc_new;
3476                 op->args[1] = op->args[2];
3477                 op->args[2] = op->args[3];
3478             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3479                 /* The low part of the operation is dead; generate the high. */
3480                 op->opc = opc = opc_new2;
3481                 op->args[0] = op->args[1];
3482                 op->args[1] = op->args[2];
3483                 op->args[2] = op->args[3];
3484             } else {
3485                 goto do_not_remove;
3486             }
3487             /* Mark the single-word operation live.  */
3488             nb_oargs = 1;
3489             goto do_not_remove;
3490 
3491         default:
3492             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3493             nb_iargs = def->nb_iargs;
3494             nb_oargs = def->nb_oargs;
3495 
3496             /* Test if the operation can be removed because all
3497                its outputs are dead. We assume that nb_oargs == 0
3498                implies side effects */
3499             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3500                 for (i = 0; i < nb_oargs; i++) {
3501                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3502                         goto do_not_remove;
3503                     }
3504                 }
3505                 goto do_remove;
3506             }
3507             goto do_not_remove;
3508 
3509         do_remove:
3510             tcg_op_remove(s, op);
3511             break;
3512 
3513         do_not_remove:
3514             for (i = 0; i < nb_oargs; i++) {
3515                 ts = arg_temp(op->args[i]);
3516 
3517                 /* Remember the preference of the uses that followed.  */
3518                 if (i < ARRAY_SIZE(op->output_pref)) {
3519                     op->output_pref[i] = *la_temp_pref(ts);
3520                 }
3521 
3522                 /* Output args are dead.  */
3523                 if (ts->state & TS_DEAD) {
3524                     arg_life |= DEAD_ARG << i;
3525                 }
3526                 if (ts->state & TS_MEM) {
3527                     arg_life |= SYNC_ARG << i;
3528                 }
3529                 ts->state = TS_DEAD;
3530                 la_reset_pref(ts);
3531             }
3532 
3533             /* If end of basic block, update.  */
3534             if (def->flags & TCG_OPF_BB_EXIT) {
3535                 la_func_end(s, nb_globals, nb_temps);
3536             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3537                 la_bb_sync(s, nb_globals, nb_temps);
3538             } else if (def->flags & TCG_OPF_BB_END) {
3539                 la_bb_end(s, nb_globals, nb_temps);
3540             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3541                 la_global_sync(s, nb_globals);
3542                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3543                     la_cross_call(s, nb_temps);
3544                 }
3545             }
3546 
3547             /* Record arguments that die in this opcode.  */
3548             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3549                 ts = arg_temp(op->args[i]);
3550                 if (ts->state & TS_DEAD) {
3551                     arg_life |= DEAD_ARG << i;
3552                 }
3553             }
3554 
3555             /* Input arguments are live for preceding opcodes.  */
3556             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3557                 ts = arg_temp(op->args[i]);
3558                 if (ts->state & TS_DEAD) {
3559                     /* For operands that were dead, initially allow
3560                        all regs for the type.  */
3561                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3562                     ts->state &= ~TS_DEAD;
3563                 }
3564             }
3565 
3566             /* Incorporate constraints for this operand.  */
3567             switch (opc) {
3568             case INDEX_op_mov_i32:
3569             case INDEX_op_mov_i64:
3570                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3571                    have proper constraints.  That said, special case
3572                    moves to propagate preferences backward.  */
3573                 if (IS_DEAD_ARG(1)) {
3574                     *la_temp_pref(arg_temp(op->args[0]))
3575                         = *la_temp_pref(arg_temp(op->args[1]));
3576                 }
3577                 break;
3578 
3579             default:
3580                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3581                     const TCGArgConstraint *ct = &def->args_ct[i];
3582                     TCGRegSet set, *pset;
3583 
3584                     ts = arg_temp(op->args[i]);
3585                     pset = la_temp_pref(ts);
3586                     set = *pset;
3587 
3588                     set &= ct->regs;
3589                     if (ct->ialias) {
3590                         set &= output_pref(op, ct->alias_index);
3591                     }
3592                     /* If the combination is not possible, restart.  */
3593                     if (set == 0) {
3594                         set = ct->regs;
3595                     }
3596                     *pset = set;
3597                 }
3598                 break;
3599             }
3600             break;
3601         }
3602         op->life = arg_life;
3603     }
3604 }
3605 
/* Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For each global that lives behind an indirect register, allocate a
 * "direct" shadow temporary and rewrite all uses of the global to use
 * the shadow, inserting explicit loads before uses and stores after
 * definitions as dictated by the liveness data computed in pass 1.
 * Returns true if any opcode argument was rewritten.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its direct shadow. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow not yet valid: insert a load from the global's
                   canonical memory slot just before this op.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov's output dies at once: drop the mov and
                           store the mov's *input* directly.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3794 
/* Assign a stack-frame slot to @ts, setting mem_base/mem_offset and
   marking the memory as allocated.  If @ts is one subindex of a larger
   base type, slots are assigned to all parts at once.  Restarts the TB
   (via tcg_raise_tb_overflow) if the frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* Sparc frame addressing uses a biased frame pointer. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3863 
3864 /* Assign @reg to @ts, and update reg_to_temp[]. */
3865 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3866 {
3867     if (ts->val_type == TEMP_VAL_REG) {
3868         TCGReg old = ts->reg;
3869         tcg_debug_assert(s->reg_to_temp[old] == ts);
3870         if (old == reg) {
3871             return;
3872         }
3873         s->reg_to_temp[old] = NULL;
3874     }
3875     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3876     s->reg_to_temp[reg] = ts;
3877     ts->val_type = TEMP_VAL_REG;
3878     ts->reg = reg;
3879 }
3880 
3881 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3882 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3883 {
3884     tcg_debug_assert(type != TEMP_VAL_REG);
3885     if (ts->val_type == TEMP_VAL_REG) {
3886         TCGReg reg = ts->reg;
3887         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3888         s->reg_to_temp[reg] = NULL;
3889     }
3890     ts->val_type = type;
3891 }
3892 
3893 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3894 
3895 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3896    mark it free; otherwise mark it dead.  */
3897 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3898 {
3899     TCGTempVal new_type;
3900 
3901     switch (ts->kind) {
3902     case TEMP_FIXED:
3903         return;
3904     case TEMP_GLOBAL:
3905     case TEMP_TB:
3906         new_type = TEMP_VAL_MEM;
3907         break;
3908     case TEMP_EBB:
3909         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3910         break;
3911     case TEMP_CONST:
3912         new_type = TEMP_VAL_CONST;
3913         break;
3914     default:
3915         g_assert_not_reached();
3916     }
3917     set_temp_val_nonreg(s, ts, new_type);
3918 }
3919 
3920 /* Mark a temporary as dead.  */
3921 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3922 {
3923     temp_free_or_dead(s, ts, 1);
3924 }
3925 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps already match
       their memory slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant into a register ...  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            /* ... and store the register to the memory slot.  */
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Already in memory; nothing to emit.  */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3969 
3970 /* free register 'reg' by spilling the corresponding temporary if necessary */
3971 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3972 {
3973     TCGTemp *ts = s->reg_to_temp[reg];
3974     if (ts != NULL) {
3975         temp_sync(s, ts, allocated_regs, 0, -1);
3976     }
3977 }
3978 
3979 /**
3980  * tcg_reg_alloc:
3981  * @required_regs: Set of registers in which we must allocate.
3982  * @allocated_regs: Set of registers which must be avoided.
3983  * @preferred_regs: Set of registers we should prefer.
3984  * @rev: True if we search the registers in "indirect" order.
3985  *
3986  * The allocated register must be in @required_regs & ~@allocated_regs,
3987  * but if we can put it in @preferred_regs we may save a move later.
3988  */
3989 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3990                             TCGRegSet allocated_regs,
3991                             TCGRegSet preferred_regs, bool rev)
3992 {
3993     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3994     TCGRegSet reg_ct[2];
3995     const int *order;
3996 
3997     reg_ct[1] = required_regs & ~allocated_regs;
3998     tcg_debug_assert(reg_ct[1] != 0);
3999     reg_ct[0] = reg_ct[1] & preferred_regs;
4000 
4001     /* Skip the preferred_regs option if it cannot be satisfied,
4002        or if the preference made no difference.  */
4003     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4004 
4005     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4006 
4007     /* Try free registers, preferences first.  */
4008     for (j = f; j < 2; j++) {
4009         TCGRegSet set = reg_ct[j];
4010 
4011         if (tcg_regset_single(set)) {
4012             /* One register in the set.  */
4013             TCGReg reg = tcg_regset_first(set);
4014             if (s->reg_to_temp[reg] == NULL) {
4015                 return reg;
4016             }
4017         } else {
4018             for (i = 0; i < n; i++) {
4019                 TCGReg reg = order[i];
4020                 if (s->reg_to_temp[reg] == NULL &&
4021                     tcg_regset_test_reg(set, reg)) {
4022                     return reg;
4023                 }
4024             }
4025         }
4026     }
4027 
4028     /* We must spill something.  */
4029     for (j = f; j < 2; j++) {
4030         TCGRegSet set = reg_ct[j];
4031 
4032         if (tcg_regset_single(set)) {
4033             /* One register in the set.  */
4034             TCGReg reg = tcg_regset_first(set);
4035             tcg_reg_free(s, reg, allocated_regs);
4036             return reg;
4037         } else {
4038             for (i = 0; i < n; i++) {
4039                 TCGReg reg = order[i];
4040                 if (tcg_regset_test_reg(set, reg)) {
4041                     tcg_reg_free(s, reg, allocated_regs);
4042                     return reg;
4043                 }
4044             }
4045         }
4046     }
4047 
4048     g_assert_not_reached();
4049 }
4050 
/* Allocate a consecutive register pair, returning the lower register.
   Parameters are as for tcg_reg_alloc; both registers of the pair are
   freed (spilling occupants if needed) before returning.  */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* fmin == 0 accepts any candidate, and reg_ct[1] is non-empty. */
    g_assert_not_reached();
}
4096 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  Constants are materialized with
   movi/dupi; memory values are reloaded from their canonical slot.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I32) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register value has not been stored back to memory.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register now matches the memory slot it was loaded from.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4145 
4146 /* Save a temporary to memory. 'allocated_regs' is used in case a
4147    temporary registers needs to be allocated to store a constant.  */
4148 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4149 {
4150     /* The liveness analysis already ensures that globals are back
4151        in memory. Keep an tcg_debug_assert for safety. */
4152     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4153 }
4154 
4155 /* save globals to their canonical location and assume they can be
4156    modified be the following code. 'allocated_regs' is used in case a
4157    temporary registers needs to be allocated to store a constant. */
4158 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4159 {
4160     int i, n;
4161 
4162     for (i = 0, n = s->nb_globals; i < n; i++) {
4163         temp_save(s, &s->temps[i], allocated_regs);
4164     }
4165 }
4166 
4167 /* sync globals to their canonical location and assume they can be
4168    read by the following code. 'allocated_regs' is used in case a
4169    temporary registers needs to be allocated to store a constant. */
4170 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4171 {
4172     int i, n;
4173 
4174     for (i = 0, n = s->nb_globals; i < n; i++) {
4175         TCGTemp *ts = &s->temps[i];
4176         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4177                          || ts->kind == TEMP_FIXED
4178                          || ts->mem_coherent);
4179     }
4180 }
4181 
4182 /* at the end of a basic block, we assume all temporaries are dead and
4183    all globals are stored at their canonical location. */
4184 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4185 {
4186     int i;
4187 
4188     for (i = s->nb_globals; i < s->nb_temps; i++) {
4189         TCGTemp *ts = &s->temps[i];
4190 
4191         switch (ts->kind) {
4192         case TEMP_TB:
4193             temp_save(s, ts, allocated_regs);
4194             break;
4195         case TEMP_EBB:
4196             /* The liveness analysis already ensures that temps are dead.
4197                Keep an tcg_debug_assert for safety. */
4198             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4199             break;
4200         case TEMP_CONST:
4201             /* Similarly, we should have freed any allocated register. */
4202             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4203             break;
4204         default:
4205             g_assert_not_reached();
4206         }
4207     }
4208 
4209     save_globals(s, allocated_regs);
4210 }
4211 
4212 /*
4213  * At a conditional branch, we assume all temporaries are dead unless
4214  * explicitly live-across-conditional-branch; all globals and local
4215  * temps are synced to their location.
4216  */
4217 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4218 {
4219     sync_globals(s, allocated_regs);
4220 
4221     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4222         TCGTemp *ts = &s->temps[i];
4223         /*
4224          * The liveness analysis already ensures that temps are dead.
4225          * Keep tcg_debug_asserts for safety.
4226          */
4227         switch (ts->kind) {
4228         case TEMP_TB:
4229             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4230             break;
4231         case TEMP_EBB:
4232         case TEMP_CONST:
4233             break;
4234         default:
4235             g_assert_not_reached();
4236         }
4237     }
4238 }
4239 
4240 /*
4241  * Specialized code generation for INDEX_op_mov_* with a constant.
4242  */
4243 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4244                                   tcg_target_ulong val, TCGLifeData arg_life,
4245                                   TCGRegSet preferred_regs)
4246 {
4247     /* ENV should not be modified.  */
4248     tcg_debug_assert(!temp_readonly(ots));
4249 
4250     /* The movi is not explicitly generated here.  */
4251     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4252     ots->val = val;
4253     ots->mem_coherent = 0;
4254     if (NEED_SYNC_ARG(0)) {
4255         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4256     } else if (IS_DEAD_ARG(0)) {
4257         temp_dead(s, ots);
4258     }
4259 }
4260 
4261 /*
4262  * Specialized code generation for INDEX_op_mov_*.
4263  */
4264 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4265 {
4266     const TCGLifeData arg_life = op->life;
4267     TCGRegSet allocated_regs, preferred_regs;
4268     TCGTemp *ts, *ots;
4269     TCGType otype, itype;
4270     TCGReg oreg, ireg;
4271 
4272     allocated_regs = s->reserved_regs;
4273     preferred_regs = output_pref(op, 0);
4274     ots = arg_temp(op->args[0]);
4275     ts = arg_temp(op->args[1]);
4276 
4277     /* ENV should not be modified.  */
4278     tcg_debug_assert(!temp_readonly(ots));
4279 
4280     /* Note that otype != itype for no-op truncation.  */
4281     otype = ots->type;
4282     itype = ts->type;
4283 
4284     if (ts->val_type == TEMP_VAL_CONST) {
4285         /* propagate constant or generate sti */
4286         tcg_target_ulong val = ts->val;
4287         if (IS_DEAD_ARG(1)) {
4288             temp_dead(s, ts);
4289         }
4290         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4291         return;
4292     }
4293 
4294     /* If the source value is in memory we're going to be forced
4295        to have it in a register in order to perform the copy.  Copy
4296        the SOURCE value into its own register first, that way we
4297        don't have to reload SOURCE the next time it is used. */
4298     if (ts->val_type == TEMP_VAL_MEM) {
4299         temp_load(s, ts, tcg_target_available_regs[itype],
4300                   allocated_regs, preferred_regs);
4301     }
4302     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4303     ireg = ts->reg;
4304 
4305     if (IS_DEAD_ARG(0)) {
4306         /* mov to a non-saved dead register makes no sense (even with
4307            liveness analysis disabled). */
4308         tcg_debug_assert(NEED_SYNC_ARG(0));
4309         if (!ots->mem_allocated) {
4310             temp_allocate_frame(s, ots);
4311         }
4312         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4313         if (IS_DEAD_ARG(1)) {
4314             temp_dead(s, ts);
4315         }
4316         temp_dead(s, ots);
4317         return;
4318     }
4319 
4320     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4321         /*
4322          * The mov can be suppressed.  Kill input first, so that it
4323          * is unlinked from reg_to_temp, then set the output to the
4324          * reg that we saved from the input.
4325          */
4326         temp_dead(s, ts);
4327         oreg = ireg;
4328     } else {
4329         if (ots->val_type == TEMP_VAL_REG) {
4330             oreg = ots->reg;
4331         } else {
4332             /* Make sure to not spill the input register during allocation. */
4333             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4334                                  allocated_regs | ((TCGRegSet)1 << ireg),
4335                                  preferred_regs, ots->indirect_base);
4336         }
4337         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4338             /*
4339              * Cross register class move not supported.
4340              * Store the source register into the destination slot
4341              * and leave the destination temp as TEMP_VAL_MEM.
4342              */
4343             assert(!temp_readonly(ots));
4344             if (!ts->mem_allocated) {
4345                 temp_allocate_frame(s, ots);
4346             }
4347             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4348             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4349             ots->mem_coherent = 1;
4350             return;
4351         }
4352     }
4353     set_temp_val_reg(s, ots, oreg);
4354     ots->mem_coherent = 0;
4355 
4356     if (NEED_SYNC_ARG(0)) {
4357         temp_sync(s, ots, allocated_regs, 0, 0);
4358     }
4359 }
4360 
/*
 * Specialized code generation for INDEX_op_dup_vec: replicate a scalar
 * input (register, memory or constant) into every element of the output
 * vector register, honoring the backend's dup constraints.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;          /* its: scalar input; ots: vector output */
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts the least significant element sits at
           the high end of the slot; adjust the load address for it. */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
4471 
/*
 * Allocate registers and emit host code for one generic TCG opcode.
 * Inputs are placed to satisfy the constraints in tcg_op_defs[opc],
 * dead temps are released, call-clobbered state is handled, outputs
 * are assigned, and finally the backend emitter is invoked.
 *
 * arg_ct->pair encodes register-pair constraints:
 *   0: unpaired, 1: first of a pair, 2: second of a pair,
 *   3: first of a pair aliased to a second output.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];      /* resolved host regs/consts */
    int const_args[TCG_MAX_OP_ARGS];       /* 1 if new_args[i] is a const */

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-difficulty order, not arg order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /* The existing pair reg/reg+1 can be reused only if both
                   halves are dead, writable, free, and already adjacent. */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half was already placed; this is simply reg+1. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Reserve the lower half for the first output. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Allocate a fresh pair; the input occupies the upper half. */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A "new" output must not overlap any input. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4837 
/*
 * Specialized code generation for INDEX_op_dup2_vec: duplicate a 64-bit
 * element, given as two 32-bit halves, into the output vector register.
 * Returns false if the caller must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;  /* output; low and high input halves */
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that still encodes the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Ensure both halves are in their (adjacent) memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4925 
4926 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4927                          TCGRegSet allocated_regs)
4928 {
4929     if (ts->val_type == TEMP_VAL_REG) {
4930         if (ts->reg != reg) {
4931             tcg_reg_free(s, reg, allocated_regs);
4932             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4933                 /*
4934                  * Cross register class move not supported.  Sync the
4935                  * temp back to its slot and load from there.
4936                  */
4937                 temp_sync(s, ts, allocated_regs, 0, 0);
4938                 tcg_out_ld(s, ts->type, reg,
4939                            ts->mem_base->reg, ts->mem_offset);
4940             }
4941         }
4942     } else {
4943         TCGRegSet arg_set = 0;
4944 
4945         tcg_reg_free(s, reg, allocated_regs);
4946         tcg_regset_set_reg(arg_set, reg);
4947         temp_load(s, ts, arg_set, allocated_regs, 0);
4948     }
4949 }
4950 
4951 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
4952                          TCGRegSet allocated_regs)
4953 {
4954     /*
4955      * When the destination is on the stack, load up the temp and store.
4956      * If there are many call-saved registers, the temp might live to
4957      * see another use; otherwise it'll be discarded.
4958      */
4959     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4960     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4961                arg_slot_stk_ofs(arg_slot));
4962 }
4963 
4964 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4965                             TCGTemp *ts, TCGRegSet *allocated_regs)
4966 {
4967     if (arg_slot_reg_p(l->arg_slot)) {
4968         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4969         load_arg_reg(s, reg, ts, *allocated_regs);
4970         tcg_regset_set_reg(*allocated_regs, reg);
4971     } else {
4972         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
4973     }
4974 }
4975 
4976 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
4977                          intptr_t ref_off, TCGRegSet *allocated_regs)
4978 {
4979     TCGReg reg;
4980 
4981     if (arg_slot_reg_p(arg_slot)) {
4982         reg = tcg_target_call_iarg_regs[arg_slot];
4983         tcg_reg_free(s, reg, *allocated_regs);
4984         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4985         tcg_regset_set_reg(*allocated_regs, reg);
4986     } else {
4987         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4988                             *allocated_regs, 0, false);
4989         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4990         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4991                    arg_slot_stk_ofs(arg_slot));
4992     }
4993 }
4994 
/*
 * Allocate registers and emit code for a helper call op: place the
 * arguments per the TCGHelperInfo ABI description, clobber call
 * registers, save/sync globals as required by the call flags, emit
 * the call, and bind the return value(s) to the output temps.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            /* Bind the output temp to the ABI return register. */
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Spill the vector return value to the temp's home slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5125 
5126 /*
5127  * Similarly for qemu_ld/st slow path helpers.
5128  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5129  * using only the provided backend tcg_out_* functions.
5130  */
5131 
5132 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5133 {
5134     int ofs = arg_slot_stk_ofs(slot);
5135 
5136     /*
5137      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5138      * require extension to uint64_t, adjust the address for uint32_t.
5139      */
5140     if (HOST_BIG_ENDIAN &&
5141         TCG_TARGET_REG_BITS == 64 &&
5142         type == TCG_TYPE_I32) {
5143         ofs += 4;
5144     }
5145     return ofs;
5146 }
5147 
/*
 * Emit the NMOV register moves/extensions in mov[], resolving
 * destination/source conflicts.  When a later move's destination is
 * still needed as an earlier move's source, the conflicting source is
 * copied to one of the NTMP scratch registers in tmp[] and the routine
 * recurses on the remaining moves.  The final one or two moves are
 * handled by tcg_out_movext1/tcg_out_movext2, which resolve any
 * remaining swap themselves.
 */
static void tcg_out_helper_load_regs(TCGContext *s,
                                     unsigned nmov, TCGMovExtend *mov,
                                     unsigned ntmp, const int *tmp)
{
    switch (nmov) {
    default:
        /* The backend must have provided enough temps for the worst case. */
        tcg_debug_assert(ntmp + 1 >= nmov);

        /* Process moves from the end; indices 0 and 1 are left for
           tcg_out_movext2 below. */
        for (unsigned i = nmov - 1; i >= 2; --i) {
            TCGReg dst = mov[i].dst;

            for (unsigned j = 0; j < i; ++j) {
                if (dst == mov[j].src) {
                    /*
                     * Conflict.
                     * Copy the source to a temporary, recurse for the
                     * remaining moves, perform the extension from our
                     * scratch on the way out.
                     */
                    TCGReg scratch = tmp[--ntmp];
                    tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
                    mov[i].src = scratch;

                    tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
                    tcg_out_movext1(s, &mov[i]);
                    return;
                }
            }

            /* No conflicts: perform this move and continue. */
            tcg_out_movext1(s, &mov[i]);
        }
        /* fall through for the final two moves */

    case 2:
        tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
        return;
    case 1:
        tcg_out_movext1(s, mov);
        return;
    case 0:
        g_assert_not_reached();
    }
}
5193 
5194 static void tcg_out_helper_load_slots(TCGContext *s,
5195                                       unsigned nmov, TCGMovExtend *mov,
5196                                       const TCGLdstHelperParam *parm)
5197 {
5198     unsigned i;
5199 
5200     /*
5201      * Start from the end, storing to the stack first.
5202      * This frees those registers, so we need not consider overlap.
5203      */
5204     for (i = nmov; i-- > 0; ) {
5205         unsigned slot = mov[i].dst;
5206 
5207         if (arg_slot_reg_p(slot)) {
5208             goto found_reg;
5209         }
5210 
5211         TCGReg src = mov[i].src;
5212         TCGType dst_type = mov[i].dst_type;
5213         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5214 
5215         /* The argument is going onto the stack; extend into scratch. */
5216         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5217             tcg_debug_assert(parm->ntmp != 0);
5218             mov[i].dst = src = parm->tmp[0];
5219             tcg_out_movext1(s, &mov[i]);
5220         }
5221 
5222         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5223                    tcg_out_helper_stk_ofs(dst_type, slot));
5224     }
5225     return;
5226 
5227  found_reg:
5228     /*
5229      * The remaining arguments are in registers.
5230      * Convert slot numbers to argument registers.
5231      */
5232     nmov = i + 1;
5233     for (i = 0; i < nmov; ++i) {
5234         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5235     }
5236     tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
5237 }
5238 
5239 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5240                                     TCGType type, tcg_target_long imm,
5241                                     const TCGLdstHelperParam *parm)
5242 {
5243     if (arg_slot_reg_p(slot)) {
5244         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5245     } else {
5246         int ofs = tcg_out_helper_stk_ofs(type, slot);
5247         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5248             tcg_debug_assert(parm->ntmp != 0);
5249             tcg_out_movi(s, type, parm->tmp[0], imm);
5250             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5251         }
5252     }
5253 }
5254 
/*
 * Load the arguments common to all qemu_ld/st slow-path helpers:
 * env (always the first argument), the MemOpIdx @oi, and the return
 * address.  On entry, @next_arg indexes info->in[] at the oi argument,
 * i.e. just past the address/data arguments already loaded by the
 * caller.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        /* Tell the backend which argument register is wanted, if any. */
        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* No generator hook: pass the raw return address as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5321 
5322 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5323                                        const TCGCallArgumentLoc *loc,
5324                                        TCGType dst_type, TCGType src_type,
5325                                        TCGReg lo, TCGReg hi)
5326 {
5327     if (dst_type <= TCG_TYPE_REG) {
5328         MemOp src_ext;
5329 
5330         switch (loc->kind) {
5331         case TCG_CALL_ARG_NORMAL:
5332             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5333             break;
5334         case TCG_CALL_ARG_EXTEND_U:
5335             dst_type = TCG_TYPE_REG;
5336             src_ext = MO_UL;
5337             break;
5338         case TCG_CALL_ARG_EXTEND_S:
5339             dst_type = TCG_TYPE_REG;
5340             src_ext = MO_SL;
5341             break;
5342         default:
5343             g_assert_not_reached();
5344         }
5345 
5346         mov[0].dst = loc->arg_slot;
5347         mov[0].dst_type = dst_type;
5348         mov[0].src = lo;
5349         mov[0].src_type = src_type;
5350         mov[0].src_ext = src_ext;
5351         return 1;
5352     }
5353 
5354     assert(TCG_TARGET_REG_BITS == 32);
5355 
5356     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5357     mov[0].src = lo;
5358     mov[0].dst_type = TCG_TYPE_I32;
5359     mov[0].src_type = TCG_TYPE_I32;
5360     mov[0].src_ext = MO_32;
5361 
5362     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5363     mov[1].src = hi;
5364     mov[1].dst_type = TCG_TYPE_I32;
5365     mov[1].src_type = TCG_TYPE_I32;
5366     mov[1].src_ext = MO_32;
5367 
5368     return 2;
5369 }
5370 
/*
 * Load the arguments for a qemu_ld slow-path helper call: the guest
 * address (one or two registers), then env/oi/ra via
 * tcg_out_helper_load_common_args().  The helper is selected by the
 * access size encoded in ldst->oi.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Sub-word loads share the 32-bit helper signature. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
                                  ldst->addrlo_reg, ldst->addrhi_reg);
    next_arg += nmov;

    tcg_out_helper_load_slots(s, nmov, mov, parm);

    /* No special attention for 32 and 64-bit return values. */
    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5408 
/*
 * Move the helper's return value into the destination register(s) of
 * the qemu_ld operation, applying any required extension.  When
 * @load_sign, the helper was chosen to perform any needed sign
 * extension itself, so only a plain move remains.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    TCGMovExtend mov[2];

    if (ldst->type <= TCG_TYPE_REG) {
        /* The result fits in one host register. */
        MemOp mop = get_memop(ldst->oi);

        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            /* Let the move itself perform the sign extension. */
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
    } else {
        /* 64-bit result in a register pair on a 32-bit host. */
        assert(TCG_TARGET_REG_BITS == 32);

        mov[0].dst = ldst->datalo_reg;
        mov[0].src =
            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
        mov[0].dst_type = TCG_TYPE_I32;
        mov[0].src_type = TCG_TYPE_I32;
        mov[0].src_ext = MO_32;

        mov[1].dst = ldst->datahi_reg;
        mov[1].src =
            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
        mov[1].dst_type = TCG_TYPE_REG;
        mov[1].src_type = TCG_TYPE_REG;
        mov[1].src_ext = MO_32;

        /* Both moves at once, resolving a possible register swap. */
        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
    }
}
5463 
5464 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5465                                    const TCGLdstHelperParam *parm)
5466 {
5467     const TCGHelperInfo *info;
5468     const TCGCallArgumentLoc *loc;
5469     TCGMovExtend mov[4];
5470     TCGType data_type;
5471     unsigned next_arg, nmov, n;
5472     MemOp mop = get_memop(ldst->oi);
5473 
5474     switch (mop & MO_SIZE) {
5475     case MO_8:
5476     case MO_16:
5477     case MO_32:
5478         info = &info_helper_st32_mmu;
5479         data_type = TCG_TYPE_I32;
5480         break;
5481     case MO_64:
5482         info = &info_helper_st64_mmu;
5483         data_type = TCG_TYPE_I64;
5484         break;
5485     default:
5486         g_assert_not_reached();
5487     }
5488 
5489     /* Defer env argument. */
5490     next_arg = 1;
5491     nmov = 0;
5492 
5493     /* Handle addr argument. */
5494     loc = &info->in[next_arg];
5495     n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
5496                                ldst->addrlo_reg, ldst->addrhi_reg);
5497     next_arg += n;
5498     nmov += n;
5499 
5500     /* Handle data argument. */
5501     loc = &info->in[next_arg];
5502     n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5503                                ldst->datalo_reg, ldst->datahi_reg);
5504     next_arg += n;
5505     nmov += n;
5506     tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
5507 
5508     tcg_out_helper_load_slots(s, nmov, mov, parm);
5509     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5510 }
5511 
5512 #ifdef CONFIG_PROFILER
5513 
/* avoid copy/paste errors */
/* Accumulate one profile counter: to->field += atomic read of from->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Track the maximum of one profile counter across contexts. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
5527 
5528 /* Pass in a zero'ed @prof */
5529 static inline
5530 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5531 {
5532     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5533     unsigned int i;
5534 
5535     for (i = 0; i < n_ctxs; i++) {
5536         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5537         const TCGProfile *orig = &s->prof;
5538 
5539         if (counters) {
5540             PROF_ADD(prof, orig, cpu_exec_time);
5541             PROF_ADD(prof, orig, tb_count1);
5542             PROF_ADD(prof, orig, tb_count);
5543             PROF_ADD(prof, orig, op_count);
5544             PROF_MAX(prof, orig, op_count_max);
5545             PROF_ADD(prof, orig, temp_count);
5546             PROF_MAX(prof, orig, temp_count_max);
5547             PROF_ADD(prof, orig, del_op_count);
5548             PROF_ADD(prof, orig, code_in_len);
5549             PROF_ADD(prof, orig, code_out_len);
5550             PROF_ADD(prof, orig, search_out_len);
5551             PROF_ADD(prof, orig, interm_time);
5552             PROF_ADD(prof, orig, code_time);
5553             PROF_ADD(prof, orig, la_time);
5554             PROF_ADD(prof, orig, opt_time);
5555             PROF_ADD(prof, orig, restore_count);
5556             PROF_ADD(prof, orig, restore_time);
5557         }
5558         if (table) {
5559             int i;
5560 
5561             for (i = 0; i < NB_OPS; i++) {
5562                 PROF_ADD(prof, orig, table_op_count[i]);
5563             }
5564         }
5565     }
5566 }
5567 
5568 #undef PROF_ADD
5569 #undef PROF_MAX
5570 
/* Snapshot only the scalar counters; see tcg_profile_snapshot(). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
5575 
/* Snapshot only the per-opcode counts; see tcg_profile_snapshot(). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
5580 
5581 void tcg_dump_op_count(GString *buf)
5582 {
5583     TCGProfile prof = {};
5584     int i;
5585 
5586     tcg_profile_snapshot_table(&prof);
5587     for (i = 0; i < NB_OPS; i++) {
5588         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5589                                prof.table_op_count[i]);
5590     }
5591 }
5592 
5593 int64_t tcg_cpu_exec_time(void)
5594 {
5595     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5596     unsigned int i;
5597     int64_t ret = 0;
5598 
5599     for (i = 0; i < n_ctxs; i++) {
5600         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5601         const TCGProfile *prof = &s->prof;
5602 
5603         ret += qatomic_read(&prof->cpu_exec_time);
5604     }
5605     return ret;
5606 }
5607 #else
/* Stub used when the TCG profiler is not compiled in. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5612 
/*
 * Stub used when the TCG profiler is not compiled in: the query
 * cannot be answered, so report the error and terminate.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
5618 #endif
5619 
5620 
/*
 * Generate host code for the TCG ops accumulated in s->ops, writing
 * into the code buffer of @tb.
 *
 * Returns the number of bytes of host code generated, or a negative
 * value on overflow:
 *   -1: pending host code buffer overflow (high-water mark exceeded);
 *   -2: TB too large for the 16-bit gen_insn_end_off offsets, or
 *       relocations could not be resolved.
 * The caller is expected to restart with a smaller TB on failure.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op and temp counts for this TB. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization, if requested. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    /* Dead code elimination and liveness analysis passes. */
    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Index of the current guest insn; -1 until the first insn_start. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous insn's code range, if any. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5862 
5863 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the accumulated TCG profile
 * counters to @buf.  The "2.4 GHz" conversion is a fixed scale for
 * readability, not a measured host frequency.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when no TBs have been translated. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5927 #else
/* Stub used when the TCG profiler is not compiled in. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5932 #endif
5933 
5934 #ifdef ELF_HOST_MACHINE
5935 /* In order to use this feature, the backend needs to do three things:
5936 
5937    (1) Define ELF_HOST_MACHINE to indicate both what value to
5938        put into the ELF image and to indicate support for the feature.
5939 
5940    (2) Define tcg_register_jit.  This should create a buffer containing
5941        the contents of a .debug_frame section that describes the post-
5942        prologue unwind info for the tcg machine.
5943 
5944    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5945 */
5946 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file, on a doubly-linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * NOTE(review): per the GDB JIT protocol, the debugger traps calls to
 * this function; noinline and the empty asm keep the call site intact
 * against optimization — confirm against the GDB JIT interface docs.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
5979 
/*
 * Return the offset of @str within the ELF string table @strtab.
 * The table begins with a NUL, followed by consecutive NUL-terminated
 * names.  The caller must pass a string known to be present: there is
 * no terminating condition for a missing entry.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
5991 
5992 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5993                                  const void *debug_frame,
5994                                  size_t debug_frame_size)
5995 {
5996     struct __attribute__((packed)) DebugInfo {
5997         uint32_t  len;
5998         uint16_t  version;
5999         uint32_t  abbrev;
6000         uint8_t   ptr_size;
6001         uint8_t   cu_die;
6002         uint16_t  cu_lang;
6003         uintptr_t cu_low_pc;
6004         uintptr_t cu_high_pc;
6005         uint8_t   fn_die;
6006         char      fn_name[16];
6007         uintptr_t fn_low_pc;
6008         uintptr_t fn_high_pc;
6009         uint8_t   cu_eoc;
6010     };
6011 
6012     struct ElfImage {
6013         ElfW(Ehdr) ehdr;
6014         ElfW(Phdr) phdr;
6015         ElfW(Shdr) shdr[7];
6016         ElfW(Sym)  sym[2];
6017         struct DebugInfo di;
6018         uint8_t    da[24];
6019         char       str[80];
6020     };
6021 
6022     struct ElfImage *img;
6023 
6024     static const struct ElfImage img_template = {
6025         .ehdr = {
6026             .e_ident[EI_MAG0] = ELFMAG0,
6027             .e_ident[EI_MAG1] = ELFMAG1,
6028             .e_ident[EI_MAG2] = ELFMAG2,
6029             .e_ident[EI_MAG3] = ELFMAG3,
6030             .e_ident[EI_CLASS] = ELF_CLASS,
6031             .e_ident[EI_DATA] = ELF_DATA,
6032             .e_ident[EI_VERSION] = EV_CURRENT,
6033             .e_type = ET_EXEC,
6034             .e_machine = ELF_HOST_MACHINE,
6035             .e_version = EV_CURRENT,
6036             .e_phoff = offsetof(struct ElfImage, phdr),
6037             .e_shoff = offsetof(struct ElfImage, shdr),
6038             .e_ehsize = sizeof(ElfW(Shdr)),
6039             .e_phentsize = sizeof(ElfW(Phdr)),
6040             .e_phnum = 1,
6041             .e_shentsize = sizeof(ElfW(Shdr)),
6042             .e_shnum = ARRAY_SIZE(img->shdr),
6043             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6044 #ifdef ELF_HOST_FLAGS
6045             .e_flags = ELF_HOST_FLAGS,
6046 #endif
6047 #ifdef ELF_OSABI
6048             .e_ident[EI_OSABI] = ELF_OSABI,
6049 #endif
6050         },
6051         .phdr = {
6052             .p_type = PT_LOAD,
6053             .p_flags = PF_X,
6054         },
6055         .shdr = {
6056             [0] = { .sh_type = SHT_NULL },
6057             /* Trick: The contents of code_gen_buffer are not present in
6058                this fake ELF file; that got allocated elsewhere.  Therefore
6059                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6060                will not look for contents.  We can record any address.  */
6061             [1] = { /* .text */
6062                 .sh_type = SHT_NOBITS,
6063                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6064             },
6065             [2] = { /* .debug_info */
6066                 .sh_type = SHT_PROGBITS,
6067                 .sh_offset = offsetof(struct ElfImage, di),
6068                 .sh_size = sizeof(struct DebugInfo),
6069             },
6070             [3] = { /* .debug_abbrev */
6071                 .sh_type = SHT_PROGBITS,
6072                 .sh_offset = offsetof(struct ElfImage, da),
6073                 .sh_size = sizeof(img->da),
6074             },
6075             [4] = { /* .debug_frame */
6076                 .sh_type = SHT_PROGBITS,
6077                 .sh_offset = sizeof(struct ElfImage),
6078             },
6079             [5] = { /* .symtab */
6080                 .sh_type = SHT_SYMTAB,
6081                 .sh_offset = offsetof(struct ElfImage, sym),
6082                 .sh_size = sizeof(img->sym),
6083                 .sh_info = 1,
6084                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6085                 .sh_entsize = sizeof(ElfW(Sym)),
6086             },
6087             [6] = { /* .strtab */
6088                 .sh_type = SHT_STRTAB,
6089                 .sh_offset = offsetof(struct ElfImage, str),
6090                 .sh_size = sizeof(img->str),
6091             }
6092         },
6093         .sym = {
6094             [1] = { /* code_gen_buffer */
6095                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6096                 .st_shndx = 1,
6097             }
6098         },
6099         .di = {
6100             .len = sizeof(struct DebugInfo) - 4,
6101             .version = 2,
6102             .ptr_size = sizeof(void *),
6103             .cu_die = 1,
6104             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6105             .fn_die = 2,
6106             .fn_name = "code_gen_buffer"
6107         },
6108         .da = {
6109             1,          /* abbrev number (the cu) */
6110             0x11, 1,    /* DW_TAG_compile_unit, has children */
6111             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6112             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6113             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6114             0, 0,       /* end of abbrev */
6115             2,          /* abbrev number (the fn) */
6116             0x2e, 0,    /* DW_TAG_subprogram, no children */
6117             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6118             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6119             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6120             0, 0,       /* end of abbrev */
6121             0           /* no more abbrev */
6122         },
6123         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6124                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6125     };
6126 
6127     /* We only need a single jit entry; statically allocate it.  */
6128     static struct jit_code_entry one_entry;
6129 
6130     uintptr_t buf = (uintptr_t)buf_ptr;
6131     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6132     DebugFrameHeader *dfh;
6133 
6134     img = g_malloc(img_size);
6135     *img = img_template;
6136 
6137     img->phdr.p_vaddr = buf;
6138     img->phdr.p_paddr = buf;
6139     img->phdr.p_memsz = buf_size;
6140 
6141     img->shdr[1].sh_name = find_string(img->str, ".text");
6142     img->shdr[1].sh_addr = buf;
6143     img->shdr[1].sh_size = buf_size;
6144 
6145     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6146     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6147 
6148     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6149     img->shdr[4].sh_size = debug_frame_size;
6150 
6151     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6152     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6153 
6154     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6155     img->sym[1].st_value = buf;
6156     img->sym[1].st_size = buf_size;
6157 
6158     img->di.cu_low_pc = buf;
6159     img->di.cu_high_pc = buf + buf_size;
6160     img->di.fn_low_pc = buf;
6161     img->di.fn_high_pc = buf + buf_size;
6162 
6163     dfh = (DebugFrameHeader *)(img + 1);
6164     memcpy(dfh, debug_frame, debug_frame_size);
6165     dfh->fde.func_start = buf;
6166     dfh->fde.func_len = buf_size;
6167 
6168 #ifdef DEBUG_JIT
6169     /* Enable this block to be able to debug the ELF image file creation.
6170        One can use readelf, objdump, or other inspection utilities.  */
6171     {
6172         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6173         FILE *f = fopen(jit, "w+b");
6174         if (f) {
6175             if (fwrite(img, img_size, 1, f) != img_size) {
6176                 /* Avoid stupid unused return value warning for fwrite.  */
6177             }
6178             fclose(f);
6179         }
6180     }
6181 #endif
6182 
6183     one_entry.symfile_addr = img;
6184     one_entry.symfile_size = img_size;
6185 
6186     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6187     __jit_debug_descriptor.relevant_entry = &one_entry;
6188     __jit_debug_descriptor.first_entry = &one_entry;
6189     __jit_debug_register_code();
6190 }
6191 #else
6192 /* No support for the feature.  Provide the entry point expected by exec.c,
6193    and implement the internal function we declared earlier.  */
6194 
/* Stub used when ELF_HOST_MACHINE is not defined: GDB JIT registration
   is unsupported on this host, so all arguments are ignored.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6200 
/* Public no-op counterpart of the stub above, keeping the entry point
   that the rest of QEMU links against available on hosts without
   ELF_HOST_MACHINE support.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6204 #endif /* ELF_HOST_MACHINE */
6205 
6206 #if !TCG_TARGET_MAYBE_vec
/* Placeholder for backends without vector support (TCG_TARGET_MAYBE_vec
   is 0): generic code must never emit a vector op needing expansion
   here, so reaching this function is a programming error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6211 #endif
6212