xref: /openbmc/qemu/tcg/tcg.c (revision 597f9b2d30b612d760f9f48fe04cda29bf628e67)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to jump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #if HOST_BIG_ENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
64 
65 /* Forward declarations for functions declared in tcg-target.c.inc and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
72 /* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Aligned to the host pointer size so that the FDE's uintptr_t
       fields below land at their natural alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Packed so that the two uintptr_t fields follow the uint32_t fields
   with no padding in the emitted debug-frame image. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header, emitted as one unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
95 static void tcg_register_jit_int(const void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
102                        intptr_t arg2);
103 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105                          TCGReg ret, tcg_target_long arg);
106 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
107 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
108 static void tcg_out_goto_tb(TCGContext *s, int which);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114                             TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120                            unsigned vecl, unsigned vece,
121                            const TCGArg args[TCG_MAX_OP_ARGS],
122                            const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Stubs for hosts built without vector support (TCG_TARGET_MAYBE_vec
 * is 0).  They exist only so that common code compiles; reaching any
 * of them at runtime is a bug, hence the assertion.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          const TCGHelperInfo *info);
153 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
154 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
159 TCGContext tcg_init_ctx;
160 __thread TCGContext *tcg_ctx;
161 
162 TCGContext **tcg_ctxs;
163 unsigned int tcg_cur_ctxs;
164 unsigned int tcg_max_ctxs;
165 TCGv_env cpu_env = 0;
166 const void *tcg_code_gen_epilogue;
167 uintptr_t tcg_splitwx_diff;
168 
169 #ifndef CONFIG_TCG_INTERPRETER
170 tcg_prologue_fn *tcg_qemu_tb_exec;
171 #endif
172 
173 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
174 static TCGRegSet tcg_target_call_clobber_regs;
175 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the generated-code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
188 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the generated-code stream. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise, then advance by however many
           units make up 16 bits. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
211 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the generated-code stream. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise, then advance by however many
           units make up 32 bits. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
234 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the generated-code stream. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise, then advance by however many
           units make up 64 bits. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
257 
258 /* label relocation processing */
259 
260 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
261                           TCGLabel *l, intptr_t addend)
262 {
263     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
264 
265     r->type = type;
266     r->ptr = code_ptr;
267     r->addend = addend;
268     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
269 }
270 
/* Bind label @l to the current output position (must not already be bound). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Record the read-execute alias of the write pointer, for the
       benefit of pc-relative consumers of the label value. */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
277 
278 TCGLabel *gen_new_label(void)
279 {
280     TCGContext *s = tcg_ctx;
281     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
282 
283     memset(l, 0, sizeof(TCGLabel));
284     l->id = s->nb_labels++;
285     QSIMPLEQ_INIT(&l->relocs);
286 
287     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
288 
289     return l;
290 }
291 
292 static bool tcg_resolve_relocs(TCGContext *s)
293 {
294     TCGLabel *l;
295 
296     QSIMPLEQ_FOREACH(l, &s->labels, next) {
297         TCGRelocation *r;
298         uintptr_t value = l->u.value;
299 
300         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
301             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
302                 return false;
303             }
304         }
305     }
306     return true;
307 }
308 
/* Record the current code offset as the jump-reset point for exit @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
317 
/* Record the current code offset as the goto_tb jump insn for exit @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
326 
/* Address of the jump-target slot for exit @which of the TB being built. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
335 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit to whoever armed s->jmp_trans (presumably the
       translation loop); -2 identifies the TB-overflow cause. */
    siglongjmp(s->jmp_trans, -2);
}
342 
/*
 * Token-pasting helpers: glue prefix P onto 1..6 suffix tokens,
 * joined by '_'.  Used below to build the constraint-set identifiers.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
349 
350 /* Define an enumeration for the various combinations. */
351 
352 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
353 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
354 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
355 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
356 
357 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
358 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
359 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
360 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
361 
362 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
363 
364 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
365 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
366 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
367 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
368 
369 typedef enum {
370 #include "tcg-target-con-set.h"
371 } TCGConstraintSetIndex;
372 
373 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
374 
375 #undef C_O0_I1
376 #undef C_O0_I2
377 #undef C_O0_I3
378 #undef C_O0_I4
379 #undef C_O1_I1
380 #undef C_O1_I2
381 #undef C_O1_I3
382 #undef C_O1_I4
383 #undef C_N1_I2
384 #undef C_O2_I1
385 #undef C_O2_I2
386 #undef C_O2_I3
387 #undef C_O2_I4
388 
389 /* Put all of the constraint sets into an array, indexed by the enum. */
390 
391 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
392 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
393 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
394 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
395 
396 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
397 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
398 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
399 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
400 
401 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
402 
403 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
404 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
405 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
406 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
407 
408 static const TCGTargetOpDef constraint_sets[] = {
409 #include "tcg-target-con-set.h"
410 };
411 
412 
413 #undef C_O0_I1
414 #undef C_O0_I2
415 #undef C_O0_I3
416 #undef C_O0_I4
417 #undef C_O1_I1
418 #undef C_O1_I2
419 #undef C_O1_I3
420 #undef C_O1_I4
421 #undef C_N1_I2
422 #undef C_O2_I1
423 #undef C_O2_I2
424 #undef C_O2_I3
425 #undef C_O2_I4
426 
427 /* Expand the enumerator to be returned from tcg_target_op_def(). */
428 
429 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
430 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
431 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
432 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
433 
434 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
435 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
436 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
437 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
438 
439 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
440 
441 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
442 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
443 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
444 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
445 
446 #include "tcg-target.c.inc"
447 
/* Allocate per-context plugin state; a no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
456 
457 /*
458  * All TCG threads except the parent (i.e. the one that called tcg_context_init
459  * and registered the target's TCG globals) must register with this function
460  * before initiating translation.
461  *
462  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
463  * of tcg_region_init() for the reasoning behind this.
464  *
465  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
466  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
467  * is not used anymore for translation once this function is called.
468  *
469  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
470  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
471  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a byte copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Rebase the pointer from the parent's temps[] into ours. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary contexts get their own plugin state and region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
507 
508 /* pool based memory allocation */
/*
 * Slow path of tcg_malloc(): carve @size bytes out of the context's
 * memory pool, growing the chunk list as needed.  Oversized requests
 * get a dedicated chunk on the "large" list (freed by tcg_pool_reset);
 * normal requests come from fixed-size TCG_POOL_CHUNK_SIZE chunks.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No chunk available: allocate and link a fresh one. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Reuse the next chunk kept around from a prior reset. */
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    /* size <= TCG_POOL_CHUNK_SIZE here, so it fits in one chunk. */
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
549 
550 void tcg_pool_reset(TCGContext *s)
551 {
552     TCGPool *p, *t;
553     for (p = s->pool_first_large; p; p = t) {
554         t = p->next;
555         g_free(p);
556     }
557     s->pool_first_large = NULL;
558     s->pool_cur = s->pool_end = NULL;
559     s->pool_current = NULL;
560 }
561 
562 #include "exec/helper-proto.h"
563 
564 static TCGHelperInfo all_helpers[] = {
565 #include "exec/helper-tcg.h"
566 };
567 static GHashTable *helper_table;
568 
569 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map a dh_typecode_* value to the libffi type descriptor used to
 * marshal helper calls in the TCG interpreter.  Aborts on an
 * unrecognized typecode.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
606 
/*
 * Build an ffi_cif call descriptor for each distinct helper signature
 * (typemask), sharing one cif between helpers with identical
 * signatures.  The cifs themselves are leaked intentionally: they are
 * referenced from all_helpers[] for the lifetime of the process; only
 * the deduplication hash table is destroyed.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif and its argument-type array in one allocation. */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a previously built cif for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Each argument occupies a 3-bit field above the return. */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
658 #endif /* CONFIG_TCG_INTERPRETER */
659 
/* Running totals while laying out a helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
666 
667 static void layout_arg_even(TCGCumulativeArgs *cum)
668 {
669     cum->arg_slot += cum->arg_slot & 1;
670 }
671 
672 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
673                          TCGCallArgumentKind kind)
674 {
675     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
676 
677     *loc = (TCGCallArgumentLoc){
678         .kind = kind,
679         .arg_idx = cum->arg_idx,
680         .arg_slot = cum->arg_slot,
681     };
682     cum->info_in_idx++;
683     cum->arg_slot++;
684 }
685 
686 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
687                                 TCGHelperInfo *info, int n)
688 {
689     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
690 
691     for (int i = 0; i < n; ++i) {
692         /* Layout all using the same arg_idx, adjusting the subindex. */
693         loc[i] = (TCGCallArgumentLoc){
694             .kind = TCG_CALL_ARG_NORMAL,
695             .arg_idx = cum->arg_idx,
696             .tmp_subindex = i,
697             .arg_slot = cum->arg_slot + i,
698         };
699     }
700     cum->info_in_idx += n;
701     cum->arg_slot += n;
702 }
703 
/*
 * Lay out a 128-bit argument passed by reference: the first in[]
 * entry carries the pointer (a normal argument slot), while the
 * following entries describe the stack copy that the pointer will
 * address (ref_slot-relative; rebased later by init_call_layout).
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
735 
/*
 * Parse @info->typemask and compute the helper's call layout: how the
 * return value is delivered (nr_out / out_kind) and where each input
 * argument lives (in[], register/stack slot assignment), following
 * the backend's TCG_TARGET_CALL_* conventions.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each argument occupies one 3-bit field above the return type.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Signed typecodes are odd: pick _S vs _U accordingly. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Stack parameters are in use: place copies after them,
               aligned for Int128. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
915 
916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
917 static void process_op_defs(TCGContext *s);
918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
919                                             TCGReg reg, const char *name);
920 
921 static void tcg_context_init(unsigned max_cpus)
922 {
923     TCGContext *s = &tcg_init_ctx;
924     int op, total_args, n, i;
925     TCGOpDef *def;
926     TCGArgConstraint *args_ct;
927     TCGTemp *ts;
928 
929     memset(s, 0, sizeof(*s));
930     s->nb_globals = 0;
931 
932     /* Count total number of arguments and allocate the corresponding
933        space */
934     total_args = 0;
935     for(op = 0; op < NB_OPS; op++) {
936         def = &tcg_op_defs[op];
937         n = def->nb_iargs + def->nb_oargs;
938         total_args += n;
939     }
940 
941     args_ct = g_new0(TCGArgConstraint, total_args);
942 
943     for(op = 0; op < NB_OPS; op++) {
944         def = &tcg_op_defs[op];
945         def->args_ct = args_ct;
946         n = def->nb_iargs + def->nb_oargs;
947         args_ct += n;
948     }
949 
950     /* Register helpers.  */
951     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
952     helper_table = g_hash_table_new(NULL, NULL);
953 
954     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
955         init_call_layout(&all_helpers[i]);
956         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
957                             (gpointer)&all_helpers[i]);
958     }
959 
960 #ifdef CONFIG_TCG_INTERPRETER
961     init_ffi_layouts();
962 #endif
963 
964     tcg_target_init(s);
965     process_op_defs(s);
966 
967     /* Reverse the order of the saved registers, assuming they're all at
968        the start of tcg_target_reg_alloc_order.  */
969     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
970         int r = tcg_target_reg_alloc_order[n];
971         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
972             break;
973         }
974     }
975     for (i = 0; i < n; ++i) {
976         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
977     }
978     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
979         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
980     }
981 
982     alloc_tcg_plugin_context(s);
983 
984     tcg_ctx = s;
985     /*
986      * In user-mode we simply share the init context among threads, since we
987      * use a single region. See the documentation tcg_region_init() for the
988      * reasoning behind this.
989      * In softmmu we will have at most max_cpus TCG threads.
990      */
991 #ifdef CONFIG_USER_ONLY
992     tcg_ctxs = &tcg_ctx;
993     tcg_cur_ctxs = 1;
994     tcg_max_ctxs = 1;
995 #else
996     tcg_max_ctxs = max_cpus;
997     tcg_ctxs = g_new0(TCGContext *, max_cpus);
998 #endif
999 
1000     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1001     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1002     cpu_env = temp_tcgv_ptr(ts);
1003 }
1004 
/*
 * One-time TCG initialization: build the global TCG context for up to
 * max_cpus translation threads, then initialize the code generation
 * region(s) of tb_size bytes (splitwx selects split read/write-execute
 * mapping; see tcg_region_init).
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1010 
1011 /*
1012  * Allocate TBs right before their corresponding translated code, making
1013  * sure that TBs and code are on different cache lines.
1014  */
1015 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1016 {
1017     uintptr_t align = qemu_icache_linesize;
1018     TranslationBlock *tb;
1019     void *next;
1020 
1021  retry:
1022     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1023     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1024 
1025     if (unlikely(next > s->code_gen_highwater)) {
1026         if (tcg_region_alloc(s)) {
1027             return NULL;
1028         }
1029         goto retry;
1030     }
1031     qatomic_set(&s->code_gen_ptr, next);
1032     s->data_gen_ptr = NULL;
1033     return tb;
1034 }
1035 
/*
 * Generate the backend prologue/epilogue at the start of the context's
 * code buffer, record its size, and flush the instruction cache over it.
 * On native backends this also publishes tcg_qemu_tb_exec, which points
 * at the start of the generated prologue.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution goes through the RX alias of the (possibly split-wx) buffer. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    /* Optionally dump the prologue, separating code from pool data. */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Everything from data_gen_ptr onward is constant pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1117 
1118 void tcg_func_start(TCGContext *s)
1119 {
1120     tcg_pool_reset(s);
1121     s->nb_temps = s->nb_globals;
1122 
1123     /* No temps have been previously allocated for size or locality.  */
1124     memset(s->free_temps, 0, sizeof(s->free_temps));
1125 
1126     /* No constant temps have been previously allocated. */
1127     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1128         if (s->const_table[i]) {
1129             g_hash_table_remove_all(s->const_table[i]);
1130         }
1131     }
1132 
1133     s->nb_ops = 0;
1134     s->nb_labels = 0;
1135     s->current_frame_offset = s->frame_start;
1136 
1137 #ifdef CONFIG_DEBUG_TCG
1138     s->goto_tb_issue_mask = 0;
1139 #endif
1140 
1141     QTAILQ_INIT(&s->ops);
1142     QTAILQ_INIT(&s->free_ops);
1143     QSIMPLEQ_INIT(&s->labels);
1144 }
1145 
1146 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1147 {
1148     int n = s->nb_temps++;
1149 
1150     if (n >= TCG_MAX_TEMPS) {
1151         tcg_raise_tb_overflow(s);
1152     }
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1162     s->nb_globals++;
1163     ts = tcg_temp_alloc(s);
1164     ts->kind = TEMP_GLOBAL;
1165 
1166     return ts;
1167 }
1168 
1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1170                                             TCGReg reg, const char *name)
1171 {
1172     TCGTemp *ts;
1173 
1174     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1175         tcg_abort();
1176     }
1177 
1178     ts = tcg_global_alloc(s);
1179     ts->base_type = type;
1180     ts->type = type;
1181     ts->kind = TEMP_FIXED;
1182     ts->reg = reg;
1183     ts->name = name;
1184     tcg_regset_set_reg(s->reserved_regs, reg);
1185 
1186     return ts;
1187 }
1188 
1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1190 {
1191     s->frame_start = start;
1192     s->frame_end = start + size;
1193     s->frame_temp
1194         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1195 }
1196 
1197 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1198                                      intptr_t offset, const char *name)
1199 {
1200     TCGContext *s = tcg_ctx;
1201     TCGTemp *base_ts = tcgv_ptr_temp(base);
1202     TCGTemp *ts = tcg_global_alloc(s);
1203     int indirect_reg = 0;
1204 
1205     switch (base_ts->kind) {
1206     case TEMP_FIXED:
1207         break;
1208     case TEMP_GLOBAL:
1209         /* We do not support double-indirect registers.  */
1210         tcg_debug_assert(!base_ts->indirect_reg);
1211         base_ts->indirect_base = 1;
1212         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1213                             ? 2 : 1);
1214         indirect_reg = 1;
1215         break;
1216     default:
1217         g_assert_not_reached();
1218     }
1219 
1220     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1221         TCGTemp *ts2 = tcg_global_alloc(s);
1222         char buf[64];
1223 
1224         ts->base_type = TCG_TYPE_I64;
1225         ts->type = TCG_TYPE_I32;
1226         ts->indirect_reg = indirect_reg;
1227         ts->mem_allocated = 1;
1228         ts->mem_base = base_ts;
1229         ts->mem_offset = offset;
1230         pstrcpy(buf, sizeof(buf), name);
1231         pstrcat(buf, sizeof(buf), "_0");
1232         ts->name = strdup(buf);
1233 
1234         tcg_debug_assert(ts2 == ts + 1);
1235         ts2->base_type = TCG_TYPE_I64;
1236         ts2->type = TCG_TYPE_I32;
1237         ts2->indirect_reg = indirect_reg;
1238         ts2->mem_allocated = 1;
1239         ts2->mem_base = base_ts;
1240         ts2->mem_offset = offset + 4;
1241         ts2->temp_subindex = 1;
1242         pstrcpy(buf, sizeof(buf), name);
1243         pstrcat(buf, sizeof(buf), "_1");
1244         ts2->name = strdup(buf);
1245     } else {
1246         ts->base_type = type;
1247         ts->type = type;
1248         ts->indirect_reg = indirect_reg;
1249         ts->mem_allocated = 1;
1250         ts->mem_base = base_ts;
1251         ts->mem_offset = offset;
1252         ts->name = name;
1253     }
1254     return ts;
1255 }
1256 
1257 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1258 {
1259     TCGContext *s = tcg_ctx;
1260     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1261     TCGTemp *ts;
1262     int idx, k;
1263 
1264     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1265     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1266     if (idx < TCG_MAX_TEMPS) {
1267         /* There is already an available temp with the right type.  */
1268         clear_bit(idx, s->free_temps[k].l);
1269 
1270         ts = &s->temps[idx];
1271         ts->temp_allocated = 1;
1272         tcg_debug_assert(ts->base_type == type);
1273         tcg_debug_assert(ts->kind == kind);
1274     } else {
1275         int i, n;
1276 
1277         switch (type) {
1278         case TCG_TYPE_I32:
1279         case TCG_TYPE_V64:
1280         case TCG_TYPE_V128:
1281         case TCG_TYPE_V256:
1282             n = 1;
1283             break;
1284         case TCG_TYPE_I64:
1285             n = 64 / TCG_TARGET_REG_BITS;
1286             break;
1287         case TCG_TYPE_I128:
1288             n = 128 / TCG_TARGET_REG_BITS;
1289             break;
1290         default:
1291             g_assert_not_reached();
1292         }
1293 
1294         ts = tcg_temp_alloc(s);
1295         ts->base_type = type;
1296         ts->temp_allocated = 1;
1297         ts->kind = kind;
1298 
1299         if (n == 1) {
1300             ts->type = type;
1301         } else {
1302             ts->type = TCG_TYPE_REG;
1303 
1304             for (i = 1; i < n; ++i) {
1305                 TCGTemp *ts2 = tcg_temp_alloc(s);
1306 
1307                 tcg_debug_assert(ts2 == ts + i);
1308                 ts2->base_type = type;
1309                 ts2->type = TCG_TYPE_REG;
1310                 ts2->temp_allocated = 1;
1311                 ts2->temp_subindex = i;
1312                 ts2->kind = kind;
1313             }
1314         }
1315     }
1316 
1317 #if defined(CONFIG_DEBUG_TCG)
1318     s->temps_in_use++;
1319 #endif
1320     return ts;
1321 }
1322 
1323 TCGv_vec tcg_temp_new_vec(TCGType type)
1324 {
1325     TCGTemp *t;
1326 
1327 #ifdef CONFIG_DEBUG_TCG
1328     switch (type) {
1329     case TCG_TYPE_V64:
1330         assert(TCG_TARGET_HAS_v64);
1331         break;
1332     case TCG_TYPE_V128:
1333         assert(TCG_TARGET_HAS_v128);
1334         break;
1335     case TCG_TYPE_V256:
1336         assert(TCG_TARGET_HAS_v256);
1337         break;
1338     default:
1339         g_assert_not_reached();
1340     }
1341 #endif
1342 
1343     t = tcg_temp_new_internal(type, 0);
1344     return temp_tcgv_vec(t);
1345 }
1346 
1347 /* Create a new temp of the same type as an existing temp.  */
1348 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1349 {
1350     TCGTemp *t = tcgv_vec_temp(match);
1351 
1352     tcg_debug_assert(t->temp_allocated != 0);
1353 
1354     t = tcg_temp_new_internal(t->base_type, 0);
1355     return temp_tcgv_vec(t);
1356 }
1357 
1358 void tcg_temp_free_internal(TCGTemp *ts)
1359 {
1360     TCGContext *s = tcg_ctx;
1361     int k, idx;
1362 
1363     switch (ts->kind) {
1364     case TEMP_CONST:
1365         /*
1366          * In order to simplify users of tcg_constant_*,
1367          * silently ignore free.
1368          */
1369         return;
1370     case TEMP_NORMAL:
1371     case TEMP_LOCAL:
1372         break;
1373     default:
1374         g_assert_not_reached();
1375     }
1376 
1377     tcg_debug_assert(ts->temp_allocated != 0);
1378     ts->temp_allocated = 0;
1379 
1380 #if defined(CONFIG_DEBUG_TCG)
1381     assert(s->temps_in_use > 0);
1382     s->temps_in_use--;
1383 #endif
1384 
1385     idx = temp_idx(ts);
1386     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1387     set_bit(idx, s->free_temps[k].l);
1388 }
1389 
/*
 * Return the interned TEMP_CONST temp holding VAL of TYPE.  Constants
 * are cached per-type in s->const_table (keyed by 64-bit value), so
 * repeated requests for the same value return the same temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* First constant of this type: create the per-type cache. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        /* Points at the temp's own val field, used as the hash key. */
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 32-bit host: a 64-bit constant spans two adjacent temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1444 
1445 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1446 {
1447     val = dup_const(vece, val);
1448     return temp_tcgv_vec(tcg_constant_internal(type, val));
1449 }
1450 
1451 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1452 {
1453     TCGTemp *t = tcgv_vec_temp(match);
1454 
1455     tcg_debug_assert(t->temp_allocated != 0);
1456     return tcg_constant_vec(t->base_type, vece, val);
1457 }
1458 
1459 TCGv_i32 tcg_const_i32(int32_t val)
1460 {
1461     TCGv_i32 t0;
1462     t0 = tcg_temp_new_i32();
1463     tcg_gen_movi_i32(t0, val);
1464     return t0;
1465 }
1466 
1467 TCGv_i64 tcg_const_i64(int64_t val)
1468 {
1469     TCGv_i64 t0;
1470     t0 = tcg_temp_new_i64();
1471     tcg_gen_movi_i64(t0, val);
1472     return t0;
1473 }
1474 
1475 TCGv_i32 tcg_const_local_i32(int32_t val)
1476 {
1477     TCGv_i32 t0;
1478     t0 = tcg_temp_local_new_i32();
1479     tcg_gen_movi_i32(t0, val);
1480     return t0;
1481 }
1482 
1483 TCGv_i64 tcg_const_local_i64(int64_t val)
1484 {
1485     TCGv_i64 t0;
1486     t0 = tcg_temp_local_new_i64();
1487     tcg_gen_movi_i64(t0, val);
1488     return t0;
1489 }
1490 
#if defined(CONFIG_DEBUG_TCG)
/* Reset the outstanding-temp counter (debug builds only). */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/* Return 1 if temps are still outstanding (a probable leak), else 0. */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;

    if (s->temps_in_use == 0) {
        return 0;
    }
    /* Clear the count so that we don't give another
     * warning immediately next time around.
     */
    s->temps_in_use = 0;
    return 1;
}
#endif
1511 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend supports any vector register width at all. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Ops that every backend must implement unconditionally. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer ops, gated per backend. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word ops only needed when registers are 32 bits. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit integer ops, present only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer ops, gated per backend. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Core vector ops, implied by any vector support at all. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector ops, gated per backend. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes are assumed supported by their backend. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1806 
1807 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1808 
/*
 * Emit an INDEX_op_call invoking helper FUNC, returning into RET (NULL
 * for void helpers) and reading operands from ARGS.  FUNC must have
 * been registered in helper_table; its TCGHelperInfo entry dictates how
 * each argument is passed (direct, widened, or by reference).  Note
 * that the NARGS parameter is not consulted here; the argument count
 * comes from info->nr_in.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];  /* widening temps to free below */
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + trailing func pointer and info pointer. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Emit the output operands first. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Wide return: RET is the first of n consecutive temp pieces. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Then the inputs, per the layout computed by init_call_layout. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants a full 64-bit value: widen the i32 argument. */
                TCGv_i64 temp = tcg_temp_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* The final two slots carry the target function and its info. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The widening temps are dead once the call op has consumed them. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1895 
/*
 * Initialize the value-location state of every temp ahead of register
 * allocation, and clear the register->temp reverse map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default: the value lives in its memory slot. */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps permanently occupy their register. */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            /* Memory slots for these temps are allocated lazily. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
1928 
/*
 * Format a human-readable name for temp 'ts' into buf (for op dumps):
 * fixed/global temps print their name, other kinds print a prefixed
 * per-kind index, and constants print their value.  Returns buf.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants: print the vector width in bits and value. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1971 
1972 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1973                              int buf_size, TCGArg arg)
1974 {
1975     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1976 }
1977 
/* Human-readable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1993 
/*
 * Names for MemOp size/sign/endian combinations, indexed by
 * (memop & (MO_BSWAP | MO_SSIZE)).
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
2009 
/*
 * Names for the alignment field of a MemOp, indexed by
 * (memop & MO_AMASK) >> MO_ASHIFT.  The default (unmarked) case
 * depends on whether the target requires aligned accesses.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2025 
/*
 * Names for TCG_BSWAP_* flag combinations; combinations without an
 * entry here are printed as a hex value by tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2033 
2034 static inline bool tcg_regset_single(TCGRegSet d)
2035 {
2036     return (d & (d - 1)) == 0;
2037 }
2038 
2039 static inline TCGReg tcg_regset_first(TCGRegSet d)
2040 {
2041     if (TCG_TARGET_NB_REGS <= 32) {
2042         return ctz32(d);
2043     } else {
2044         return ctz64(d);
2045     }
2046 }
2047 
/*
 * Return only the number of characters output -- no error return.
 * fprintf returns negative on error; clamp that to 0 so the column
 * accounting in tcg_dump_ops never goes backwards.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2051 
/*
 * Dump every op in the current stream to 'f', one per line, for
 * debugging.  Appends the op's liveness info when present, and the
 * output register preferences when 'have_prefs' is set.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        /* Column count, used to align the trailing life/pref annotations. */
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target word is split across two host-sized args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* k indexes op->args across outputs, inputs and constants. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Opcode-specific decoding of the first constant argument(s). */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    /* NB: 'op' here shadows the outer TCGOp pointer. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unknown bits set: fall back to raw hex. */
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch-family ops carry a label operand; print it as $L<id>. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Remaining constant args are printed as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before the life/pref annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2276 
2277 /* we give more priority to constraints with less registers */
2278 static int get_constraint_priority(const TCGOpDef *def, int k)
2279 {
2280     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2281     int n = ctpop64(arg_ct->regs);
2282 
2283     /*
2284      * Sort constraints of a single register first, which includes output
2285      * aliases (which must exactly match the input already allocated).
2286      */
2287     if (n == 1 || arg_ct->oalias) {
2288         return INT_MAX;
2289     }
2290 
2291     /*
2292      * Sort register pairs next, first then second immediately after.
2293      * Arbitrarily sort multiple pairs by the index of the first reg;
2294      * there shouldn't be many pairs.
2295      */
2296     switch (arg_ct->pair) {
2297     case 1:
2298     case 3:
2299         return (k + 1) * 2;
2300     case 2:
2301         return (arg_ct->pair_index + 1) * 2 - 1;
2302     }
2303 
2304     /* Finally, sort by decreasing register count. */
2305     assert(n > 1);
2306     return -n;
2307 }
2308 
2309 /* sort from highest priority to lowest */
2310 static void sort_constraints(TCGOpDef *def, int start, int n)
2311 {
2312     int i, j;
2313     TCGArgConstraint *a = def->args_ct;
2314 
2315     for (i = 0; i < n; i++) {
2316         a[start + i].sort_index = start + i;
2317     }
2318     if (n <= 1) {
2319         return;
2320     }
2321     for (i = 0; i < n - 1; i++) {
2322         for (j = i + 1; j < n; j++) {
2323             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2324             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2325             if (p1 < p2) {
2326                 int tmp = a[start + i].sort_index;
2327                 a[start + i].sort_index = a[start + j].sort_index;
2328                 a[start + j].sort_index = tmp;
2329             }
2330         }
2331     }
2332 }
2333 
/*
 * Parse the target's constraint strings for every opcode into the
 * args_ct array: resolve output/input aliases and register pairs,
 * then pre-sort the constraints for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct; the rest are inputs. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input constrained to alias output register 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate constant/register constraint letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2538 
/*
 * Unlink 'op' from the instruction stream and put it on the free list
 * for reuse by tcg_op_alloc.  Branch opcodes drop their reference to
 * the target label so that dead labels can be detected later.
 */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        /* The label is the 4th operand of a conditional branch. */
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        /* The label is the 6th operand of a double-word branch. */
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}
2569 
2570 void tcg_remove_ops_after(TCGOp *op)
2571 {
2572     TCGContext *s = tcg_ctx;
2573 
2574     while (true) {
2575         TCGOp *last = tcg_last_op();
2576         if (last == op) {
2577             return;
2578         }
2579         tcg_op_remove(s, last);
2580     }
2581 }
2582 
/*
 * Allocate a TCGOp with room for at least 'nargs' arguments, reusing
 * an entry from the free list when one with enough capacity exists.
 */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                /* Record the (possibly larger) capacity for future reuse. */
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    /* Zero everything up to (not including) the 'link' field. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}
2613 
2614 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2615 {
2616     TCGOp *op = tcg_op_alloc(opc, nargs);
2617     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2618     return op;
2619 }
2620 
2621 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2622                             TCGOpcode opc, unsigned nargs)
2623 {
2624     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2625     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2626     return new_op;
2627 }
2628 
2629 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2630                            TCGOpcode opc, unsigned nargs)
2631 {
2632     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2633     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2634     return new_op;
2635 }
2636 
/*
 * Reachable analysis : remove unreachable code.
 * A linear scan: after an unconditional control transfer everything is
 * dead until the next referenced label; dead ops are deleted as we go.
 */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    /* True while scanning ops that follow an unconditional transfer. */
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2709 
/* Liveness state bits: value is dead / value must be synced to memory. */
#define TS_DEAD  1
#define TS_MEM   2

/* Test per-argument DEAD/SYNC bits; expect a local 'arg_life' in scope. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2715 
2716 /* For liveness_pass_1, the register preferences for a given temp.  */
2717 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2718 {
2719     return ts->state_ptr;
2720 }
2721 
2722 /* For liveness_pass_1, reset the preferences for a given temp to the
2723  * maximal regset for its type.
2724  */
2725 static inline void la_reset_pref(TCGTemp *ts)
2726 {
2727     *la_temp_pref(ts)
2728         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2729 }
2730 
2731 /* liveness analysis: end of function: all temps are dead, and globals
2732    should be in memory. */
2733 static void la_func_end(TCGContext *s, int ng, int nt)
2734 {
2735     int i;
2736 
2737     for (i = 0; i < ng; ++i) {
2738         s->temps[i].state = TS_DEAD | TS_MEM;
2739         la_reset_pref(&s->temps[i]);
2740     }
2741     for (i = ng; i < nt; ++i) {
2742         s->temps[i].state = TS_DEAD;
2743         la_reset_pref(&s->temps[i]);
2744     }
2745 }
2746 
2747 /* liveness analysis: end of basic block: all temps are dead, globals
2748    and local temps should be in memory. */
2749 static void la_bb_end(TCGContext *s, int ng, int nt)
2750 {
2751     int i;
2752 
2753     for (i = 0; i < nt; ++i) {
2754         TCGTemp *ts = &s->temps[i];
2755         int state;
2756 
2757         switch (ts->kind) {
2758         case TEMP_FIXED:
2759         case TEMP_GLOBAL:
2760         case TEMP_LOCAL:
2761             state = TS_DEAD | TS_MEM;
2762             break;
2763         case TEMP_NORMAL:
2764         case TEMP_EBB:
2765         case TEMP_CONST:
2766             state = TS_DEAD;
2767             break;
2768         default:
2769             g_assert_not_reached();
2770         }
2771         ts->state = state;
2772         la_reset_pref(ts);
2773     }
2774 }
2775 
2776 /* liveness analysis: sync globals back to memory.  */
2777 static void la_global_sync(TCGContext *s, int ng)
2778 {
2779     int i;
2780 
2781     for (i = 0; i < ng; ++i) {
2782         int state = s->temps[i].state;
2783         s->temps[i].state = state | TS_MEM;
2784         if (state == TS_DEAD) {
2785             /* If the global was previously dead, reset prefs.  */
2786             la_reset_pref(&s->temps[i]);
2787         }
2788     }
2789 }
2790 
2791 /*
2792  * liveness analysis: conditional branch: all temps are dead unless
2793  * explicitly live-across-conditional-branch, globals and local temps
2794  * should be synced.
2795  */
2796 static void la_bb_sync(TCGContext *s, int ng, int nt)
2797 {
2798     la_global_sync(s, ng);
2799 
2800     for (int i = ng; i < nt; ++i) {
2801         TCGTemp *ts = &s->temps[i];
2802         int state;
2803 
2804         switch (ts->kind) {
2805         case TEMP_LOCAL:
2806             state = ts->state;
2807             ts->state = state | TS_MEM;
2808             if (state != TS_DEAD) {
2809                 continue;
2810             }
2811             break;
2812         case TEMP_NORMAL:
2813             s->temps[i].state = TS_DEAD;
2814             break;
2815         case TEMP_EBB:
2816         case TEMP_CONST:
2817             continue;
2818         default:
2819             g_assert_not_reached();
2820         }
2821         la_reset_pref(&s->temps[i]);
2822     }
2823 }
2824 
2825 /* liveness analysis: sync globals back to memory and kill.  */
2826 static void la_global_kill(TCGContext *s, int ng)
2827 {
2828     int i;
2829 
2830     for (i = 0; i < ng; i++) {
2831         s->temps[i].state = TS_DEAD | TS_MEM;
2832         la_reset_pref(&s->temps[i]);
2833     }
2834 }
2835 
2836 /* liveness analysis: note live globals crossing calls.  */
2837 static void la_cross_call(TCGContext *s, int nt)
2838 {
2839     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2840     int i;
2841 
2842     for (i = 0; i < nt; i++) {
2843         TCGTemp *ts = &s->temps[i];
2844         if (!(ts->state & TS_DEAD)) {
2845             TCGRegSet *pset = la_temp_pref(ts);
2846             TCGRegSet set = *pset;
2847 
2848             set &= mask;
2849             /* If the combination is not possible, restart.  */
2850             if (set == 0) {
2851                 set = tcg_target_available_regs[ts->type] & mask;
2852             }
2853             *pset = set;
2854         }
2855     }
2856 }
2857 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.

   The op list is walked backward, so a use is seen before its def;
   ts->state tracks TS_DEAD/TS_MEM, and the register preference for
   each temp (via la_temp_pref) is refined as constraints are met. */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, addressed through state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        /* Replacement opcodes for the double-word expansions below. */
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Nothing to do.  */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3199 
/* Liveness analysis: Convert indirect regs to direct temporaries.

   Each indirect global gets a shadow "direct" temp, linked through
   state_ptr; explicit ld ops are inserted before uses and st ops after
   final writes so later passes operate on direct temps only.  Returns
   true if any opcode argument was rewritten.  */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-globals carry no shadow temp and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Reload the value from memory into the direct temp. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the mov source directly
                           and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3387 
/*
 * Allocate stack-frame storage for @ts: assign mem_base, mem_offset
 * and set mem_allocated.  If the temp's base type was subdivided into
 * parts, every part of the object receives a contiguous slot.  If the
 * frame is exhausted, raise a TB overflow to restart with a smaller TB.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC frames are addressed with a bias; see TCG_TARGET_STACK_BIAS. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3456 
3457 /* Assign @reg to @ts, and update reg_to_temp[]. */
3458 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3459 {
3460     if (ts->val_type == TEMP_VAL_REG) {
3461         TCGReg old = ts->reg;
3462         tcg_debug_assert(s->reg_to_temp[old] == ts);
3463         if (old == reg) {
3464             return;
3465         }
3466         s->reg_to_temp[old] = NULL;
3467     }
3468     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3469     s->reg_to_temp[reg] = ts;
3470     ts->val_type = TEMP_VAL_REG;
3471     ts->reg = reg;
3472 }
3473 
3474 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3475 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3476 {
3477     tcg_debug_assert(type != TEMP_VAL_REG);
3478     if (ts->val_type == TEMP_VAL_REG) {
3479         TCGReg reg = ts->reg;
3480         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3481         s->reg_to_temp[reg] = NULL;
3482     }
3483     ts->val_type = type;
3484 }
3485 
3486 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3487 
3488 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3489    mark it free; otherwise mark it dead.  */
3490 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3491 {
3492     TCGTempVal new_type;
3493 
3494     switch (ts->kind) {
3495     case TEMP_FIXED:
3496         return;
3497     case TEMP_GLOBAL:
3498     case TEMP_LOCAL:
3499         new_type = TEMP_VAL_MEM;
3500         break;
3501     case TEMP_NORMAL:
3502     case TEMP_EBB:
3503         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3504         break;
3505     case TEMP_CONST:
3506         new_type = TEMP_VAL_CONST;
3507         break;
3508     default:
3509         g_assert_not_reached();
3510     }
3511     set_temp_val_nonreg(s, ts, new_type);
3512 }
3513 
/* Mark a temporary as dead.  Shorthand for temp_free_or_dead with a
   positive disposition (dead, not free).  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3519 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Nothing to store when the temp is read-only or memory already
       holds the current value. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize in a register and
               fall through to the register-store path. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        /* The memory copy is now up to date. */
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3563 
3564 /* free register 'reg' by spilling the corresponding temporary if necessary */
3565 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3566 {
3567     TCGTemp *ts = s->reg_to_temp[reg];
3568     if (ts != NULL) {
3569         temp_sync(s, ts, allocated_regs, 0, -1);
3570     }
3571 }
3572 
/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 *
 * First scan for a register that is currently unused; only if none is
 * found does a second pass spill an occupied register via tcg_reg_free().
 * Within each pass the preferred subset is tried before the full set.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference.  */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            /* Spill the first candidate in allocation order.  */
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    /* reg_ct[1] was asserted non-empty, so a register must be found above. */
    tcg_abort();
}
3644 
/*
 * As tcg_reg_alloc(), but allocate two consecutive registers R and R+1
 * and return R.  Both registers are freed (spilled if occupied) before
 * returning.  To minimize the number of spills, first look for a pair
 * with both registers free, then one free, then neither.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of R, R+1 are currently free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* fmin == 0 accepts any pair in the (non-empty) set; unreachable. */
    tcg_abort();
}
3690 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED, preferring PREFERRED.  On return
   the temp is in TEMP_VAL_REG state.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already register-resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: materialize with a plain movi. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* A freshly materialized constant is not backed by memory. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* The register now mirrors the memory slot. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}
3739 
/* Save a temporary to memory.  Liveness analysis already guarantees the
   temp has been written back (or is read-only), so no code is emitted
   here and 'allocated_regs' is currently unused; it is kept so the
   signature matches a save operation that could need a scratch register.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3748 
3749 /* save globals to their canonical location and assume they can be
3750    modified be the following code. 'allocated_regs' is used in case a
3751    temporary registers needs to be allocated to store a constant. */
3752 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3753 {
3754     int i, n;
3755 
3756     for (i = 0, n = s->nb_globals; i < n; i++) {
3757         temp_save(s, &s->temps[i], allocated_regs);
3758     }
3759 }
3760 
3761 /* sync globals to their canonical location and assume they can be
3762    read by the following code. 'allocated_regs' is used in case a
3763    temporary registers needs to be allocated to store a constant. */
3764 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3765 {
3766     int i, n;
3767 
3768     for (i = 0, n = s->nb_globals; i < n; i++) {
3769         TCGTemp *ts = &s->temps[i];
3770         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3771                          || ts->kind == TEMP_FIXED
3772                          || ts->mem_coherent);
3773     }
3774 }
3775 
3776 /* at the end of a basic block, we assume all temporaries are dead and
3777    all globals are stored at their canonical location. */
3778 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3779 {
3780     int i;
3781 
3782     for (i = s->nb_globals; i < s->nb_temps; i++) {
3783         TCGTemp *ts = &s->temps[i];
3784 
3785         switch (ts->kind) {
3786         case TEMP_LOCAL:
3787             temp_save(s, ts, allocated_regs);
3788             break;
3789         case TEMP_NORMAL:
3790         case TEMP_EBB:
3791             /* The liveness analysis already ensures that temps are dead.
3792                Keep an tcg_debug_assert for safety. */
3793             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3794             break;
3795         case TEMP_CONST:
3796             /* Similarly, we should have freed any allocated register. */
3797             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3798             break;
3799         default:
3800             g_assert_not_reached();
3801         }
3802     }
3803 
3804     save_globals(s, allocated_regs);
3805 }
3806 
3807 /*
3808  * At a conditional branch, we assume all temporaries are dead unless
3809  * explicitly live-across-conditional-branch; all globals and local
3810  * temps are synced to their location.
3811  */
3812 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3813 {
3814     sync_globals(s, allocated_regs);
3815 
3816     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3817         TCGTemp *ts = &s->temps[i];
3818         /*
3819          * The liveness analysis already ensures that temps are dead.
3820          * Keep tcg_debug_asserts for safety.
3821          */
3822         switch (ts->kind) {
3823         case TEMP_LOCAL:
3824             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3825             break;
3826         case TEMP_NORMAL:
3827             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3828             break;
3829         case TEMP_EBB:
3830         case TEMP_CONST:
3831             break;
3832         default:
3833             g_assert_not_reached();
3834         }
3835     }
3836 }
3837 
3838 /*
3839  * Specialized code generation for INDEX_op_mov_* with a constant.
3840  */
3841 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3842                                   tcg_target_ulong val, TCGLifeData arg_life,
3843                                   TCGRegSet preferred_regs)
3844 {
3845     /* ENV should not be modified.  */
3846     tcg_debug_assert(!temp_readonly(ots));
3847 
3848     /* The movi is not explicitly generated here.  */
3849     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3850     ots->val = val;
3851     ots->mem_coherent = 0;
3852     if (NEED_SYNC_ARG(0)) {
3853         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3854     } else if (IS_DEAD_ARG(0)) {
3855         temp_dead(s, ots);
3856     }
3857 }
3858 
3859 /*
3860  * Specialized code generation for INDEX_op_mov_*.
3861  */
3862 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3863 {
3864     const TCGLifeData arg_life = op->life;
3865     TCGRegSet allocated_regs, preferred_regs;
3866     TCGTemp *ts, *ots;
3867     TCGType otype, itype;
3868     TCGReg oreg, ireg;
3869 
3870     allocated_regs = s->reserved_regs;
3871     preferred_regs = output_pref(op, 0);
3872     ots = arg_temp(op->args[0]);
3873     ts = arg_temp(op->args[1]);
3874 
3875     /* ENV should not be modified.  */
3876     tcg_debug_assert(!temp_readonly(ots));
3877 
3878     /* Note that otype != itype for no-op truncation.  */
3879     otype = ots->type;
3880     itype = ts->type;
3881 
3882     if (ts->val_type == TEMP_VAL_CONST) {
3883         /* propagate constant or generate sti */
3884         tcg_target_ulong val = ts->val;
3885         if (IS_DEAD_ARG(1)) {
3886             temp_dead(s, ts);
3887         }
3888         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3889         return;
3890     }
3891 
3892     /* If the source value is in memory we're going to be forced
3893        to have it in a register in order to perform the copy.  Copy
3894        the SOURCE value into its own register first, that way we
3895        don't have to reload SOURCE the next time it is used. */
3896     if (ts->val_type == TEMP_VAL_MEM) {
3897         temp_load(s, ts, tcg_target_available_regs[itype],
3898                   allocated_regs, preferred_regs);
3899     }
3900     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3901     ireg = ts->reg;
3902 
3903     if (IS_DEAD_ARG(0)) {
3904         /* mov to a non-saved dead register makes no sense (even with
3905            liveness analysis disabled). */
3906         tcg_debug_assert(NEED_SYNC_ARG(0));
3907         if (!ots->mem_allocated) {
3908             temp_allocate_frame(s, ots);
3909         }
3910         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3911         if (IS_DEAD_ARG(1)) {
3912             temp_dead(s, ts);
3913         }
3914         temp_dead(s, ots);
3915         return;
3916     }
3917 
3918     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3919         /*
3920          * The mov can be suppressed.  Kill input first, so that it
3921          * is unlinked from reg_to_temp, then set the output to the
3922          * reg that we saved from the input.
3923          */
3924         temp_dead(s, ts);
3925         oreg = ireg;
3926     } else {
3927         if (ots->val_type == TEMP_VAL_REG) {
3928             oreg = ots->reg;
3929         } else {
3930             /* Make sure to not spill the input register during allocation. */
3931             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3932                                  allocated_regs | ((TCGRegSet)1 << ireg),
3933                                  preferred_regs, ots->indirect_base);
3934         }
3935         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3936             /*
3937              * Cross register class move not supported.
3938              * Store the source register into the destination slot
3939              * and leave the destination temp as TEMP_VAL_MEM.
3940              */
3941             assert(!temp_readonly(ots));
3942             if (!ts->mem_allocated) {
3943                 temp_allocate_frame(s, ots);
3944             }
3945             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3946             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3947             ots->mem_coherent = 1;
3948             return;
3949         }
3950     }
3951     set_temp_val_reg(s, ots, oreg);
3952     ots->mem_coherent = 0;
3953 
3954     if (NEED_SYNC_ARG(0)) {
3955         temp_sync(s, ots, allocated_regs, 0, 0);
3956     }
3957 }
3958 
/*
 * Specialized code generation for INDEX_op_dup_vec: replicate a scalar
 * (register, memory or constant) into every element of a vector register.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts the low-order element is at the high end. */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
4069 
/*
 * Allocate registers for, and emit, a single TCGOp, honouring the operand
 * constraints recorded in tcg_op_defs[op->opc]:
 *   1. place each input in a register (or pass it as a constant) that
 *      satisfies its constraint, including register-pair constraints;
 *   2. release dead inputs and handle branch/call-clobber/side-effect
 *      flags for the op;
 *   3. allocate output registers (possibly aliased to inputs);
 *   4. emit the instruction via the backend;
 *   5. sync or kill outputs as liveness dictates.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-difficulty order (sort_index). */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        /* NB: ts->reg is only meaningful when val_type == TEMP_VAL_REG;
           all uses below are guarded by that check. */
        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /* Reuse the existing pair only when both halves are dead,
                   writable, already adjacent, and not otherwise claimed. */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half fixed the pair; this input must use reg+1. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /* Reuse ts->reg as the high half only if the low half (reg-1)
               is free and neither register is already claimed. */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Shift the constraint down one so the pair allocator picks
               R with R+1 satisfying the original constraint. */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output aliases an input: reuse its register. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* "new" outputs must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4396 
/*
 * Register-allocate a dup2_vec opcode: broadcast a 64-bit element built
 * from two 32-bit inputs (low in args[1], high in args[2]) into the
 * vector output.  Returns true if the op was emitted here; false asks
 * the caller to fall back to the generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that can represent the constant. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to their memory slots so they are adjacent. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4484 
4485 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4486                          TCGRegSet allocated_regs)
4487 {
4488     if (ts->val_type == TEMP_VAL_REG) {
4489         if (ts->reg != reg) {
4490             tcg_reg_free(s, reg, allocated_regs);
4491             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4492                 /*
4493                  * Cross register class move not supported.  Sync the
4494                  * temp back to its slot and load from there.
4495                  */
4496                 temp_sync(s, ts, allocated_regs, 0, 0);
4497                 tcg_out_ld(s, ts->type, reg,
4498                            ts->mem_base->reg, ts->mem_offset);
4499             }
4500         }
4501     } else {
4502         TCGRegSet arg_set = 0;
4503 
4504         tcg_reg_free(s, reg, allocated_regs);
4505         tcg_regset_set_reg(arg_set, reg);
4506         temp_load(s, ts, arg_set, allocated_regs, 0);
4507     }
4508 }
4509 
4510 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4511                          TCGRegSet allocated_regs)
4512 {
4513     /*
4514      * When the destination is on the stack, load up the temp and store.
4515      * If there are many call-saved registers, the temp might live to
4516      * see another use; otherwise it'll be discarded.
4517      */
4518     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4519     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4520                TCG_TARGET_CALL_STACK_OFFSET +
4521                stk_slot * sizeof(tcg_target_long));
4522 }
4523 
4524 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4525                             TCGTemp *ts, TCGRegSet *allocated_regs)
4526 {
4527     if (REG_P(l)) {
4528         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4529         load_arg_reg(s, reg, ts, *allocated_regs);
4530         tcg_regset_set_reg(*allocated_regs, reg);
4531     } else {
4532         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4533                      ts, *allocated_regs);
4534     }
4535 }
4536 
4537 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4538                          intptr_t ref_off, TCGRegSet *allocated_regs)
4539 {
4540     TCGReg reg;
4541     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4542 
4543     if (stk_slot < 0) {
4544         reg = tcg_target_call_iarg_regs[arg_slot];
4545         tcg_reg_free(s, reg, *allocated_regs);
4546         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4547         tcg_regset_set_reg(*allocated_regs, reg);
4548     } else {
4549         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4550                             *allocated_regs, 0, false);
4551         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4552         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4553                    TCG_TARGET_CALL_STACK_OFFSET
4554                    + stk_slot * sizeof(tcg_target_long));
4555     }
4556 }
4557 
/*
 * Register-allocate and emit a call op: load the input arguments per
 * the ABI described by the op's TCGHelperInfo, clobber the call-
 * clobbered registers, save/sync globals as the helper's flags demand,
 * emit the call, then place the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass a pointer
               to that slot in the argument slot. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only;
               the pointer was passed for the first word. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Value returned in a vector register: spill it to the output's
           home slot, then treat all parts as in-memory (fall through). */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4689 
4690 #ifdef CONFIG_PROFILER
4691 
/* avoid copy/paste errors */
/* Atomically read @from->field and accumulate it into @to->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read @from->field and keep the maximum in @to->field. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4705 
4706 /* Pass in a zero'ed @prof */
4707 static inline
4708 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4709 {
4710     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4711     unsigned int i;
4712 
4713     for (i = 0; i < n_ctxs; i++) {
4714         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4715         const TCGProfile *orig = &s->prof;
4716 
4717         if (counters) {
4718             PROF_ADD(prof, orig, cpu_exec_time);
4719             PROF_ADD(prof, orig, tb_count1);
4720             PROF_ADD(prof, orig, tb_count);
4721             PROF_ADD(prof, orig, op_count);
4722             PROF_MAX(prof, orig, op_count_max);
4723             PROF_ADD(prof, orig, temp_count);
4724             PROF_MAX(prof, orig, temp_count_max);
4725             PROF_ADD(prof, orig, del_op_count);
4726             PROF_ADD(prof, orig, code_in_len);
4727             PROF_ADD(prof, orig, code_out_len);
4728             PROF_ADD(prof, orig, search_out_len);
4729             PROF_ADD(prof, orig, interm_time);
4730             PROF_ADD(prof, orig, code_time);
4731             PROF_ADD(prof, orig, la_time);
4732             PROF_ADD(prof, orig, opt_time);
4733             PROF_ADD(prof, orig, restore_count);
4734             PROF_ADD(prof, orig, restore_time);
4735         }
4736         if (table) {
4737             int i;
4738 
4739             for (i = 0; i < NB_OPS; i++) {
4740                 PROF_ADD(prof, orig, table_op_count[i]);
4741             }
4742         }
4743     }
4744 }
4745 
4746 #undef PROF_ADD
4747 #undef PROF_MAX
4748 
/* Snapshot only the scalar profile counters into @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4753 
/* Snapshot only the per-opcode counts into @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4758 
4759 void tcg_dump_op_count(GString *buf)
4760 {
4761     TCGProfile prof = {};
4762     int i;
4763 
4764     tcg_profile_snapshot_table(&prof);
4765     for (i = 0; i < NB_OPS; i++) {
4766         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4767                                prof.table_op_count[i]);
4768     }
4769 }
4770 
4771 int64_t tcg_cpu_exec_time(void)
4772 {
4773     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4774     unsigned int i;
4775     int64_t ret = 0;
4776 
4777     for (i = 0; i < n_ctxs; i++) {
4778         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4779         const TCGProfile *prof = &s->prof;
4780 
4781         ret += qatomic_read(&prof->cpu_exec_time);
4782     }
4783     return ret;
4784 }
4785 #else
/* Stub used when CONFIG_PROFILER is disabled. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4790 
/* Stub used when CONFIG_PROFILER is disabled: hard failure, never returns. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4796 #endif
4797 
4798 
/*
 * Generate host code for the ops of @tb into s->code_buf.
 * Returns the generated code size in bytes on success; -1 if the code
 * buffer high-water mark was exceeded (caller restarts with a fresh
 * buffer); -2 if the TB grew beyond the 16-bit insn-offset bookkeeping
 * or relocations could not be resolved.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op and temp counts for this TB in the profiler. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization, if requested. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* num_insns counts insn_start markers; -1 until the first is seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn's code range, then record the
               guest insn start data for this one. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5039 
5040 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the accumulated TCG profile to @buf.
 * Cycle counts are converted to seconds assuming a fixed 2.4 GHz clock.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;   /* tb_count clamped to >= 1 to avoid div-by-0 */
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Avoid dividing by zero in the percentage lines below. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5104 #else
/* Stub used when CONFIG_PROFILER is disabled. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5109 #endif
5110 
5111 #ifdef ELF_HOST_MACHINE
5112 /* In order to use this feature, the backend needs to do three things:
5113 
5114    (1) Define ELF_HOST_MACHINE to indicate both what value to
5115        put into the ELF image and to indicate support for the feature.
5116 
5117    (2) Define tcg_register_jit.  This should create a buffer containing
5118        the contents of a .debug_frame section that describes the post-
5119        prologue unwind info for the tcg machine.
5120 
5121    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5122 */
5123 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action to be taken by GDB when it notices a descriptor update. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file, on a doubly-linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};

/* Root structure through which the debugger finds all registered code. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
5144 
/*
 * Hook function that is part of the GDB JIT interface; noinline and the
 * empty asm keep the compiler from eliding the function entirely.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
5150 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5154 
5155 /* End GDB interface.  */
5156 
/*
 * Return the byte offset of @str within the NUL-separated string table
 * @strtab (which begins with an empty string at offset 0).  The caller
 * must only pass strings known to be present: there is no terminating
 * check, so a missing string would loop past the table.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
5168 
5169 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5170                                  const void *debug_frame,
5171                                  size_t debug_frame_size)
5172 {
5173     struct __attribute__((packed)) DebugInfo {
5174         uint32_t  len;
5175         uint16_t  version;
5176         uint32_t  abbrev;
5177         uint8_t   ptr_size;
5178         uint8_t   cu_die;
5179         uint16_t  cu_lang;
5180         uintptr_t cu_low_pc;
5181         uintptr_t cu_high_pc;
5182         uint8_t   fn_die;
5183         char      fn_name[16];
5184         uintptr_t fn_low_pc;
5185         uintptr_t fn_high_pc;
5186         uint8_t   cu_eoc;
5187     };
5188 
5189     struct ElfImage {
5190         ElfW(Ehdr) ehdr;
5191         ElfW(Phdr) phdr;
5192         ElfW(Shdr) shdr[7];
5193         ElfW(Sym)  sym[2];
5194         struct DebugInfo di;
5195         uint8_t    da[24];
5196         char       str[80];
5197     };
5198 
5199     struct ElfImage *img;
5200 
5201     static const struct ElfImage img_template = {
5202         .ehdr = {
5203             .e_ident[EI_MAG0] = ELFMAG0,
5204             .e_ident[EI_MAG1] = ELFMAG1,
5205             .e_ident[EI_MAG2] = ELFMAG2,
5206             .e_ident[EI_MAG3] = ELFMAG3,
5207             .e_ident[EI_CLASS] = ELF_CLASS,
5208             .e_ident[EI_DATA] = ELF_DATA,
5209             .e_ident[EI_VERSION] = EV_CURRENT,
5210             .e_type = ET_EXEC,
5211             .e_machine = ELF_HOST_MACHINE,
5212             .e_version = EV_CURRENT,
5213             .e_phoff = offsetof(struct ElfImage, phdr),
5214             .e_shoff = offsetof(struct ElfImage, shdr),
5215             .e_ehsize = sizeof(ElfW(Shdr)),
5216             .e_phentsize = sizeof(ElfW(Phdr)),
5217             .e_phnum = 1,
5218             .e_shentsize = sizeof(ElfW(Shdr)),
5219             .e_shnum = ARRAY_SIZE(img->shdr),
5220             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5221 #ifdef ELF_HOST_FLAGS
5222             .e_flags = ELF_HOST_FLAGS,
5223 #endif
5224 #ifdef ELF_OSABI
5225             .e_ident[EI_OSABI] = ELF_OSABI,
5226 #endif
5227         },
5228         .phdr = {
5229             .p_type = PT_LOAD,
5230             .p_flags = PF_X,
5231         },
5232         .shdr = {
5233             [0] = { .sh_type = SHT_NULL },
5234             /* Trick: The contents of code_gen_buffer are not present in
5235                this fake ELF file; that got allocated elsewhere.  Therefore
5236                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5237                will not look for contents.  We can record any address.  */
5238             [1] = { /* .text */
5239                 .sh_type = SHT_NOBITS,
5240                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5241             },
5242             [2] = { /* .debug_info */
5243                 .sh_type = SHT_PROGBITS,
5244                 .sh_offset = offsetof(struct ElfImage, di),
5245                 .sh_size = sizeof(struct DebugInfo),
5246             },
5247             [3] = { /* .debug_abbrev */
5248                 .sh_type = SHT_PROGBITS,
5249                 .sh_offset = offsetof(struct ElfImage, da),
5250                 .sh_size = sizeof(img->da),
5251             },
5252             [4] = { /* .debug_frame */
5253                 .sh_type = SHT_PROGBITS,
5254                 .sh_offset = sizeof(struct ElfImage),
5255             },
5256             [5] = { /* .symtab */
5257                 .sh_type = SHT_SYMTAB,
5258                 .sh_offset = offsetof(struct ElfImage, sym),
5259                 .sh_size = sizeof(img->sym),
5260                 .sh_info = 1,
5261                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5262                 .sh_entsize = sizeof(ElfW(Sym)),
5263             },
5264             [6] = { /* .strtab */
5265                 .sh_type = SHT_STRTAB,
5266                 .sh_offset = offsetof(struct ElfImage, str),
5267                 .sh_size = sizeof(img->str),
5268             }
5269         },
5270         .sym = {
5271             [1] = { /* code_gen_buffer */
5272                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5273                 .st_shndx = 1,
5274             }
5275         },
5276         .di = {
5277             .len = sizeof(struct DebugInfo) - 4,
5278             .version = 2,
5279             .ptr_size = sizeof(void *),
5280             .cu_die = 1,
5281             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5282             .fn_die = 2,
5283             .fn_name = "code_gen_buffer"
5284         },
5285         .da = {
5286             1,          /* abbrev number (the cu) */
5287             0x11, 1,    /* DW_TAG_compile_unit, has children */
5288             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5289             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5290             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5291             0, 0,       /* end of abbrev */
5292             2,          /* abbrev number (the fn) */
5293             0x2e, 0,    /* DW_TAG_subprogram, no children */
5294             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5295             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5296             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5297             0, 0,       /* end of abbrev */
5298             0           /* no more abbrev */
5299         },
5300         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5301                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5302     };
5303 
5304     /* We only need a single jit entry; statically allocate it.  */
5305     static struct jit_code_entry one_entry;
5306 
5307     uintptr_t buf = (uintptr_t)buf_ptr;
5308     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5309     DebugFrameHeader *dfh;
5310 
5311     img = g_malloc(img_size);
5312     *img = img_template;
5313 
5314     img->phdr.p_vaddr = buf;
5315     img->phdr.p_paddr = buf;
5316     img->phdr.p_memsz = buf_size;
5317 
5318     img->shdr[1].sh_name = find_string(img->str, ".text");
5319     img->shdr[1].sh_addr = buf;
5320     img->shdr[1].sh_size = buf_size;
5321 
5322     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5323     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5324 
5325     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5326     img->shdr[4].sh_size = debug_frame_size;
5327 
5328     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5329     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5330 
5331     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5332     img->sym[1].st_value = buf;
5333     img->sym[1].st_size = buf_size;
5334 
5335     img->di.cu_low_pc = buf;
5336     img->di.cu_high_pc = buf + buf_size;
5337     img->di.fn_low_pc = buf;
5338     img->di.fn_high_pc = buf + buf_size;
5339 
5340     dfh = (DebugFrameHeader *)(img + 1);
5341     memcpy(dfh, debug_frame, debug_frame_size);
5342     dfh->fde.func_start = buf;
5343     dfh->fde.func_len = buf_size;
5344 
5345 #ifdef DEBUG_JIT
5346     /* Enable this block to be able to debug the ELF image file creation.
5347        One can use readelf, objdump, or other inspection utilities.  */
5348     {
5349         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5350         FILE *f = fopen(jit, "w+b");
5351         if (f) {
5352             if (fwrite(img, img_size, 1, f) != img_size) {
5353                 /* Avoid stupid unused return value warning for fwrite.  */
5354             }
5355             fclose(f);
5356         }
5357     }
5358 #endif
5359 
5360     one_entry.symfile_addr = img;
5361     one_entry.symfile_size = img_size;
5362 
5363     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5364     __jit_debug_descriptor.relevant_entry = &one_entry;
5365     __jit_debug_descriptor.first_entry = &one_entry;
5366     __jit_debug_register_code();
5367 }
5368 #else
5369 /* No support for the feature.  Provide the entry point expected by exec.c,
5370    and implement the internal function we declared earlier.  */
5371 
/* ELF_HOST_MACHINE is not defined for this host: we cannot emit a debug
   image, so registration is a no-op.  All arguments are intentionally
   unused.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
5377 
/* Public entry point expected by exec.c; no JIT debug support on this
   host, so registering generated code with the debugger is a no-op.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    (void)buf;
    (void)buf_size;
}
5381 #endif /* ELF_HOST_MACHINE */
5382 
5383 #if !TCG_TARGET_MAYBE_vec
/* Fallback when the TCG backend has no vector support
   (TCG_TARGET_MAYBE_vec is 0): no vector op should ever need expansion,
   so reaching this function is a programming error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5388 #endif
5389