xref: /openbmc/qemu/tcg/tcg.c (revision ac12b601)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* Define this to use liveness analysis (better code). */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long-term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions. */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 
67 /* Forward declarations for functions defined in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
74 /* The CIE and FDE header definitions will be common to all hosts.  */
75 typedef struct {
76     uint32_t len __attribute__((aligned((sizeof(void *)))));
77     uint32_t id;
78     uint8_t version;
79     char augmentation[1];
80     uint8_t code_align;
81     uint8_t data_align;
82     uint8_t return_column;
83 } DebugFrameCIE;
84 
85 typedef struct QEMU_PACKED {
86     uint32_t len __attribute__((aligned((sizeof(void *)))));
87     uint32_t cie_offset;
88     uintptr_t func_start;
89     uintptr_t func_len;
90 } DebugFrameFDEHeader;
91 
92 typedef struct QEMU_PACKED {
93     DebugFrameCIE cie;
94     DebugFrameFDEHeader fde;
95 } DebugFrameHeader;
96 
97 static void tcg_register_jit_int(const void *buf, size_t size,
98                                  const void *debug_frame,
99                                  size_t debug_frame_size)
100     __attribute__((unused));
101 
102 /* Forward declarations for functions defined and used in tcg-target.c.inc. */
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104                        intptr_t arg2);
105 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107                          TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
109                        const TCGArg args[TCG_MAX_OP_ARGS],
110                        const int const_args[TCG_MAX_OP_ARGS]);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
113                             TCGReg dst, TCGReg src);
114 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
115                              TCGReg dst, TCGReg base, intptr_t offset);
116 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, int64_t arg);
118 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
119                            unsigned vecl, unsigned vece,
120                            const TCGArg args[TCG_MAX_OP_ARGS],
121                            const int const_args[TCG_MAX_OP_ARGS]);
122 #else
123 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
124                                    TCGReg dst, TCGReg src)
125 {
126     g_assert_not_reached();
127 }
128 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
129                                     TCGReg dst, TCGReg base, intptr_t offset)
130 {
131     g_assert_not_reached();
132 }
133 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
134                                     TCGReg dst, int64_t arg)
135 {
136     g_assert_not_reached();
137 }
138 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
139                                   unsigned vecl, unsigned vece,
140                                   const TCGArg args[TCG_MAX_OP_ARGS],
141                                   const int const_args[TCG_MAX_OP_ARGS])
142 {
143     g_assert_not_reached();
144 }
145 #endif
146 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
147                        intptr_t arg2);
148 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
149                         TCGReg base, intptr_t ofs);
150 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
151 static int tcg_target_const_match(tcg_target_long val, TCGType type,
152                                   const TCGArgConstraint *arg_ct);
153 #ifdef TCG_TARGET_NEED_LDST_LABELS
154 static int tcg_out_ldst_finalize(TCGContext *s);
155 #endif
156 
157 #define TCG_HIGHWATER 1024
158 
159 static TCGContext **tcg_ctxs;
160 static unsigned int n_tcg_ctxs;
161 TCGv_env cpu_env = 0;
162 const void *tcg_code_gen_epilogue;
163 uintptr_t tcg_splitwx_diff;
164 
165 #ifndef CONFIG_TCG_INTERPRETER
166 tcg_prologue_fn *tcg_qemu_tb_exec;
167 #endif
168 
169 struct tcg_region_tree {
170     QemuMutex lock;
171     GTree *tree;
172     /* padding to avoid false sharing is computed at run-time */
173 };
174 
175 /*
176  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
177  * dynamically allocate from as demand dictates. Given appropriate region
178  * sizing, this minimizes flushes even when some TCG threads generate a lot
179  * more code than others.
180  */
181 struct tcg_region_state {
182     QemuMutex lock;
183 
184     /* fields set at init time */
185     void *start;
186     void *start_aligned;
187     void *end;
188     size_t n;
189     size_t size; /* size of one region */
190     size_t stride; /* .size + guard size */
191 
192     /* fields protected by the lock */
193     size_t current; /* current region index */
194     size_t agg_size_full; /* aggregate size of full regions */
195 };
196 
197 static struct tcg_region_state region;
198 /*
199  * This is an array of struct tcg_region_tree elements, with padding.
200  * We use void * to simplify the computation of region_trees[i]; each
201  * struct is found every tree_size bytes.
202  */
203 static void *region_trees;
204 static size_t tree_size;
205 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
206 static TCGRegSet tcg_target_call_clobber_regs;
207 
208 #if TCG_TARGET_INSN_UNIT_SIZE == 1
209 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
210 {
211     *s->code_ptr++ = v;
212 }
213 
214 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
215                                                       uint8_t v)
216 {
217     *p = v;
218 }
219 #endif
220 
221 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
222 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
223 {
224     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
225         *s->code_ptr++ = v;
226     } else {
227         tcg_insn_unit *p = s->code_ptr;
228         memcpy(p, &v, sizeof(v));
229         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
230     }
231 }
232 
233 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
234                                                        uint16_t v)
235 {
236     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
237         *p = v;
238     } else {
239         memcpy(p, &v, sizeof(v));
240     }
241 }
242 #endif
243 
244 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
245 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
246 {
247     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
248         *s->code_ptr++ = v;
249     } else {
250         tcg_insn_unit *p = s->code_ptr;
251         memcpy(p, &v, sizeof(v));
252         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
253     }
254 }
255 
256 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
257                                                        uint32_t v)
258 {
259     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
260         *p = v;
261     } else {
262         memcpy(p, &v, sizeof(v));
263     }
264 }
265 #endif
266 
267 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
268 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
269 {
270     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
271         *s->code_ptr++ = v;
272     } else {
273         tcg_insn_unit *p = s->code_ptr;
274         memcpy(p, &v, sizeof(v));
275         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
276     }
277 }
278 
279 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
280                                                        uint64_t v)
281 {
282     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
283         *p = v;
284     } else {
285         memcpy(p, &v, sizeof(v));
286     }
287 }
288 #endif
289 
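/*
 * For illustration, a sketch of how a backend might combine these
 * emitters (the encodings below are hypothetical, not from any real
 * backend): on a host with TCG_TARGET_INSN_UNIT_SIZE == 1 (e.g. x86),
 * a two-byte opcode followed by a 32-bit immediate could be emitted as
 *
 *     tcg_out8(s, 0x0f);
 *     tcg_out8(s, 0xaf);
 *     tcg_out32(s, imm32);
 *
 * while a fixed-width host with TCG_TARGET_INSN_UNIT_SIZE == 4 emits
 * one unit per instruction via tcg_out32(s, insn).  The memcpy paths
 * above keep the emitters portable when the value written is wider
 * than a single insn unit.
 */
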
290 /* label relocation processing */
291 
292 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
293                           TCGLabel *l, intptr_t addend)
294 {
295     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
296 
297     r->type = type;
298     r->ptr = code_ptr;
299     r->addend = addend;
300     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
301 }
302 
303 static void tcg_out_label(TCGContext *s, TCGLabel *l)
304 {
305     tcg_debug_assert(!l->has_value);
306     l->has_value = 1;
307     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
308 }
309 
310 TCGLabel *gen_new_label(void)
311 {
312     TCGContext *s = tcg_ctx;
313     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
314 
315     memset(l, 0, sizeof(TCGLabel));
316     l->id = s->nb_labels++;
317     QSIMPLEQ_INIT(&l->relocs);
318 
319     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
320 
321     return l;
322 }
323 
324 static bool tcg_resolve_relocs(TCGContext *s)
325 {
326     TCGLabel *l;
327 
328     QSIMPLEQ_FOREACH(l, &s->labels, next) {
329         TCGRelocation *r;
330         uintptr_t value = l->u.value;
331 
332         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
333             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
334                 return false;
335             }
336         }
337     }
338     return true;
339 }
340 
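/*
 * A usage sketch (the reloc type and placeholder are hypothetical):
 * a backend emitting a forward branch records a relocation against a
 * label that has no value yet; tcg_out_label() later fixes the label's
 * address, and tcg_resolve_relocs() walks each label's list to patch
 * the recorded instruction bytes via the per-target patch_reloc().
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_HYPOTHETICAL_BR, l, 0);
 *     tcg_out32(s, BR_PLACEHOLDER);    // patched after resolution
 *     ...
 *     tcg_out_label(s, l);             // label now points here
 */
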
341 static void set_jmp_reset_offset(TCGContext *s, int which)
342 {
343     /*
344      * We will check for overflow at the end of the opcode loop in
345      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
346      */
347     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
348 }
349 
350 /* Signal overflow, starting over with fewer guest insns. */
351 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
352 {
353     siglongjmp(s->jmp_trans, -2);
354 }
355 
356 #define C_PFX1(P, A)                    P##A
357 #define C_PFX2(P, A, B)                 P##A##_##B
358 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
359 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
360 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
361 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
362 
363 /* Define an enumeration for the various combinations. */
364 
365 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
366 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
367 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
368 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
369 
370 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
371 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
372 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
373 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
374 
375 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
376 
377 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
378 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
379 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
380 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
381 
382 typedef enum {
383 #include "tcg-target-con-set.h"
384 } TCGConstraintSetIndex;
385 
386 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
387 
388 #undef C_O0_I1
389 #undef C_O0_I2
390 #undef C_O0_I3
391 #undef C_O0_I4
392 #undef C_O1_I1
393 #undef C_O1_I2
394 #undef C_O1_I3
395 #undef C_O1_I4
396 #undef C_N1_I2
397 #undef C_O2_I1
398 #undef C_O2_I2
399 #undef C_O2_I3
400 #undef C_O2_I4
401 
402 /* Put all of the constraint sets into an array, indexed by the enum. */
403 
404 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
405 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
406 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
407 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
408 
409 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
410 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
411 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
412 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
413 
414 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
415 
416 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
417 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
418 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
419 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
420 
421 static const TCGTargetOpDef constraint_sets[] = {
422 #include "tcg-target-con-set.h"
423 };
424 
425 
426 #undef C_O0_I1
427 #undef C_O0_I2
428 #undef C_O0_I3
429 #undef C_O0_I4
430 #undef C_O1_I1
431 #undef C_O1_I2
432 #undef C_O1_I3
433 #undef C_O1_I4
434 #undef C_N1_I2
435 #undef C_O2_I1
436 #undef C_O2_I2
437 #undef C_O2_I3
438 #undef C_O2_I4
439 
440 /* Expand the enumerator to be returned from tcg_target_op_def(). */
441 
442 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
443 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
444 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
445 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
446 
447 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
448 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
449 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
450 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
451 
452 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
453 
454 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
455 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
456 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
457 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
458 
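/*
 * Taken together, the three inclusions of "tcg-target-con-set.h"
 * implement an X-macro pattern.  As a sketch, a hypothetical line in
 * that header such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * first expands to the enumerator c_o1_i2_r_r_ri in
 * TCGConstraintSetIndex, then to the array entry
 * { .args_ct_str = { "r", "r", "ri" } } at the same index in
 * constraint_sets[], and finally (with the definitions just above)
 * back to the enumerator, which is what tcg_target_op_def() returns.
 */
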
459 #include "tcg-target.c.inc"
460 
461 /* compare a pointer @ptr and a tb_tc @s */
462 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
463 {
464     if (ptr >= s->ptr + s->size) {
465         return 1;
466     } else if (ptr < s->ptr) {
467         return -1;
468     }
469     return 0;
470 }
471 
472 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
473 {
474     const struct tb_tc *a = ap;
475     const struct tb_tc *b = bp;
476 
477     /*
478      * When both sizes are set, we know this isn't a lookup.
479      * This is the most likely case: every TB must be inserted; lookups
480      * are a lot less frequent.
481      */
482     if (likely(a->size && b->size)) {
483         if (a->ptr > b->ptr) {
484             return 1;
485         } else if (a->ptr < b->ptr) {
486             return -1;
487         }
488         /* a->ptr == b->ptr should happen only on deletions */
489         g_assert(a->size == b->size);
490         return 0;
491     }
492     /*
493      * In a lookup, the key's .size field is set to 0.
494      * From the glib sources we see that @ap is always the lookup key. However
495      * the docs provide no guarantee, so we just mark this case as likely.
496      */
497     if (likely(a->size == 0)) {
498         return ptr_cmp_tb_tc(a->ptr, b);
499     }
500     return ptr_cmp_tb_tc(b->ptr, a);
501 }
502 
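/*
 * Worked example: for a stored TB with .ptr = 0x1000 and .size = 0x80,
 * a lookup key { .ptr = (void *)0x1040, .size = 0 } compares equal,
 * since ptr_cmp_tb_tc() treats the entry as the half-open interval
 * [0x1000, 0x1080); a key with .ptr = 0x1080 compares greater and the
 * search descends to the right.  The addresses are illustrative.
 */
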
503 static void tcg_region_trees_init(void)
504 {
505     size_t i;
506 
507     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
508     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
509     for (i = 0; i < region.n; i++) {
510         struct tcg_region_tree *rt = region_trees + i * tree_size;
511 
512         qemu_mutex_init(&rt->lock);
513         rt->tree = g_tree_new(tb_tc_cmp);
514     }
515 }
516 
517 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
518 {
519     size_t region_idx;
520 
521     /*
522      * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
523      * a signal handler over which the caller has no control.
524      */
525     if (!in_code_gen_buffer(p)) {
526         p -= tcg_splitwx_diff;
527         if (!in_code_gen_buffer(p)) {
528             return NULL;
529         }
530     }
531 
532     if (p < region.start_aligned) {
533         region_idx = 0;
534     } else {
535         ptrdiff_t offset = p - region.start_aligned;
536 
537         if (offset > region.stride * (region.n - 1)) {
538             region_idx = region.n - 1;
539         } else {
540             region_idx = offset / region.stride;
541         }
542     }
543     return region_trees + region_idx * tree_size;
544 }
545 
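/*
 * Sketch of the index computation with illustrative numbers: given
 * region.start_aligned = 0x10000, region.stride = 0x8000 and
 * region.n = 4, a pointer p = 0x1c123 yields offset = 0xc123 and
 * region_idx = 0xc123 / 0x8000 = 1, i.e. the second region's tree.
 * Pointers in the unaligned head (before start_aligned) map to tree 0,
 * and anything past the last stride is clamped to the last tree.
 */
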
546 void tcg_tb_insert(TranslationBlock *tb)
547 {
548     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
549 
550     g_assert(rt != NULL);
551     qemu_mutex_lock(&rt->lock);
552     g_tree_insert(rt->tree, &tb->tc, tb);
553     qemu_mutex_unlock(&rt->lock);
554 }
555 
556 void tcg_tb_remove(TranslationBlock *tb)
557 {
558     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
559 
560     g_assert(rt != NULL);
561     qemu_mutex_lock(&rt->lock);
562     g_tree_remove(rt->tree, &tb->tc);
563     qemu_mutex_unlock(&rt->lock);
564 }
565 
566 /*
567  * Find the TB 'tb' such that
568  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
569  * Return NULL if not found.
570  */
571 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
572 {
573     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
574     TranslationBlock *tb;
575     struct tb_tc s = { .ptr = (void *)tc_ptr };
576 
577     if (rt == NULL) {
578         return NULL;
579     }
580 
581     qemu_mutex_lock(&rt->lock);
582     tb = g_tree_lookup(rt->tree, &s);
583     qemu_mutex_unlock(&rt->lock);
584     return tb;
585 }
586 
587 static void tcg_region_tree_lock_all(void)
588 {
589     size_t i;
590 
591     for (i = 0; i < region.n; i++) {
592         struct tcg_region_tree *rt = region_trees + i * tree_size;
593 
594         qemu_mutex_lock(&rt->lock);
595     }
596 }
597 
598 static void tcg_region_tree_unlock_all(void)
599 {
600     size_t i;
601 
602     for (i = 0; i < region.n; i++) {
603         struct tcg_region_tree *rt = region_trees + i * tree_size;
604 
605         qemu_mutex_unlock(&rt->lock);
606     }
607 }
608 
609 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
610 {
611     size_t i;
612 
613     tcg_region_tree_lock_all();
614     for (i = 0; i < region.n; i++) {
615         struct tcg_region_tree *rt = region_trees + i * tree_size;
616 
617         g_tree_foreach(rt->tree, func, user_data);
618     }
619     tcg_region_tree_unlock_all();
620 }
621 
622 size_t tcg_nb_tbs(void)
623 {
624     size_t nb_tbs = 0;
625     size_t i;
626 
627     tcg_region_tree_lock_all();
628     for (i = 0; i < region.n; i++) {
629         struct tcg_region_tree *rt = region_trees + i * tree_size;
630 
631         nb_tbs += g_tree_nnodes(rt->tree);
632     }
633     tcg_region_tree_unlock_all();
634     return nb_tbs;
635 }
636 
637 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
638 {
639     TranslationBlock *tb = v;
640 
641     tb_destroy(tb);
642     return FALSE;
643 }
644 
645 static void tcg_region_tree_reset_all(void)
646 {
647     size_t i;
648 
649     tcg_region_tree_lock_all();
650     for (i = 0; i < region.n; i++) {
651         struct tcg_region_tree *rt = region_trees + i * tree_size;
652 
653         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
654         /* Increment the refcount first so that destroy acts as a reset */
655         g_tree_ref(rt->tree);
656         g_tree_destroy(rt->tree);
657     }
658     tcg_region_tree_unlock_all();
659 }
660 
661 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
662 {
663     void *start, *end;
664 
665     start = region.start_aligned + curr_region * region.stride;
666     end = start + region.size;
667 
668     if (curr_region == 0) {
669         start = region.start;
670     }
671     if (curr_region == region.n - 1) {
672         end = region.end;
673     }
674 
675     *pstart = start;
676     *pend = end;
677 }
678 
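/*
 * Example layout with illustrative numbers: for start_aligned =
 * 0x10000, stride = 0x8000 and size = 0x7000, region 1 spans
 * [0x18000, 0x1f000), leaving [0x1f000, 0x20000) for its guard page.
 * Region 0 additionally absorbs the unaligned head (region.start) and
 * the last region absorbs the page-aligned tail (region.end).
 */
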
679 static void tcg_region_assign(TCGContext *s, size_t curr_region)
680 {
681     void *start, *end;
682 
683     tcg_region_bounds(curr_region, &start, &end);
684 
685     s->code_gen_buffer = start;
686     s->code_gen_ptr = start;
687     s->code_gen_buffer_size = end - start;
688     s->code_gen_highwater = end - TCG_HIGHWATER;
689 }
690 
691 static bool tcg_region_alloc__locked(TCGContext *s)
692 {
693     if (region.current == region.n) {
694         return true;
695     }
696     tcg_region_assign(s, region.current);
697     region.current++;
698     return false;
699 }
700 
701 /*
702  * Request a new region once the one in use has filled up.
703  * Returns true on error.
704  */
705 static bool tcg_region_alloc(TCGContext *s)
706 {
707     bool err;
708     /* read the region size now; alloc__locked will overwrite it on success */
709     size_t size_full = s->code_gen_buffer_size;
710 
711     qemu_mutex_lock(&region.lock);
712     err = tcg_region_alloc__locked(s);
713     if (!err) {
714         region.agg_size_full += size_full - TCG_HIGHWATER;
715     }
716     qemu_mutex_unlock(&region.lock);
717     return err;
718 }
719 
720 /*
721  * Perform a context's first region allocation.
722  * This function does _not_ increment region.agg_size_full.
723  */
724 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
725 {
726     return tcg_region_alloc__locked(s);
727 }
728 
729 /* Call from a safe-work context */
730 void tcg_region_reset_all(void)
731 {
732     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
733     unsigned int i;
734 
735     qemu_mutex_lock(&region.lock);
736     region.current = 0;
737     region.agg_size_full = 0;
738 
739     for (i = 0; i < n_ctxs; i++) {
740         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
741         bool err = tcg_region_initial_alloc__locked(s);
742 
743         g_assert(!err);
744     }
745     qemu_mutex_unlock(&region.lock);
746 
747     tcg_region_tree_reset_all();
748 }
749 
750 #ifdef CONFIG_USER_ONLY
751 static size_t tcg_n_regions(void)
752 {
753     return 1;
754 }
755 #else
756 /*
757  * It is likely that some vCPUs will translate more code than others, so we
758  * first try to set more regions than max_cpus, with those regions being of
759  * reasonable size. If that's not possible we make do by evenly dividing
760  * the code_gen_buffer among the vCPUs.
761  */
762 static size_t tcg_n_regions(void)
763 {
764     size_t i;
765 
766     /* Use a single region if all we have is one vCPU thread */
767 #if !defined(CONFIG_USER_ONLY)
768     MachineState *ms = MACHINE(qdev_get_machine());
769     unsigned int max_cpus = ms->smp.max_cpus;
770 #endif
771     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
772         return 1;
773     }
774 
775     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
776     for (i = 8; i > 0; i--) {
777         size_t regions_per_thread = i;
778         size_t region_size;
779 
780         region_size = tcg_init_ctx.code_gen_buffer_size;
781         region_size /= max_cpus * regions_per_thread;
782 
783         if (region_size >= 2 * 1024u * 1024) {
784             return max_cpus * regions_per_thread;
785         }
786     }
787     /* If we can't, then just allocate one region per vCPU thread */
788     return max_cpus;
789 }
790 #endif
791 
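/*
 * Worked example for the sizing loop above (numbers are illustrative):
 * with a 1 GiB code_gen_buffer and max_cpus = 8, the first iteration
 * tries 8 regions per thread, i.e. 64 regions of 16 MiB each; since
 * 16 MiB >= 2 MiB, 64 is returned immediately.  Only for much smaller
 * buffers does the loop fall through to one region per vCPU thread.
 */
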
792 /*
793  * Initializes region partitioning.
794  *
795  * Called at init time from the parent thread (i.e. the one calling
796  * tcg_context_init), after the target's TCG globals have been set.
797  *
798  * Region partitioning works by splitting code_gen_buffer into separate regions,
799  * and then assigning regions to TCG threads so that the threads can translate
800  * code in parallel without synchronization.
801  *
802  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
803  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
804  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
805  * must have been parsed before calling this function, since it calls
806  * qemu_tcg_mttcg_enabled().
807  *
808  * In user-mode we use a single region.  Having multiple regions in user-mode
809  * is not supported, because the number of vCPU threads (recall that each thread
810  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
811  * OS, and usually this number is huge (tens of thousands is not uncommon).
812  * Thus, given this large bound on the number of vCPU threads and the fact
813  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
814  * the availability of at least one region per vCPU thread.
815  *
816  * However, this user-mode limitation is unlikely to be a significant problem
817  * in practice. Multi-threaded guests share most if not all of their translated
818  * code, which makes parallel code generation less appealing than in softmmu.
819  */
820 void tcg_region_init(void)
821 {
822     void *buf = tcg_init_ctx.code_gen_buffer;
823     void *aligned;
824     size_t size = tcg_init_ctx.code_gen_buffer_size;
825     size_t page_size = qemu_real_host_page_size;
826     size_t region_size;
827     size_t n_regions;
828     size_t i;
829 
830     n_regions = tcg_n_regions();
831 
832     /* The first region will be 'aligned - buf' bytes larger than the others */
833     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
834     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
835     /*
836      * Make region_size a multiple of page_size, using aligned as the start.
837      * As a result of this we might end up with a few extra pages at the end of
838      * the buffer; we will assign those to the last region.
839      */
840     region_size = (size - (aligned - buf)) / n_regions;
841     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
842 
843     /* A region must have at least 2 pages; one code, one guard */
844     g_assert(region_size >= 2 * page_size);
845 
846     /* init the region struct */
847     qemu_mutex_init(&region.lock);
848     region.n = n_regions;
849     region.size = region_size - page_size;
850     region.stride = region_size;
851     region.start = buf;
852     region.start_aligned = aligned;
853     /* page-align the end, since its last page will be a guard page */
854     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
855     /* account for that last guard page */
856     region.end -= page_size;
857 
858     /*
859      * Set guard pages in the rw buffer, as that's the one into which
860      * buffer overruns could occur.  Do not set guard pages in the rx
861      * buffer -- let that one use hugepages throughout.
862      */
863     for (i = 0; i < region.n; i++) {
864         void *start, *end;
865 
866         tcg_region_bounds(i, &start, &end);
867 
868         /*
869          * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
870          * rejects a permission change from RWX -> NONE.  Guard pages are
871          * nice for bug detection but are not essential; ignore any failure.
872          */
873         (void)qemu_mprotect_none(end, page_size);
874     }
875 
876     tcg_region_trees_init();
877 
878     /* In user-mode we support only one ctx, so do the initial allocation now */
879 #ifdef CONFIG_USER_ONLY
880     {
881         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
882 
883         g_assert(!err);
884     }
885 #endif
886 }
887 
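/*
 * Putting the arithmetic above together with illustrative numbers: a
 * 32 MiB page-aligned buffer with 4 KiB pages and n_regions = 16 gives
 * region_size = 2 MiB, hence region.size = 2 MiB - 4 KiB of usable
 * code space per region, with the last page of each stride turned
 * into a guard page by qemu_mprotect_none().
 */
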
888 #ifdef CONFIG_DEBUG_TCG
889 const void *tcg_splitwx_to_rx(void *rw)
890 {
891     /* Pass NULL pointers unchanged. */
892     if (rw) {
893         g_assert(in_code_gen_buffer(rw));
894         rw += tcg_splitwx_diff;
895     }
896     return rw;
897 }
898 
899 void *tcg_splitwx_to_rw(const void *rx)
900 {
901     /* Pass NULL pointers unchanged. */
902     if (rx) {
903         rx -= tcg_splitwx_diff;
904         /* Assert that we end with a pointer in the rw region. */
905         g_assert(in_code_gen_buffer(rx));
906     }
907     return (void *)rx;
908 }
909 #endif /* CONFIG_DEBUG_TCG */
910 
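/*
 * With split RW/RX mappings the same generated code is visible at two
 * virtual addresses that differ by tcg_splitwx_diff.  A minimal sketch
 * of the round trip:
 *
 *     void *rw = s->code_ptr;                    // writable view
 *     const void *rx = tcg_splitwx_to_rx(rw);    // executable view
 *     assert(tcg_splitwx_to_rw(rx) == rw);
 *
 * When splitwx is not in use, tcg_splitwx_diff is 0 and the two views
 * coincide.
 */
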
911 static void alloc_tcg_plugin_context(TCGContext *s)
912 {
913 #ifdef CONFIG_PLUGIN
914     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
915     s->plugin_tb->insns =
916         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
917 #endif
918 }
919 
920 /*
921  * All TCG threads except the parent (i.e. the one that called tcg_context_init
922  * and registered the target's TCG globals) must register with this function
923  * before initiating translation.
924  *
925  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
926  * of tcg_region_init() for the reasoning behind this.
927  *
928  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
929  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
930  * is not used anymore for translation once this function is called.
931  *
932  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
933  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
934  */
935 #ifdef CONFIG_USER_ONLY
936 void tcg_register_thread(void)
937 {
938     tcg_ctx = &tcg_init_ctx;
939 }
940 #else
941 void tcg_register_thread(void)
942 {
943     MachineState *ms = MACHINE(qdev_get_machine());
944     TCGContext *s = g_malloc(sizeof(*s));
945     unsigned int i, n;
946     bool err;
947 
948     *s = tcg_init_ctx;
949 
950     /* Relink mem_base.  */
951     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
952         if (tcg_init_ctx.temps[i].mem_base) {
953             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
954             tcg_debug_assert(b >= 0 && b < n);
955             s->temps[i].mem_base = &s->temps[b];
956         }
957     }
958 
959     /* Claim an entry in tcg_ctxs */
960     n = qatomic_fetch_inc(&n_tcg_ctxs);
961     g_assert(n < ms->smp.max_cpus);
962     qatomic_set(&tcg_ctxs[n], s);
963 
964     if (n > 0) {
965         alloc_tcg_plugin_context(s);
966     }
967 
968     tcg_ctx = s;
969     qemu_mutex_lock(&region.lock);
970     err = tcg_region_initial_alloc__locked(tcg_ctx);
971     g_assert(!err);
972     qemu_mutex_unlock(&region.lock);
973 }
974 #endif /* !CONFIG_USER_ONLY */
975 
976 /*
977  * Returns the size (in bytes) of all translated code (i.e. from all regions)
978  * currently in the cache.
979  * See also: tcg_code_capacity()
980  * Do not confuse with tcg_current_code_size(); that one applies to a single
981  * TCG context.
982  */
983 size_t tcg_code_size(void)
984 {
985     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
986     unsigned int i;
987     size_t total;
988 
989     qemu_mutex_lock(&region.lock);
990     total = region.agg_size_full;
991     for (i = 0; i < n_ctxs; i++) {
992         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
993         size_t size;
994 
995         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
996         g_assert(size <= s->code_gen_buffer_size);
997         total += size;
998     }
999     qemu_mutex_unlock(&region.lock);
1000     return total;
1001 }
1002 
1003 /*
1004  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
1005  * regions.
1006  * See also: tcg_code_size()
1007  */
1008 size_t tcg_code_capacity(void)
1009 {
1010     size_t guard_size, capacity;
1011 
1012     /* no need for synchronization; these variables are set at init time */
1013     guard_size = region.stride - region.size;
1014     capacity = region.end + guard_size - region.start;
1015     capacity -= region.n * (guard_size + TCG_HIGHWATER);
1016     return capacity;
1017 }
1018 
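/*
 * Sketch of the capacity arithmetic with illustrative numbers: for 4
 * regions with stride = 0x8000 and size = 0x7000 (guard_size = 0x1000)
 * and TCG_HIGHWATER = 1024, the full buffer span is recovered as
 * end + guard_size - start, then 4 * (0x1000 + 1024) bytes are
 * deducted: each region loses its guard page and its high-water slack.
 */
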
1019 size_t tcg_tb_phys_invalidate_count(void)
1020 {
1021     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1022     unsigned int i;
1023     size_t total = 0;
1024 
1025     for (i = 0; i < n_ctxs; i++) {
1026         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1027 
1028         total += qatomic_read(&s->tb_phys_invalidate_count);
1029     }
1030     return total;
1031 }
1032 
1033 /* pool-based memory allocation */
1034 void *tcg_malloc_internal(TCGContext *s, int size)
1035 {
1036     TCGPool *p;
1037     int pool_size;
1038 
1039     if (size > TCG_POOL_CHUNK_SIZE) {
1040         /* big malloc: insert a new pool (XXX: could optimize) */
1041         p = g_malloc(sizeof(TCGPool) + size);
1042         p->size = size;
1043         p->next = s->pool_first_large;
1044         s->pool_first_large = p;
1045         return p->data;
1046     } else {
1047         p = s->pool_current;
1048         if (!p) {
1049             p = s->pool_first;
1050             if (!p)
1051                 goto new_pool;
1052         } else {
1053             if (!p->next) {
1054             new_pool:
1055                 pool_size = TCG_POOL_CHUNK_SIZE;
1056                 p = g_malloc(sizeof(TCGPool) + pool_size);
1057                 p->size = pool_size;
1058                 p->next = NULL;
1059                 if (s->pool_current)
1060                     s->pool_current->next = p;
1061                 else
1062                     s->pool_first = p;
1063             } else {
1064                 p = p->next;
1065             }
1066         }
1067     }
1068     s->pool_current = p;
1069     s->pool_cur = p->data + size;
1070     s->pool_end = p->data + p->size;
1071     return p->data;
1072 }
1073 
1074 void tcg_pool_reset(TCGContext *s)
1075 {
1076     TCGPool *p, *t;
1077     for (p = s->pool_first_large; p; p = t) {
1078         t = p->next;
1079         g_free(p);
1080     }
1081     s->pool_first_large = NULL;
1082     s->pool_cur = s->pool_end = NULL;
1083     s->pool_current = NULL;
1084 }
1085 
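/*
 * A minimal usage sketch: tcg_malloc() (an inline in tcg.h) is the
 * fast path that bump-allocates from the current chunk and falls back
 * to tcg_malloc_internal() on exhaustion.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *     ...
 *     tcg_pool_reset(s);   // releases everything in one sweep
 *
 * Individual allocations are never freed, which is why callers such as
 * tcg_out_reloc() above allocate without matching frees.
 */
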
1086 typedef struct TCGHelperInfo {
1087     void *func;
1088     const char *name;
1089     unsigned flags;
1090     unsigned sizemask;
1091 } TCGHelperInfo;
1092 
1093 #include "exec/helper-proto.h"
1094 
1095 static const TCGHelperInfo all_helpers[] = {
1096 #include "exec/helper-tcg.h"
1097 };
1098 static GHashTable *helper_table;
1099 
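/*
 * helper_table maps a helper's function pointer to its TCGHelperInfo
 * via direct pointer hashing.  A sketch of the lookup done when a call
 * op is generated (tcg_gen_callN, later in this file, is the caller):
 *
 *     TCGHelperInfo *info =
 *         g_hash_table_lookup(helper_table, (gpointer)func);
 *
 * from which the call emitter reads the name, flags and sizemask.
 */
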
1100 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1101 static void process_op_defs(TCGContext *s);
1102 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1103                                             TCGReg reg, const char *name);
1104 
1105 void tcg_context_init(TCGContext *s)
1106 {
1107     int op, total_args, n, i;
1108     TCGOpDef *def;
1109     TCGArgConstraint *args_ct;
1110     TCGTemp *ts;
1111 
1112     memset(s, 0, sizeof(*s));
1113     s->nb_globals = 0;
1114 
1115     /* Count total number of arguments and allocate the corresponding
1116        space */
1117     total_args = 0;
1118     for(op = 0; op < NB_OPS; op++) {
1119         def = &tcg_op_defs[op];
1120         n = def->nb_iargs + def->nb_oargs;
1121         total_args += n;
1122     }
1123 
1124     args_ct = g_new0(TCGArgConstraint, total_args);
1125 
1126     for(op = 0; op < NB_OPS; op++) {
1127         def = &tcg_op_defs[op];
1128         def->args_ct = args_ct;
1129         n = def->nb_iargs + def->nb_oargs;
1130         args_ct += n;
1131     }
1132 
1133     /* Register helpers.  */
1134     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
1135     helper_table = g_hash_table_new(NULL, NULL);
1136 
1137     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1138         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1139                             (gpointer)&all_helpers[i]);
1140     }
1141 
1142     tcg_target_init(s);
1143     process_op_defs(s);
1144 
1145     /* Reverse the order of the saved registers, assuming they're all at
1146        the start of tcg_target_reg_alloc_order.  */
1147     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1148         int r = tcg_target_reg_alloc_order[n];
1149         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1150             break;
1151         }
1152     }
1153     for (i = 0; i < n; ++i) {
1154         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1155     }
1156     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1157         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1158     }
1159 
1160     alloc_tcg_plugin_context(s);
1161 
1162     tcg_ctx = s;
1163     /*
1164      * In user-mode we simply share the init context among threads, since we
1165  * use a single region. See the documentation of tcg_region_init() for the
1166      * reasoning behind this.
1167      * In softmmu we will have at most max_cpus TCG threads.
1168      */
1169 #ifdef CONFIG_USER_ONLY
1170     tcg_ctxs = &tcg_ctx;
1171     n_tcg_ctxs = 1;
1172 #else
1173     MachineState *ms = MACHINE(qdev_get_machine());
1174     unsigned int max_cpus = ms->smp.max_cpus;
1175     tcg_ctxs = g_new(TCGContext *, max_cpus);
1176 #endif
1177 
1178     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1179     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1180     cpu_env = temp_tcgv_ptr(ts);
1181 }
1182 
1183 /*
1184  * Allocate TBs right before their corresponding translated code, making
1185  * sure that TBs and code are on different cache lines.
1186  */
1187 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1188 {
1189     uintptr_t align = qemu_icache_linesize;
1190     TranslationBlock *tb;
1191     void *next;
1192 
1193  retry:
1194     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1195     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1196 
1197     if (unlikely(next > s->code_gen_highwater)) {
1198         if (tcg_region_alloc(s)) {
1199             return NULL;
1200         }
1201         goto retry;
1202     }
1203     qatomic_set(&s->code_gen_ptr, next);
1204     s->data_gen_ptr = NULL;
1205     return tb;
1206 }
1207 
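/*
 * Illustrative layout: with qemu_icache_linesize = 64 and code_gen_ptr
 * = 0x1010, the TB struct is placed at 0x1040, and code_gen_ptr is
 * advanced to the next 64-byte boundary past sizeof(TranslationBlock),
 * so the TB metadata and its translated code never share a cache line.
 */
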
1208 void tcg_prologue_init(TCGContext *s)
1209 {
1210     size_t prologue_size, total_size;
1211     void *buf0, *buf1;
1212 
1213     /* Put the prologue at the beginning of code_gen_buffer.  */
1214     buf0 = s->code_gen_buffer;
1215     total_size = s->code_gen_buffer_size;
1216     s->code_ptr = buf0;
1217     s->code_buf = buf0;
1218     s->data_gen_ptr = NULL;
1219 
1220     /*
1221      * The region trees are not yet configured, but tcg_splitwx_to_rx
1222      * needs the bounds for an assert.
1223      */
1224     region.start = buf0;
1225     region.end = buf0 + total_size;
1226 
1227 #ifndef CONFIG_TCG_INTERPRETER
1228     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1229 #endif
1230 
1231     /* Compute a high-water mark, at which we voluntarily flush the buffer
1232        and start over.  The size here is arbitrary, significantly larger
1233        than we expect the code generation for any one opcode to require.  */
1234     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1235 
1236 #ifdef TCG_TARGET_NEED_POOL_LABELS
1237     s->pool_labels = NULL;
1238 #endif
1239 
1240     qemu_thread_jit_write();
1241     /* Generate the prologue.  */
1242     tcg_target_qemu_prologue(s);
1243 
1244 #ifdef TCG_TARGET_NEED_POOL_LABELS
1245     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1246     {
1247         int result = tcg_out_pool_finalize(s);
1248         tcg_debug_assert(result == 0);
1249     }
1250 #endif
1251 
1252     buf1 = s->code_ptr;
1253 #ifndef CONFIG_TCG_INTERPRETER
1254     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1255                         tcg_ptr_byte_diff(buf1, buf0));
1256 #endif
1257 
1258     /* Deduct the prologue from the buffer.  */
1259     prologue_size = tcg_current_code_size(s);
1260     s->code_gen_ptr = buf1;
1261     s->code_gen_buffer = buf1;
1262     s->code_buf = buf1;
1263     total_size -= prologue_size;
1264     s->code_gen_buffer_size = total_size;
1265 
1266     tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1267 
1268 #ifdef DEBUG_DISAS
1269     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1270         FILE *logfile = qemu_log_lock();
1271         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1272         if (s->data_gen_ptr) {
1273             size_t code_size = s->data_gen_ptr - buf0;
1274             size_t data_size = prologue_size - code_size;
1275             size_t i;
1276 
1277             log_disas(buf0, code_size);
1278 
1279             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1280                 if (sizeof(tcg_target_ulong) == 8) {
1281                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1282                              (uintptr_t)s->data_gen_ptr + i,
1283                              *(uint64_t *)(s->data_gen_ptr + i));
1284                 } else {
1285                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1286                              (uintptr_t)s->data_gen_ptr + i,
1287                              *(uint32_t *)(s->data_gen_ptr + i));
1288                 }
1289             }
1290         } else {
1291             log_disas(buf0, prologue_size);
1292         }
1293         qemu_log("\n");
1294         qemu_log_flush();
1295         qemu_log_unlock(logfile);
1296     }
1297 #endif
1298 
1299     /* Assert that goto_ptr is implemented completely.  */
1300     if (TCG_TARGET_HAS_goto_ptr) {
1301         tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1302     }
1303 }
1304 
1305 void tcg_func_start(TCGContext *s)
1306 {
1307     tcg_pool_reset(s);
1308     s->nb_temps = s->nb_globals;
1309 
1310     /* No temps have been previously allocated for size or locality.  */
1311     memset(s->free_temps, 0, sizeof(s->free_temps));
1312 
1313     /* No constant temps have been previously allocated. */
1314     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1315         if (s->const_table[i]) {
1316             g_hash_table_remove_all(s->const_table[i]);
1317         }
1318     }
1319 
1320     s->nb_ops = 0;
1321     s->nb_labels = 0;
1322     s->current_frame_offset = s->frame_start;
1323 
1324 #ifdef CONFIG_DEBUG_TCG
1325     s->goto_tb_issue_mask = 0;
1326 #endif
1327 
1328     QTAILQ_INIT(&s->ops);
1329     QTAILQ_INIT(&s->free_ops);
1330     QSIMPLEQ_INIT(&s->labels);
1331 }
1332 
1333 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1334 {
1335     int n = s->nb_temps++;
1336 
1337     if (n >= TCG_MAX_TEMPS) {
1338         tcg_raise_tb_overflow(s);
1339     }
1340     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1341 }
1342 
1343 static TCGTemp *tcg_global_alloc(TCGContext *s)
1344 {
1345     TCGTemp *ts;
1346 
1347     tcg_debug_assert(s->nb_globals == s->nb_temps);
1348     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1349     s->nb_globals++;
1350     ts = tcg_temp_alloc(s);
1351     ts->kind = TEMP_GLOBAL;
1352 
1353     return ts;
1354 }
1355 
1356 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1357                                             TCGReg reg, const char *name)
1358 {
1359     TCGTemp *ts;
1360 
1361     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1362         tcg_abort();
1363     }
1364 
1365     ts = tcg_global_alloc(s);
1366     ts->base_type = type;
1367     ts->type = type;
1368     ts->kind = TEMP_FIXED;
1369     ts->reg = reg;
1370     ts->name = name;
1371     tcg_regset_set_reg(s->reserved_regs, reg);
1372 
1373     return ts;
1374 }
1375 
1376 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1377 {
1378     s->frame_start = start;
1379     s->frame_end = start + size;
1380     s->frame_temp
1381         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1382 }
1383 
1384 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1385                                      intptr_t offset, const char *name)
1386 {
1387     TCGContext *s = tcg_ctx;
1388     TCGTemp *base_ts = tcgv_ptr_temp(base);
1389     TCGTemp *ts = tcg_global_alloc(s);
1390     int indirect_reg = 0, bigendian = 0;
1391 #ifdef HOST_WORDS_BIGENDIAN
1392     bigendian = 1;
1393 #endif
1394 
1395     switch (base_ts->kind) {
1396     case TEMP_FIXED:
1397         break;
1398     case TEMP_GLOBAL:
1399         /* We do not support double-indirect registers.  */
1400         tcg_debug_assert(!base_ts->indirect_reg);
1401         base_ts->indirect_base = 1;
1402         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1403                             ? 2 : 1);
1404         indirect_reg = 1;
1405         break;
1406     default:
1407         g_assert_not_reached();
1408     }
1409 
1410     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1411         TCGTemp *ts2 = tcg_global_alloc(s);
1412         char buf[64];
1413 
1414         ts->base_type = TCG_TYPE_I64;
1415         ts->type = TCG_TYPE_I32;
1416         ts->indirect_reg = indirect_reg;
1417         ts->mem_allocated = 1;
1418         ts->mem_base = base_ts;
1419         ts->mem_offset = offset + bigendian * 4;
1420         pstrcpy(buf, sizeof(buf), name);
1421         pstrcat(buf, sizeof(buf), "_0");
1422         ts->name = strdup(buf);
1423 
1424         tcg_debug_assert(ts2 == ts + 1);
1425         ts2->base_type = TCG_TYPE_I64;
1426         ts2->type = TCG_TYPE_I32;
1427         ts2->indirect_reg = indirect_reg;
1428         ts2->mem_allocated = 1;
1429         ts2->mem_base = base_ts;
1430         ts2->mem_offset = offset + (1 - bigendian) * 4;
1431         pstrcpy(buf, sizeof(buf), name);
1432         pstrcat(buf, sizeof(buf), "_1");
1433         ts2->name = strdup(buf);
1434     } else {
1435         ts->base_type = type;
1436         ts->type = type;
1437         ts->indirect_reg = indirect_reg;
1438         ts->mem_allocated = 1;
1439         ts->mem_base = base_ts;
1440         ts->mem_offset = offset;
1441         ts->name = name;
1442     }
1443     return ts;
1444 }
1445 
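/*
 * Illustration of the 32-bit split above (names and offset made up):
 * on a 32-bit big-endian host, a 64-bit global "pc" at offset 0x80
 * becomes two TCG_TYPE_I32 halves, "pc_0" (low part) at mem_offset
 * 0x84 and "pc_1" (high part) at 0x80; on a little-endian host the
 * offsets are 0x80 and 0x84 respectively.
 */
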
1446 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1447 {
1448     TCGContext *s = tcg_ctx;
1449     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1450     TCGTemp *ts;
1451     int idx, k;
1452 
1453     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1454     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1455     if (idx < TCG_MAX_TEMPS) {
1456         /* There is already an available temp with the right type.  */
1457         clear_bit(idx, s->free_temps[k].l);
1458 
1459         ts = &s->temps[idx];
1460         ts->temp_allocated = 1;
1461         tcg_debug_assert(ts->base_type == type);
1462         tcg_debug_assert(ts->kind == kind);
1463     } else {
1464         ts = tcg_temp_alloc(s);
1465         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1466             TCGTemp *ts2 = tcg_temp_alloc(s);
1467 
1468             ts->base_type = type;
1469             ts->type = TCG_TYPE_I32;
1470             ts->temp_allocated = 1;
1471             ts->kind = kind;
1472 
1473             tcg_debug_assert(ts2 == ts + 1);
1474             ts2->base_type = TCG_TYPE_I64;
1475             ts2->type = TCG_TYPE_I32;
1476             ts2->temp_allocated = 1;
1477             ts2->kind = kind;
1478         } else {
1479             ts->base_type = type;
1480             ts->type = type;
1481             ts->temp_allocated = 1;
1482             ts->kind = kind;
1483         }
1484     }
1485 
1486 #if defined(CONFIG_DEBUG_TCG)
1487     s->temps_in_use++;
1488 #endif
1489     return ts;
1490 }
1491 
1492 TCGv_vec tcg_temp_new_vec(TCGType type)
1493 {
1494     TCGTemp *t;
1495 
1496 #ifdef CONFIG_DEBUG_TCG
1497     switch (type) {
1498     case TCG_TYPE_V64:
1499         assert(TCG_TARGET_HAS_v64);
1500         break;
1501     case TCG_TYPE_V128:
1502         assert(TCG_TARGET_HAS_v128);
1503         break;
1504     case TCG_TYPE_V256:
1505         assert(TCG_TARGET_HAS_v256);
1506         break;
1507     default:
1508         g_assert_not_reached();
1509     }
1510 #endif
1511 
1512     t = tcg_temp_new_internal(type, 0);
1513     return temp_tcgv_vec(t);
1514 }
1515 
1516 /* Create a new temp of the same type as an existing temp.  */
1517 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1518 {
1519     TCGTemp *t = tcgv_vec_temp(match);
1520 
1521     tcg_debug_assert(t->temp_allocated != 0);
1522 
1523     t = tcg_temp_new_internal(t->base_type, 0);
1524     return temp_tcgv_vec(t);
1525 }
1526 
1527 void tcg_temp_free_internal(TCGTemp *ts)
1528 {
1529     TCGContext *s = tcg_ctx;
1530     int k, idx;
1531 
1532     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1533     if (ts->kind == TEMP_CONST) {
1534         return;
1535     }
1536 
1537 #if defined(CONFIG_DEBUG_TCG)
1538     s->temps_in_use--;
1539     if (s->temps_in_use < 0) {
1540         fprintf(stderr, "More temporaries freed than allocated!\n");
1541     }
1542 #endif
1543 
1544     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1545     tcg_debug_assert(ts->temp_allocated != 0);
1546     ts->temp_allocated = 0;
1547 
1548     idx = temp_idx(ts);
1549     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1550     set_bit(idx, s->free_temps[k].l);
1551 }
1552 
1553 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1554 {
1555     TCGContext *s = tcg_ctx;
1556     GHashTable *h = s->const_table[type];
1557     TCGTemp *ts;
1558 
1559     if (h == NULL) {
1560         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1561         s->const_table[type] = h;
1562     }
1563 
1564     ts = g_hash_table_lookup(h, &val);
1565     if (ts == NULL) {
1566         ts = tcg_temp_alloc(s);
1567 
1568         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1569             TCGTemp *ts2 = tcg_temp_alloc(s);
1570 
1571             ts->base_type = TCG_TYPE_I64;
1572             ts->type = TCG_TYPE_I32;
1573             ts->kind = TEMP_CONST;
1574             ts->temp_allocated = 1;
1575             /*
1576              * Retain the full value of the 64-bit constant in the low
1577              * part, so that the hash table works.  Actual uses will
1578              * truncate the value to the low part.
1579              */
1580             ts->val = val;
1581 
1582             tcg_debug_assert(ts2 == ts + 1);
1583             ts2->base_type = TCG_TYPE_I64;
1584             ts2->type = TCG_TYPE_I32;
1585             ts2->kind = TEMP_CONST;
1586             ts2->temp_allocated = 1;
1587             ts2->val = val >> 32;
1588         } else {
1589             ts->base_type = type;
1590             ts->type = type;
1591             ts->kind = TEMP_CONST;
1592             ts->temp_allocated = 1;
1593             ts->val = val;
1594         }
1595         g_hash_table_insert(h, &ts->val, ts);
1596     }
1597 
1598     return ts;
1599 }
1600 
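/*
 * Usage sketch: tcg_constant_i32()/tcg_constant_i64() in tcg.h wrap
 * this function, so
 *
 *     TCGv_i32 four = tcg_constant_i32(4);
 *
 * returns the same interned TEMP_CONST temp for every request of that
 * value, keyed by &ts->val in the per-type hash table.  Such temps are
 * never freed (tcg_temp_free_internal above ignores them) and are
 * materialized into registers on demand by the register allocator.
 */
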
1601 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1602 {
1603     val = dup_const(vece, val);
1604     return temp_tcgv_vec(tcg_constant_internal(type, val));
1605 }
1606 
1607 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1608 {
1609     TCGTemp *t = tcgv_vec_temp(match);
1610 
1611     tcg_debug_assert(t->temp_allocated != 0);
1612     return tcg_constant_vec(t->base_type, vece, val);
1613 }
1614 
1615 TCGv_i32 tcg_const_i32(int32_t val)
1616 {
1617     TCGv_i32 t0;
1618     t0 = tcg_temp_new_i32();
1619     tcg_gen_movi_i32(t0, val);
1620     return t0;
1621 }
1622 
1623 TCGv_i64 tcg_const_i64(int64_t val)
1624 {
1625     TCGv_i64 t0;
1626     t0 = tcg_temp_new_i64();
1627     tcg_gen_movi_i64(t0, val);
1628     return t0;
1629 }
1630 
1631 TCGv_i32 tcg_const_local_i32(int32_t val)
1632 {
1633     TCGv_i32 t0;
1634     t0 = tcg_temp_local_new_i32();
1635     tcg_gen_movi_i32(t0, val);
1636     return t0;
1637 }
1638 
1639 TCGv_i64 tcg_const_local_i64(int64_t val)
1640 {
1641     TCGv_i64 t0;
1642     t0 = tcg_temp_local_new_i64();
1643     tcg_gen_movi_i64(t0, val);
1644     return t0;
1645 }
1646 
1647 #if defined(CONFIG_DEBUG_TCG)
1648 void tcg_clear_temp_count(void)
1649 {
1650     TCGContext *s = tcg_ctx;
1651     s->temps_in_use = 0;
1652 }
1653 
1654 int tcg_check_temp_count(void)
1655 {
1656     TCGContext *s = tcg_ctx;
1657     if (s->temps_in_use) {
1658         /* Clear the count so that we don't give another
1659          * warning immediately next time around.
1660          */
1661         s->temps_in_use = 0;
1662         return 1;
1663     }
1664     return 0;
1665 }
1666 #endif
1667 
1668 /* Return true if OP may appear in the opcode stream.
1669    Test the runtime variable that controls each opcode.  */
1670 bool tcg_op_supported(TCGOpcode op)
1671 {
1672     const bool have_vec
1673         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1674 
1675     switch (op) {
1676     case INDEX_op_discard:
1677     case INDEX_op_set_label:
1678     case INDEX_op_call:
1679     case INDEX_op_br:
1680     case INDEX_op_mb:
1681     case INDEX_op_insn_start:
1682     case INDEX_op_exit_tb:
1683     case INDEX_op_goto_tb:
1684     case INDEX_op_qemu_ld_i32:
1685     case INDEX_op_qemu_st_i32:
1686     case INDEX_op_qemu_ld_i64:
1687     case INDEX_op_qemu_st_i64:
1688         return true;
1689 
1690     case INDEX_op_qemu_st8_i32:
1691         return TCG_TARGET_HAS_qemu_st8_i32;
1692 
1693     case INDEX_op_goto_ptr:
1694         return TCG_TARGET_HAS_goto_ptr;
1695 
1696     case INDEX_op_mov_i32:
1697     case INDEX_op_setcond_i32:
1698     case INDEX_op_brcond_i32:
1699     case INDEX_op_ld8u_i32:
1700     case INDEX_op_ld8s_i32:
1701     case INDEX_op_ld16u_i32:
1702     case INDEX_op_ld16s_i32:
1703     case INDEX_op_ld_i32:
1704     case INDEX_op_st8_i32:
1705     case INDEX_op_st16_i32:
1706     case INDEX_op_st_i32:
1707     case INDEX_op_add_i32:
1708     case INDEX_op_sub_i32:
1709     case INDEX_op_mul_i32:
1710     case INDEX_op_and_i32:
1711     case INDEX_op_or_i32:
1712     case INDEX_op_xor_i32:
1713     case INDEX_op_shl_i32:
1714     case INDEX_op_shr_i32:
1715     case INDEX_op_sar_i32:
1716         return true;
1717 
1718     case INDEX_op_movcond_i32:
1719         return TCG_TARGET_HAS_movcond_i32;
1720     case INDEX_op_div_i32:
1721     case INDEX_op_divu_i32:
1722         return TCG_TARGET_HAS_div_i32;
1723     case INDEX_op_rem_i32:
1724     case INDEX_op_remu_i32:
1725         return TCG_TARGET_HAS_rem_i32;
1726     case INDEX_op_div2_i32:
1727     case INDEX_op_divu2_i32:
1728         return TCG_TARGET_HAS_div2_i32;
1729     case INDEX_op_rotl_i32:
1730     case INDEX_op_rotr_i32:
1731         return TCG_TARGET_HAS_rot_i32;
1732     case INDEX_op_deposit_i32:
1733         return TCG_TARGET_HAS_deposit_i32;
1734     case INDEX_op_extract_i32:
1735         return TCG_TARGET_HAS_extract_i32;
1736     case INDEX_op_sextract_i32:
1737         return TCG_TARGET_HAS_sextract_i32;
1738     case INDEX_op_extract2_i32:
1739         return TCG_TARGET_HAS_extract2_i32;
1740     case INDEX_op_add2_i32:
1741         return TCG_TARGET_HAS_add2_i32;
1742     case INDEX_op_sub2_i32:
1743         return TCG_TARGET_HAS_sub2_i32;
1744     case INDEX_op_mulu2_i32:
1745         return TCG_TARGET_HAS_mulu2_i32;
1746     case INDEX_op_muls2_i32:
1747         return TCG_TARGET_HAS_muls2_i32;
1748     case INDEX_op_muluh_i32:
1749         return TCG_TARGET_HAS_muluh_i32;
1750     case INDEX_op_mulsh_i32:
1751         return TCG_TARGET_HAS_mulsh_i32;
1752     case INDEX_op_ext8s_i32:
1753         return TCG_TARGET_HAS_ext8s_i32;
1754     case INDEX_op_ext16s_i32:
1755         return TCG_TARGET_HAS_ext16s_i32;
1756     case INDEX_op_ext8u_i32:
1757         return TCG_TARGET_HAS_ext8u_i32;
1758     case INDEX_op_ext16u_i32:
1759         return TCG_TARGET_HAS_ext16u_i32;
1760     case INDEX_op_bswap16_i32:
1761         return TCG_TARGET_HAS_bswap16_i32;
1762     case INDEX_op_bswap32_i32:
1763         return TCG_TARGET_HAS_bswap32_i32;
1764     case INDEX_op_not_i32:
1765         return TCG_TARGET_HAS_not_i32;
1766     case INDEX_op_neg_i32:
1767         return TCG_TARGET_HAS_neg_i32;
1768     case INDEX_op_andc_i32:
1769         return TCG_TARGET_HAS_andc_i32;
1770     case INDEX_op_orc_i32:
1771         return TCG_TARGET_HAS_orc_i32;
1772     case INDEX_op_eqv_i32:
1773         return TCG_TARGET_HAS_eqv_i32;
1774     case INDEX_op_nand_i32:
1775         return TCG_TARGET_HAS_nand_i32;
1776     case INDEX_op_nor_i32:
1777         return TCG_TARGET_HAS_nor_i32;
1778     case INDEX_op_clz_i32:
1779         return TCG_TARGET_HAS_clz_i32;
1780     case INDEX_op_ctz_i32:
1781         return TCG_TARGET_HAS_ctz_i32;
1782     case INDEX_op_ctpop_i32:
1783         return TCG_TARGET_HAS_ctpop_i32;
1784 
1785     case INDEX_op_brcond2_i32:
1786     case INDEX_op_setcond2_i32:
1787         return TCG_TARGET_REG_BITS == 32;
1788 
1789     case INDEX_op_mov_i64:
1790     case INDEX_op_setcond_i64:
1791     case INDEX_op_brcond_i64:
1792     case INDEX_op_ld8u_i64:
1793     case INDEX_op_ld8s_i64:
1794     case INDEX_op_ld16u_i64:
1795     case INDEX_op_ld16s_i64:
1796     case INDEX_op_ld32u_i64:
1797     case INDEX_op_ld32s_i64:
1798     case INDEX_op_ld_i64:
1799     case INDEX_op_st8_i64:
1800     case INDEX_op_st16_i64:
1801     case INDEX_op_st32_i64:
1802     case INDEX_op_st_i64:
1803     case INDEX_op_add_i64:
1804     case INDEX_op_sub_i64:
1805     case INDEX_op_mul_i64:
1806     case INDEX_op_and_i64:
1807     case INDEX_op_or_i64:
1808     case INDEX_op_xor_i64:
1809     case INDEX_op_shl_i64:
1810     case INDEX_op_shr_i64:
1811     case INDEX_op_sar_i64:
1812     case INDEX_op_ext_i32_i64:
1813     case INDEX_op_extu_i32_i64:
1814         return TCG_TARGET_REG_BITS == 64;
1815 
1816     case INDEX_op_movcond_i64:
1817         return TCG_TARGET_HAS_movcond_i64;
1818     case INDEX_op_div_i64:
1819     case INDEX_op_divu_i64:
1820         return TCG_TARGET_HAS_div_i64;
1821     case INDEX_op_rem_i64:
1822     case INDEX_op_remu_i64:
1823         return TCG_TARGET_HAS_rem_i64;
1824     case INDEX_op_div2_i64:
1825     case INDEX_op_divu2_i64:
1826         return TCG_TARGET_HAS_div2_i64;
1827     case INDEX_op_rotl_i64:
1828     case INDEX_op_rotr_i64:
1829         return TCG_TARGET_HAS_rot_i64;
1830     case INDEX_op_deposit_i64:
1831         return TCG_TARGET_HAS_deposit_i64;
1832     case INDEX_op_extract_i64:
1833         return TCG_TARGET_HAS_extract_i64;
1834     case INDEX_op_sextract_i64:
1835         return TCG_TARGET_HAS_sextract_i64;
1836     case INDEX_op_extract2_i64:
1837         return TCG_TARGET_HAS_extract2_i64;
1838     case INDEX_op_extrl_i64_i32:
1839         return TCG_TARGET_HAS_extrl_i64_i32;
1840     case INDEX_op_extrh_i64_i32:
1841         return TCG_TARGET_HAS_extrh_i64_i32;
1842     case INDEX_op_ext8s_i64:
1843         return TCG_TARGET_HAS_ext8s_i64;
1844     case INDEX_op_ext16s_i64:
1845         return TCG_TARGET_HAS_ext16s_i64;
1846     case INDEX_op_ext32s_i64:
1847         return TCG_TARGET_HAS_ext32s_i64;
1848     case INDEX_op_ext8u_i64:
1849         return TCG_TARGET_HAS_ext8u_i64;
1850     case INDEX_op_ext16u_i64:
1851         return TCG_TARGET_HAS_ext16u_i64;
1852     case INDEX_op_ext32u_i64:
1853         return TCG_TARGET_HAS_ext32u_i64;
1854     case INDEX_op_bswap16_i64:
1855         return TCG_TARGET_HAS_bswap16_i64;
1856     case INDEX_op_bswap32_i64:
1857         return TCG_TARGET_HAS_bswap32_i64;
1858     case INDEX_op_bswap64_i64:
1859         return TCG_TARGET_HAS_bswap64_i64;
1860     case INDEX_op_not_i64:
1861         return TCG_TARGET_HAS_not_i64;
1862     case INDEX_op_neg_i64:
1863         return TCG_TARGET_HAS_neg_i64;
1864     case INDEX_op_andc_i64:
1865         return TCG_TARGET_HAS_andc_i64;
1866     case INDEX_op_orc_i64:
1867         return TCG_TARGET_HAS_orc_i64;
1868     case INDEX_op_eqv_i64:
1869         return TCG_TARGET_HAS_eqv_i64;
1870     case INDEX_op_nand_i64:
1871         return TCG_TARGET_HAS_nand_i64;
1872     case INDEX_op_nor_i64:
1873         return TCG_TARGET_HAS_nor_i64;
1874     case INDEX_op_clz_i64:
1875         return TCG_TARGET_HAS_clz_i64;
1876     case INDEX_op_ctz_i64:
1877         return TCG_TARGET_HAS_ctz_i64;
1878     case INDEX_op_ctpop_i64:
1879         return TCG_TARGET_HAS_ctpop_i64;
1880     case INDEX_op_add2_i64:
1881         return TCG_TARGET_HAS_add2_i64;
1882     case INDEX_op_sub2_i64:
1883         return TCG_TARGET_HAS_sub2_i64;
1884     case INDEX_op_mulu2_i64:
1885         return TCG_TARGET_HAS_mulu2_i64;
1886     case INDEX_op_muls2_i64:
1887         return TCG_TARGET_HAS_muls2_i64;
1888     case INDEX_op_muluh_i64:
1889         return TCG_TARGET_HAS_muluh_i64;
1890     case INDEX_op_mulsh_i64:
1891         return TCG_TARGET_HAS_mulsh_i64;
1892 
1893     case INDEX_op_mov_vec:
1894     case INDEX_op_dup_vec:
1895     case INDEX_op_dupm_vec:
1896     case INDEX_op_ld_vec:
1897     case INDEX_op_st_vec:
1898     case INDEX_op_add_vec:
1899     case INDEX_op_sub_vec:
1900     case INDEX_op_and_vec:
1901     case INDEX_op_or_vec:
1902     case INDEX_op_xor_vec:
1903     case INDEX_op_cmp_vec:
1904         return have_vec;
1905     case INDEX_op_dup2_vec:
1906         return have_vec && TCG_TARGET_REG_BITS == 32;
1907     case INDEX_op_not_vec:
1908         return have_vec && TCG_TARGET_HAS_not_vec;
1909     case INDEX_op_neg_vec:
1910         return have_vec && TCG_TARGET_HAS_neg_vec;
1911     case INDEX_op_abs_vec:
1912         return have_vec && TCG_TARGET_HAS_abs_vec;
1913     case INDEX_op_andc_vec:
1914         return have_vec && TCG_TARGET_HAS_andc_vec;
1915     case INDEX_op_orc_vec:
1916         return have_vec && TCG_TARGET_HAS_orc_vec;
1917     case INDEX_op_mul_vec:
1918         return have_vec && TCG_TARGET_HAS_mul_vec;
1919     case INDEX_op_shli_vec:
1920     case INDEX_op_shri_vec:
1921     case INDEX_op_sari_vec:
1922         return have_vec && TCG_TARGET_HAS_shi_vec;
1923     case INDEX_op_shls_vec:
1924     case INDEX_op_shrs_vec:
1925     case INDEX_op_sars_vec:
1926         return have_vec && TCG_TARGET_HAS_shs_vec;
1927     case INDEX_op_shlv_vec:
1928     case INDEX_op_shrv_vec:
1929     case INDEX_op_sarv_vec:
1930         return have_vec && TCG_TARGET_HAS_shv_vec;
1931     case INDEX_op_rotli_vec:
1932         return have_vec && TCG_TARGET_HAS_roti_vec;
1933     case INDEX_op_rotls_vec:
1934         return have_vec && TCG_TARGET_HAS_rots_vec;
1935     case INDEX_op_rotlv_vec:
1936     case INDEX_op_rotrv_vec:
1937         return have_vec && TCG_TARGET_HAS_rotv_vec;
1938     case INDEX_op_ssadd_vec:
1939     case INDEX_op_usadd_vec:
1940     case INDEX_op_sssub_vec:
1941     case INDEX_op_ussub_vec:
1942         return have_vec && TCG_TARGET_HAS_sat_vec;
1943     case INDEX_op_smin_vec:
1944     case INDEX_op_umin_vec:
1945     case INDEX_op_smax_vec:
1946     case INDEX_op_umax_vec:
1947         return have_vec && TCG_TARGET_HAS_minmax_vec;
1948     case INDEX_op_bitsel_vec:
1949         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1950     case INDEX_op_cmpsel_vec:
1951         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1952 
1953     default:
1954         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1955         return true;
1956     }
1957 }
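
/*
 * Editor's note: an illustrative caller (with a hypothetical helper
 * fallback), showing the intended use of tcg_op_supported(): probe the
 * host for an opcode before emitting it, and expand differently when
 * it is absent.
 */
#if 0
static void gen_ctpop32_sketch(TCGv_i32 dst, TCGv_i32 src)
{
    if (tcg_op_supported(INDEX_op_ctpop_i32)) {
        tcg_gen_ctpop_i32(dst, src);       /* host has a native popcount */
    } else {
        gen_helper_ctpop32(dst, src);      /* hypothetical helper */
    }
}
#endif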
1958 
1959 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1960    and endian swapping. Maybe it would be better to do the alignment
1961    and endian swapping in tcg_reg_alloc_call(). */
1962 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1963 {
1964     int i, real_args, nb_rets, pi;
1965     unsigned sizemask, flags;
1966     TCGHelperInfo *info;
1967     TCGOp *op;
1968 
1969     info = g_hash_table_lookup(helper_table, (gpointer)func);
1970     flags = info->flags;
1971     sizemask = info->sizemask;
1972 
1973 #ifdef CONFIG_PLUGIN
1974     /* detect non-plugin helpers */
1975     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1976         tcg_ctx->plugin_insn->calls_helpers = true;
1977     }
1978 #endif
1979 
1980 #if defined(__sparc__) && !defined(__arch64__) \
1981     && !defined(CONFIG_TCG_INTERPRETER)
1982     /* We have 64-bit values in one register, but need to pass as two
1983        separate parameters.  Split them.  */
1984     int orig_sizemask = sizemask;
1985     int orig_nargs = nargs;
1986     TCGv_i64 retl, reth;
1987     TCGTemp *split_args[MAX_OPC_PARAM];
1988 
1989     retl = NULL;
1990     reth = NULL;
1991     if (sizemask != 0) {
1992         for (i = real_args = 0; i < nargs; ++i) {
1993             int is_64bit = sizemask & (1 << (i+1)*2);
1994             if (is_64bit) {
1995                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1996                 TCGv_i32 h = tcg_temp_new_i32();
1997                 TCGv_i32 l = tcg_temp_new_i32();
1998                 tcg_gen_extr_i64_i32(l, h, orig);
1999                 split_args[real_args++] = tcgv_i32_temp(h);
2000                 split_args[real_args++] = tcgv_i32_temp(l);
2001             } else {
2002                 split_args[real_args++] = args[i];
2003             }
2004         }
2005         nargs = real_args;
2006         args = split_args;
2007         sizemask = 0;
2008     }
2009 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2010     for (i = 0; i < nargs; ++i) {
2011         int is_64bit = sizemask & (1 << (i+1)*2);
2012         int is_signed = sizemask & (2 << (i+1)*2);
2013         if (!is_64bit) {
2014             TCGv_i64 temp = tcg_temp_new_i64();
2015             TCGv_i64 orig = temp_tcgv_i64(args[i]);
2016             if (is_signed) {
2017                 tcg_gen_ext32s_i64(temp, orig);
2018             } else {
2019                 tcg_gen_ext32u_i64(temp, orig);
2020             }
2021             args[i] = tcgv_i64_temp(temp);
2022         }
2023     }
2024 #endif /* TCG_TARGET_EXTEND_ARGS */
2025 
2026     op = tcg_emit_op(INDEX_op_call);
2027 
2028     pi = 0;
2029     if (ret != NULL) {
2030 #if defined(__sparc__) && !defined(__arch64__) \
2031     && !defined(CONFIG_TCG_INTERPRETER)
2032         if (orig_sizemask & 1) {
2033             /* The 32-bit ABI is going to return the 64-bit value in
2034                the %o0/%o1 register pair.  Prepare for this by using
2035                two return temporaries, and reassemble below.  */
2036             retl = tcg_temp_new_i64();
2037             reth = tcg_temp_new_i64();
2038             op->args[pi++] = tcgv_i64_arg(reth);
2039             op->args[pi++] = tcgv_i64_arg(retl);
2040             nb_rets = 2;
2041         } else {
2042             op->args[pi++] = temp_arg(ret);
2043             nb_rets = 1;
2044         }
2045 #else
2046         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2047 #ifdef HOST_WORDS_BIGENDIAN
2048             op->args[pi++] = temp_arg(ret + 1);
2049             op->args[pi++] = temp_arg(ret);
2050 #else
2051             op->args[pi++] = temp_arg(ret);
2052             op->args[pi++] = temp_arg(ret + 1);
2053 #endif
2054             nb_rets = 2;
2055         } else {
2056             op->args[pi++] = temp_arg(ret);
2057             nb_rets = 1;
2058         }
2059 #endif
2060     } else {
2061         nb_rets = 0;
2062     }
2063     TCGOP_CALLO(op) = nb_rets;
2064 
2065     real_args = 0;
2066     for (i = 0; i < nargs; i++) {
2067         int is_64bit = sizemask & (1 << (i+1)*2);
2068         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2069 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2070             /* some targets want aligned 64-bit args */
2071             if (real_args & 1) {
2072                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2073                 real_args++;
2074             }
2075 #endif
2076            /* If stack grows up, then we will be placing successive
2077               arguments at lower addresses, which means we need to
2078               reverse the order compared to how we would normally
2079               treat either big or little-endian.  For those arguments
2080               that will wind up in registers, this still works for
2081               HPPA (the only current STACK_GROWSUP target) since the
2082               argument registers are *also* allocated in decreasing
2083               order.  If another such target is added, this logic may
2084               have to get more complicated to differentiate between
2085               stack arguments and register arguments.  */
2086 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2087             op->args[pi++] = temp_arg(args[i] + 1);
2088             op->args[pi++] = temp_arg(args[i]);
2089 #else
2090             op->args[pi++] = temp_arg(args[i]);
2091             op->args[pi++] = temp_arg(args[i] + 1);
2092 #endif
2093             real_args += 2;
2094             continue;
2095         }
2096 
2097         op->args[pi++] = temp_arg(args[i]);
2098         real_args++;
2099     }
2100     op->args[pi++] = (uintptr_t)func;
2101     op->args[pi++] = flags;
2102     TCGOP_CALLI(op) = real_args;
2103 
2104     /* Make sure the fields didn't overflow.  */
2105     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2106     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2107 
2108 #if defined(__sparc__) && !defined(__arch64__) \
2109     && !defined(CONFIG_TCG_INTERPRETER)
2110     /* Free all of the parts we allocated above.  */
2111     for (i = real_args = 0; i < orig_nargs; ++i) {
2112         int is_64bit = orig_sizemask & (1 << (i+1)*2);
2113         if (is_64bit) {
2114             tcg_temp_free_internal(args[real_args++]);
2115             tcg_temp_free_internal(args[real_args++]);
2116         } else {
2117             real_args++;
2118         }
2119     }
2120     if (orig_sizemask & 1) {
2121         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2122            Note that describing these as TCGv_i64 eliminates an unnecessary
2123            zero-extension that tcg_gen_concat_i32_i64 would create.  */
2124         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2125         tcg_temp_free_i64(retl);
2126         tcg_temp_free_i64(reth);
2127     }
2128 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2129     for (i = 0; i < nargs; ++i) {
2130         int is_64bit = sizemask & (1 << (i+1)*2);
2131         if (!is_64bit) {
2132             tcg_temp_free_internal(args[i]);
2133         }
2134     }
2135 #endif /* TCG_TARGET_EXTEND_ARGS */
2136 }
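
/*
 * Editor's note: the sizemask layout decoded above, as reconstructed
 * from the tests in tcg_gen_callN(): bit 0 marks a 64-bit return value,
 * and for argument i, bit (i+1)*2 marks a 64-bit value while bit
 * (i+1)*2 + 1 marks a signed one.  A worked (hypothetical) example:
 */
#if 0
/* Mask for "i64 helper(i32 a, i64 b)": */
unsigned sizemask_example = (1 << 0)    /* 64-bit return value */
                          | (1 << 4);   /* arg 1 (b) is 64-bit */
                                        /* == 0x11             */
#endif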
2137 
2138 static void tcg_reg_alloc_start(TCGContext *s)
2139 {
2140     int i, n;
2141 
2142     for (i = 0, n = s->nb_temps; i < n; i++) {
2143         TCGTemp *ts = &s->temps[i];
2144         TCGTempVal val = TEMP_VAL_MEM;
2145 
2146         switch (ts->kind) {
2147         case TEMP_CONST:
2148             val = TEMP_VAL_CONST;
2149             break;
2150         case TEMP_FIXED:
2151             val = TEMP_VAL_REG;
2152             break;
2153         case TEMP_GLOBAL:
2154             break;
2155         case TEMP_NORMAL:
2156             val = TEMP_VAL_DEAD;
2157             /* fall through */
2158         case TEMP_LOCAL:
2159             ts->mem_allocated = 0;
2160             break;
2161         default:
2162             g_assert_not_reached();
2163         }
2164         ts->val_type = val;
2165     }
2166 
2167     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2168 }
2169 
2170 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2171                                  TCGTemp *ts)
2172 {
2173     int idx = temp_idx(ts);
2174 
2175     switch (ts->kind) {
2176     case TEMP_FIXED:
2177     case TEMP_GLOBAL:
2178         pstrcpy(buf, buf_size, ts->name);
2179         break;
2180     case TEMP_LOCAL:
2181         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2182         break;
2183     case TEMP_NORMAL:
2184         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2185         break;
2186     case TEMP_CONST:
2187         switch (ts->type) {
2188         case TCG_TYPE_I32:
2189             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2190             break;
2191 #if TCG_TARGET_REG_BITS > 32
2192         case TCG_TYPE_I64:
2193             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2194             break;
2195 #endif
2196         case TCG_TYPE_V64:
2197         case TCG_TYPE_V128:
2198         case TCG_TYPE_V256:
2199             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2200                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2201             break;
2202         default:
2203             g_assert_not_reached();
2204         }
2205         break;
2206     }
2207     return buf;
2208 }
2209 
2210 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2211                              int buf_size, TCGArg arg)
2212 {
2213     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2214 }
2215 
2216 /* Find helper name.  */
2217 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2218 {
2219     const char *ret = NULL;
2220     if (helper_table) {
2221         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2222         if (info) {
2223             ret = info->name;
2224         }
2225     }
2226     return ret;
2227 }
2228 
2229 static const char * const cond_name[] =
2230 {
2231     [TCG_COND_NEVER] = "never",
2232     [TCG_COND_ALWAYS] = "always",
2233     [TCG_COND_EQ] = "eq",
2234     [TCG_COND_NE] = "ne",
2235     [TCG_COND_LT] = "lt",
2236     [TCG_COND_GE] = "ge",
2237     [TCG_COND_LE] = "le",
2238     [TCG_COND_GT] = "gt",
2239     [TCG_COND_LTU] = "ltu",
2240     [TCG_COND_GEU] = "geu",
2241     [TCG_COND_LEU] = "leu",
2242     [TCG_COND_GTU] = "gtu"
2243 };
2244 
2245 static const char * const ldst_name[] =
2246 {
2247     [MO_UB]   = "ub",
2248     [MO_SB]   = "sb",
2249     [MO_LEUW] = "leuw",
2250     [MO_LESW] = "lesw",
2251     [MO_LEUL] = "leul",
2252     [MO_LESL] = "lesl",
2253     [MO_LEQ]  = "leq",
2254     [MO_BEUW] = "beuw",
2255     [MO_BESW] = "besw",
2256     [MO_BEUL] = "beul",
2257     [MO_BESL] = "besl",
2258     [MO_BEQ]  = "beq",
2259 };
2260 
2261 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2262 #ifdef TARGET_ALIGNED_ONLY
2263     [MO_UNALN >> MO_ASHIFT]    = "un+",
2264     [MO_ALIGN >> MO_ASHIFT]    = "",
2265 #else
2266     [MO_UNALN >> MO_ASHIFT]    = "",
2267     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2268 #endif
2269     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2270     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2271     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2272     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2273     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2274     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2275 };
2276 
2277 static inline bool tcg_regset_single(TCGRegSet d)
2278 {
2279     return (d & (d - 1)) == 0;
2280 }
2281 
2282 static inline TCGReg tcg_regset_first(TCGRegSet d)
2283 {
2284     if (TCG_TARGET_NB_REGS <= 32) {
2285         return ctz32(d);
2286     } else {
2287         return ctz64(d);
2288     }
2289 }
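
/*
 * Editor's note, worked example: for d = 0x08 (a single register, bit 3),
 * d & (d - 1) == 0x08 & 0x07 == 0, so tcg_regset_single(d) is true, and
 * ctz32(0x08) == 3 recovers that register in tcg_regset_first(d).
 */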
2290 
2291 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2292 {
2293     char buf[128];
2294     TCGOp *op;
2295 
2296     QTAILQ_FOREACH(op, &s->ops, link) {
2297         int i, k, nb_oargs, nb_iargs, nb_cargs;
2298         const TCGOpDef *def;
2299         TCGOpcode c;
2300         int col = 0;
2301 
2302         c = op->opc;
2303         def = &tcg_op_defs[c];
2304 
2305         if (c == INDEX_op_insn_start) {
2306             nb_oargs = 0;
2307             col += qemu_log("\n ----");
2308 
2309             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2310                 target_ulong a;
2311 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2312                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2313 #else
2314                 a = op->args[i];
2315 #endif
2316                 col += qemu_log(" " TARGET_FMT_lx, a);
2317             }
2318         } else if (c == INDEX_op_call) {
2319             /* variable number of arguments */
2320             nb_oargs = TCGOP_CALLO(op);
2321             nb_iargs = TCGOP_CALLI(op);
2322             nb_cargs = def->nb_cargs;
2323 
2324             /* function name, flags, out args */
2325             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2326                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2327                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2328             for (i = 0; i < nb_oargs; i++) {
2329                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2330                                                        op->args[i]));
2331             }
2332             for (i = 0; i < nb_iargs; i++) {
2333                 TCGArg arg = op->args[nb_oargs + i];
2334                 const char *t = "<dummy>";
2335                 if (arg != TCG_CALL_DUMMY_ARG) {
2336                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2337                 }
2338                 col += qemu_log(",%s", t);
2339             }
2340         } else {
2341             col += qemu_log(" %s ", def->name);
2342 
2343             nb_oargs = def->nb_oargs;
2344             nb_iargs = def->nb_iargs;
2345             nb_cargs = def->nb_cargs;
2346 
2347             if (def->flags & TCG_OPF_VECTOR) {
2348                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2349                                 8 << TCGOP_VECE(op));
2350             }
2351 
2352             k = 0;
2353             for (i = 0; i < nb_oargs; i++) {
2354                 if (k != 0) {
2355                     col += qemu_log(",");
2356                 }
2357                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2358                                                       op->args[k++]));
2359             }
2360             for (i = 0; i < nb_iargs; i++) {
2361                 if (k != 0) {
2362                     col += qemu_log(",");
2363                 }
2364                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2365                                                       op->args[k++]));
2366             }
2367             switch (c) {
2368             case INDEX_op_brcond_i32:
2369             case INDEX_op_setcond_i32:
2370             case INDEX_op_movcond_i32:
2371             case INDEX_op_brcond2_i32:
2372             case INDEX_op_setcond2_i32:
2373             case INDEX_op_brcond_i64:
2374             case INDEX_op_setcond_i64:
2375             case INDEX_op_movcond_i64:
2376             case INDEX_op_cmp_vec:
2377             case INDEX_op_cmpsel_vec:
2378                 if (op->args[k] < ARRAY_SIZE(cond_name)
2379                     && cond_name[op->args[k]]) {
2380                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2381                 } else {
2382                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2383                 }
2384                 i = 1;
2385                 break;
2386             case INDEX_op_qemu_ld_i32:
2387             case INDEX_op_qemu_st_i32:
2388             case INDEX_op_qemu_st8_i32:
2389             case INDEX_op_qemu_ld_i64:
2390             case INDEX_op_qemu_st_i64:
2391                 {
2392                     TCGMemOpIdx oi = op->args[k++];
2393                     MemOp mop = get_memop(oi);  /* avoid shadowing TCGOp *op */
2394                     unsigned ix = get_mmuidx(oi);
2395 
2396                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2397                         col += qemu_log(",$0x%x,%u", mop, ix);
2398                     } else {
2399                         const char *s_al, *s_op;
2400                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2401                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2402                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2402                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2403                     }
2404                     i = 1;
2405                 }
2406                 break;
2407             default:
2408                 i = 0;
2409                 break;
2410             }
2411             switch (c) {
2412             case INDEX_op_set_label:
2413             case INDEX_op_br:
2414             case INDEX_op_brcond_i32:
2415             case INDEX_op_brcond_i64:
2416             case INDEX_op_brcond2_i32:
2417                 col += qemu_log("%s$L%d", k ? "," : "",
2418                                 arg_label(op->args[k])->id);
2419                 i++, k++;
2420                 break;
2421             default:
2422                 break;
2423             }
2424             for (; i < nb_cargs; i++, k++) {
2425                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2426             }
2427         }
2428 
2429         if (have_prefs || op->life) {
2430 
2431             QemuLogFile *logfile;
2432 
2433             rcu_read_lock();
2434             logfile = qatomic_rcu_read(&qemu_logfile);
2435             if (logfile) {
2436                 for (; col < 40; ++col) {
2437                     putc(' ', logfile->fd);
2438                 }
2439             }
2440             rcu_read_unlock();
2441         }
2442 
2443         if (op->life) {
2444             unsigned life = op->life;
2445 
2446             if (life & (SYNC_ARG * 3)) {
2447                 qemu_log("  sync:");
2448                 for (i = 0; i < 2; ++i) {
2449                     if (life & (SYNC_ARG << i)) {
2450                         qemu_log(" %d", i);
2451                     }
2452                 }
2453             }
2454             life /= DEAD_ARG;
2455             if (life) {
2456                 qemu_log("  dead:");
2457                 for (i = 0; life; ++i, life >>= 1) {
2458                     if (life & 1) {
2459                         qemu_log(" %d", i);
2460                     }
2461                 }
2462             }
2463         }
2464 
2465         if (have_prefs) {
2466             for (i = 0; i < nb_oargs; ++i) {
2467                 TCGRegSet set = op->output_pref[i];
2468 
2469                 if (i == 0) {
2470                     qemu_log("  pref=");
2471                 } else {
2472                     qemu_log(",");
2473                 }
2474                 if (set == 0) {
2475                     qemu_log("none");
2476                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2477                     qemu_log("all");
2478 #ifdef CONFIG_DEBUG_TCG
2479                 } else if (tcg_regset_single(set)) {
2480                     TCGReg reg = tcg_regset_first(set);
2481                     qemu_log("%s", tcg_target_reg_names[reg]);
2482 #endif
2483                 } else if (TCG_TARGET_NB_REGS <= 32) {
2484                     qemu_log("%#x", (uint32_t)set);
2485                 } else {
2486                     qemu_log("%#" PRIx64, (uint64_t)set);
2487                 }
2488             }
2489         }
2490 
2491         qemu_log("\n");
2492     }
2493 }
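
/*
 * Editor's note: an illustrative (not verbatim) fragment of the dump
 * format produced above -- one op per line, comma-separated arguments,
 * with the optional liveness annotations starting at column 40:
 *
 *   ---- 0000000000401000
 *   mov_i32 tmp0,var1                      dead: 1
 *   brcond_i32 tmp0,$0x0,eq,$L1            dead: 0
 */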
2494 
2495 /* we give more priority to constraints with fewer registers */
2496 static int get_constraint_priority(const TCGOpDef *def, int k)
2497 {
2498     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2499     int n;
2500 
2501     if (arg_ct->oalias) {
2502         /* an alias is equivalent to a single register */
2503         n = 1;
2504     } else {
2505         n = ctpop64(arg_ct->regs);
2506     }
2507     return TCG_TARGET_NB_REGS - n + 1;
2508 }
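
/*
 * Editor's note, worked example: with TCG_TARGET_NB_REGS == 16, a
 * constraint allowing one register scores 16 - 1 + 1 == 16 and one
 * allowing all registers scores 16 - 16 + 1 == 1, so the tightest
 * constraints are allocated first after sorting.
 */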
2509 
2510 /* sort from highest priority to lowest */
2511 static void sort_constraints(TCGOpDef *def, int start, int n)
2512 {
2513     int i, j;
2514     TCGArgConstraint *a = def->args_ct;
2515 
2516     for (i = 0; i < n; i++) {
2517         a[start + i].sort_index = start + i;
2518     }
2519     if (n <= 1) {
2520         return;
2521     }
2522     for (i = 0; i < n - 1; i++) {
2523         for (j = i + 1; j < n; j++) {
2524             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2525             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2526             if (p1 < p2) {
2527                 int tmp = a[start + i].sort_index;
2528                 a[start + i].sort_index = a[start + j].sort_index;
2529                 a[start + j].sort_index = tmp;
2530             }
2531         }
2532     }
2533 }
2534 
2535 static void process_op_defs(TCGContext *s)
2536 {
2537     TCGOpcode op;
2538 
2539     for (op = 0; op < NB_OPS; op++) {
2540         TCGOpDef *def = &tcg_op_defs[op];
2541         const TCGTargetOpDef *tdefs;
2542         int i, nb_args;
2543 
2544         if (def->flags & TCG_OPF_NOT_PRESENT) {
2545             continue;
2546         }
2547 
2548         nb_args = def->nb_iargs + def->nb_oargs;
2549         if (nb_args == 0) {
2550             continue;
2551         }
2552 
2553         /*
2554          * Macro magic should make it impossible, but double-check that
2555          * the array index is in range.  Since the signedness of an enum
2556          * is implementation-defined, force the result to unsigned.
2557          */
2558         unsigned con_set = tcg_target_op_def(op);
2559         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2560         tdefs = &constraint_sets[con_set];
2561 
2562         for (i = 0; i < nb_args; i++) {
2563             const char *ct_str = tdefs->args_ct_str[i];
2564             /* Incomplete TCGTargetOpDef entry. */
2565             tcg_debug_assert(ct_str != NULL);
2566 
2567             while (*ct_str != '\0') {
2568                 switch(*ct_str) {
2569                 case '0' ... '9':
2570                     {
2571                         int oarg = *ct_str - '0';
2572                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2573                         tcg_debug_assert(oarg < def->nb_oargs);
2574                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2575                         def->args_ct[i] = def->args_ct[oarg];
2576                         /* The output sets oalias.  */
2577                         def->args_ct[oarg].oalias = true;
2578                         def->args_ct[oarg].alias_index = i;
2579                         /* The input sets ialias. */
2580                         def->args_ct[i].ialias = true;
2581                         def->args_ct[i].alias_index = oarg;
2582                     }
2583                     ct_str++;
2584                     break;
2585                 case '&':
2586                     def->args_ct[i].newreg = true;
2587                     ct_str++;
2588                     break;
2589                 case 'i':
2590                     def->args_ct[i].ct |= TCG_CT_CONST;
2591                     ct_str++;
2592                     break;
2593 
2594                 /* Include all of the target-specific constraints. */
2595 
2596 #undef CONST
2597 #define CONST(CASE, MASK) \
2598     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2599 #define REGS(CASE, MASK) \
2600     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2601 
2602 #include "tcg-target-con-str.h"
2603 
2604 #undef REGS
2605 #undef CONST
2606                 default:
2607                     /* Typo in TCGTargetOpDef constraint. */
2608                     g_assert_not_reached();
2609                 }
2610             }
2611         }
2612 
2613         /* TCGTargetOpDef entry with too much information? */
2614         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2615 
2616         /* sort the constraints (XXX: this is just a heuristic) */
2617         sort_constraints(def, 0, def->nb_oargs);
2618         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2619     }
2620 }
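
/*
 * Editor's note: a hypothetical constraint set and how the parser above
 * decodes it.  For { "r", "0", "ri" } on an op with one output and two
 * inputs:
 *   arg 0 "r"  -> args_ct[0].regs filled from the REGS() cases that
 *                 tcg-target-con-str.h expands to;
 *   arg 1 "0"  -> input aliased to output 0: args_ct[1] copies
 *                 args_ct[0], the output gets oalias, the input ialias;
 *   arg 2 "ri" -> a register set plus TCG_CT_CONST, i.e. register or
 *                 immediate.
 */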
2621 
2622 void tcg_op_remove(TCGContext *s, TCGOp *op)
2623 {
2624     TCGLabel *label;
2625 
2626     switch (op->opc) {
2627     case INDEX_op_br:
2628         label = arg_label(op->args[0]);
2629         label->refs--;
2630         break;
2631     case INDEX_op_brcond_i32:
2632     case INDEX_op_brcond_i64:
2633         label = arg_label(op->args[3]);
2634         label->refs--;
2635         break;
2636     case INDEX_op_brcond2_i32:
2637         label = arg_label(op->args[5]);
2638         label->refs--;
2639         break;
2640     default:
2641         break;
2642     }
2643 
2644     QTAILQ_REMOVE(&s->ops, op, link);
2645     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2646     s->nb_ops--;
2647 
2648 #ifdef CONFIG_PROFILER
2649     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2650 #endif
2651 }
2652 
2653 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2654 {
2655     TCGContext *s = tcg_ctx;
2656     TCGOp *op;
2657 
2658     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2659         op = tcg_malloc(sizeof(TCGOp));
2660     } else {
2661         op = QTAILQ_FIRST(&s->free_ops);
2662         QTAILQ_REMOVE(&s->free_ops, op, link);
2663     }
2664     memset(op, 0, offsetof(TCGOp, link));
2665     op->opc = opc;
2666     s->nb_ops++;
2667 
2668     return op;
2669 }
2670 
2671 TCGOp *tcg_emit_op(TCGOpcode opc)
2672 {
2673     TCGOp *op = tcg_op_alloc(opc);
2674     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2675     return op;
2676 }
2677 
2678 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2679 {
2680     TCGOp *new_op = tcg_op_alloc(opc);
2681     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2682     return new_op;
2683 }
2684 
2685 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2686 {
2687     TCGOp *new_op = tcg_op_alloc(opc);
2688     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2689     return new_op;
2690 }
2691 
2692 /* Reachability analysis: remove unreachable code.  */
2693 static void reachable_code_pass(TCGContext *s)
2694 {
2695     TCGOp *op, *op_next;
2696     bool dead = false;
2697 
2698     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2699         bool remove = dead;
2700         TCGLabel *label;
2701         int call_flags;
2702 
2703         switch (op->opc) {
2704         case INDEX_op_set_label:
2705             label = arg_label(op->args[0]);
2706             if (label->refs == 0) {
2707                 /*
2708                  * While there is an occasional backward branch, virtually
2709                  * all branches generated by the translators are forward,
2710                  * which means that by the time we see a label we will
2711                  * generally have already removed all references to it,
2712                  * and there is little to be gained by iterating.
2713                  */
2714                 remove = true;
2715             } else {
2716                 /* Once we see a label, insns become live again.  */
2717                 dead = false;
2718                 remove = false;
2719 
2720                 /*
2721                  * Optimization can fold conditional branches to unconditional.
2722                  * If we find a label with one reference which is preceded by
2723                  * an unconditional branch to it, remove both.  This has to
2724                  * wait until the dead code between them has been removed.
2725                  */
2726                 if (label->refs == 1) {
2727                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2728                     if (op_prev->opc == INDEX_op_br &&
2729                         label == arg_label(op_prev->args[0])) {
2730                         tcg_op_remove(s, op_prev);
2731                         remove = true;
2732                     }
2733                 }
2734             }
2735             break;
2736 
2737         case INDEX_op_br:
2738         case INDEX_op_exit_tb:
2739         case INDEX_op_goto_ptr:
2740             /* Unconditional branches; everything following is dead.  */
2741             dead = true;
2742             break;
2743 
2744         case INDEX_op_call:
2745             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2746             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2747             if (call_flags & TCG_CALL_NO_RETURN) {
2748                 dead = true;
2749             }
2750             break;
2751 
2752         case INDEX_op_insn_start:
2753             /* Never remove -- we need to keep these for unwind.  */
2754             remove = false;
2755             break;
2756 
2757         default:
2758             break;
2759         }
2760 
2761         if (remove) {
2762             tcg_op_remove(s, op);
2763         }
2764     }
2765 }
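
/*
 * Editor's note: an illustrative run of the pass above.  After the
 * optimizer folds a conditional branch to an unconditional one, the
 * stream might contain
 *
 *     br $L3
 *     mov_i32 tmp2,tmp1        <- dead: follows an unconditional branch
 *     set_label $L3            <- now a single reference, preceded by
 *                                 "br $L3", so both are removed as well
 *
 * and all three ops disappear.
 */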
2766 
2767 #define TS_DEAD  1
2768 #define TS_MEM   2
2769 
2770 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2771 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
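
/*
 * Editor's note: as tcg_dump_ops() above decodes it, op->life packs two
 * SYNC_ARG bits (outputs 0 and 1 that must be synced back to memory)
 * in the low bits, followed by one DEAD_ARG bit per argument; the two
 * macros above are therefore simple shifted-mask tests.
 */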
2772 
2773 /* For liveness_pass_1, the register preferences for a given temp.  */
2774 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2775 {
2776     return ts->state_ptr;
2777 }
2778 
2779 /* For liveness_pass_1, reset the preferences for a given temp to the
2780  * maximal regset for its type.
2781  */
2782 static inline void la_reset_pref(TCGTemp *ts)
2783 {
2784     *la_temp_pref(ts)
2785         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2786 }
2787 
2788 /* liveness analysis: end of function: all temps are dead, and globals
2789    should be in memory. */
2790 static void la_func_end(TCGContext *s, int ng, int nt)
2791 {
2792     int i;
2793 
2794     for (i = 0; i < ng; ++i) {
2795         s->temps[i].state = TS_DEAD | TS_MEM;
2796         la_reset_pref(&s->temps[i]);
2797     }
2798     for (i = ng; i < nt; ++i) {
2799         s->temps[i].state = TS_DEAD;
2800         la_reset_pref(&s->temps[i]);
2801     }
2802 }
2803 
2804 /* liveness analysis: end of basic block: all temps are dead, globals
2805    and local temps should be in memory. */
2806 static void la_bb_end(TCGContext *s, int ng, int nt)
2807 {
2808     int i;
2809 
2810     for (i = 0; i < nt; ++i) {
2811         TCGTemp *ts = &s->temps[i];
2812         int state;
2813 
2814         switch (ts->kind) {
2815         case TEMP_FIXED:
2816         case TEMP_GLOBAL:
2817         case TEMP_LOCAL:
2818             state = TS_DEAD | TS_MEM;
2819             break;
2820         case TEMP_NORMAL:
2821         case TEMP_CONST:
2822             state = TS_DEAD;
2823             break;
2824         default:
2825             g_assert_not_reached();
2826         }
2827         ts->state = state;
2828         la_reset_pref(ts);
2829     }
2830 }
2831 
2832 /* liveness analysis: sync globals back to memory.  */
2833 static void la_global_sync(TCGContext *s, int ng)
2834 {
2835     int i;
2836 
2837     for (i = 0; i < ng; ++i) {
2838         int state = s->temps[i].state;
2839         s->temps[i].state = state | TS_MEM;
2840         if (state == TS_DEAD) {
2841             /* If the global was previously dead, reset prefs.  */
2842             la_reset_pref(&s->temps[i]);
2843         }
2844     }
2845 }
2846 
2847 /*
2848  * liveness analysis: conditional branch: all temps are dead,
2849  * globals and local temps should be synced.
2850  */
2851 static void la_bb_sync(TCGContext *s, int ng, int nt)
2852 {
2853     la_global_sync(s, ng);
2854 
2855     for (int i = ng; i < nt; ++i) {
2856         TCGTemp *ts = &s->temps[i];
2857         int state;
2858 
2859         switch (ts->kind) {
2860         case TEMP_LOCAL:
2861             state = ts->state;
2862             ts->state = state | TS_MEM;
2863             if (state != TS_DEAD) {
2864                 continue;
2865             }
2866             break;
2867         case TEMP_NORMAL:
2868             s->temps[i].state = TS_DEAD;
2869             break;
2870         case TEMP_CONST:
2871             continue;
2872         default:
2873             g_assert_not_reached();
2874         }
2875         la_reset_pref(&s->temps[i]);
2876     }
2877 }
2878 
2879 /* liveness analysis: sync globals back to memory and kill.  */
2880 static void la_global_kill(TCGContext *s, int ng)
2881 {
2882     int i;
2883 
2884     for (i = 0; i < ng; i++) {
2885         s->temps[i].state = TS_DEAD | TS_MEM;
2886         la_reset_pref(&s->temps[i]);
2887     }
2888 }
2889 
2890 /* liveness analysis: note live globals crossing calls.  */
2891 static void la_cross_call(TCGContext *s, int nt)
2892 {
2893     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2894     int i;
2895 
2896     for (i = 0; i < nt; i++) {
2897         TCGTemp *ts = &s->temps[i];
2898         if (!(ts->state & TS_DEAD)) {
2899             TCGRegSet *pset = la_temp_pref(ts);
2900             TCGRegSet set = *pset;
2901 
2902             set &= mask;
2903             /* If the combination is not possible, restart.  */
2904             if (set == 0) {
2905                 set = tcg_target_available_regs[ts->type] & mask;
2906             }
2907             *pset = set;
2908         }
2909     }
2910 }
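
/*
 * Editor's note, worked example: if a temp that is live across the call
 * so far preferred only call-clobbered registers, "set &= mask" leaves
 * 0, and its preference restarts as every register of its type that
 * survives a call.
 */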
2911 
2912 /* Liveness analysis: update the opc_arg_life array to tell if a
2913    given input argument is dead. Instructions updating dead
2914    temporaries are removed. */
2915 static void liveness_pass_1(TCGContext *s)
2916 {
2917     int nb_globals = s->nb_globals;
2918     int nb_temps = s->nb_temps;
2919     TCGOp *op, *op_prev;
2920     TCGRegSet *prefs;
2921     int i;
2922 
2923     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2924     for (i = 0; i < nb_temps; ++i) {
2925         s->temps[i].state_ptr = prefs + i;
2926     }
2927 
2928     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2929     la_func_end(s, nb_globals, nb_temps);
2930 
2931     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2932         int nb_iargs, nb_oargs;
2933         TCGOpcode opc_new, opc_new2;
2934         bool have_opc_new2;
2935         TCGLifeData arg_life = 0;
2936         TCGTemp *ts;
2937         TCGOpcode opc = op->opc;
2938         const TCGOpDef *def = &tcg_op_defs[opc];
2939 
2940         switch (opc) {
2941         case INDEX_op_call:
2942             {
2943                 int call_flags;
2944                 int nb_call_regs;
2945 
2946                 nb_oargs = TCGOP_CALLO(op);
2947                 nb_iargs = TCGOP_CALLI(op);
2948                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2949 
2950                 /* pure functions can be removed if their result is unused */
2951                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2952                     for (i = 0; i < nb_oargs; i++) {
2953                         ts = arg_temp(op->args[i]);
2954                         if (ts->state != TS_DEAD) {
2955                             goto do_not_remove_call;
2956                         }
2957                     }
2958                     goto do_remove;
2959                 }
2960             do_not_remove_call:
2961 
2962                 /* Output args are dead.  */
2963                 for (i = 0; i < nb_oargs; i++) {
2964                     ts = arg_temp(op->args[i]);
2965                     if (ts->state & TS_DEAD) {
2966                         arg_life |= DEAD_ARG << i;
2967                     }
2968                     if (ts->state & TS_MEM) {
2969                         arg_life |= SYNC_ARG << i;
2970                     }
2971                     ts->state = TS_DEAD;
2972                     la_reset_pref(ts);
2973 
2974                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2975                     op->output_pref[i] = 0;
2976                 }
2977 
2978                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2979                                     TCG_CALL_NO_READ_GLOBALS))) {
2980                     la_global_kill(s, nb_globals);
2981                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2982                     la_global_sync(s, nb_globals);
2983                 }
2984 
2985                 /* Record arguments that die in this helper.  */
2986                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2987                     ts = arg_temp(op->args[i]);
2988                     if (ts && ts->state & TS_DEAD) {
2989                         arg_life |= DEAD_ARG << i;
2990                     }
2991                 }
2992 
2993                 /* For all live registers, remove call-clobbered prefs.  */
2994                 la_cross_call(s, nb_temps);
2995 
2996                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2997 
2998                 /* Input arguments are live for preceding opcodes.  */
2999                 for (i = 0; i < nb_iargs; i++) {
3000                     ts = arg_temp(op->args[i + nb_oargs]);
3001                     if (ts && ts->state & TS_DEAD) {
3002                         /* For those arguments that die, and will be allocated
3003                          * in registers, clear the register set for that arg,
3004                          * to be filled in below.  For args that will be on
3005                          * the stack, reset to any available reg.
3006                          */
3007                         *la_temp_pref(ts)
3008                             = (i < nb_call_regs ? 0 :
3009                                tcg_target_available_regs[ts->type]);
3010                         ts->state &= ~TS_DEAD;
3011                     }
3012                 }
3013 
3014                 /* For each input argument, add its input register to prefs.
3015                    If a temp is used once, this produces a single set bit.  */
3016                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3017                     ts = arg_temp(op->args[i + nb_oargs]);
3018                     if (ts) {
3019                         tcg_regset_set_reg(*la_temp_pref(ts),
3020                                            tcg_target_call_iarg_regs[i]);
3021                     }
3022                 }
3023             }
3024             break;
3025         case INDEX_op_insn_start:
3026             break;
3027         case INDEX_op_discard:
3028             /* mark the temporary as dead */
3029             ts = arg_temp(op->args[0]);
3030             ts->state = TS_DEAD;
3031             la_reset_pref(ts);
3032             break;
3033 
3034         case INDEX_op_add2_i32:
3035             opc_new = INDEX_op_add_i32;
3036             goto do_addsub2;
3037         case INDEX_op_sub2_i32:
3038             opc_new = INDEX_op_sub_i32;
3039             goto do_addsub2;
3040         case INDEX_op_add2_i64:
3041             opc_new = INDEX_op_add_i64;
3042             goto do_addsub2;
3043         case INDEX_op_sub2_i64:
3044             opc_new = INDEX_op_sub_i64;
3045         do_addsub2:
3046             nb_iargs = 4;
3047             nb_oargs = 2;
3048             /* Test if the high part of the operation is dead, but not
3049                the low part.  The result can be optimized to a simple
3050                add or sub.  This happens often for an x86_64 guest
3051                when the CPU mode is set to 32 bits.  */
3052             if (arg_temp(op->args[1])->state == TS_DEAD) {
3053                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3054                     goto do_remove;
3055                 }
3056                 /* Replace the opcode and adjust the args in place,
3057                    leaving 3 unused args at the end.  */
3058                 op->opc = opc = opc_new;
3059                 op->args[1] = op->args[2];
3060                 op->args[2] = op->args[4];
3061                 /* Fall through and mark the single-word operation live.  */
3062                 nb_iargs = 2;
3063                 nb_oargs = 1;
3064             }
3065             goto do_not_remove;
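            /*
             * Editor's note, illustrative rewrite performed above: with
             * the high output dead,
             *     add2_i32 lo,hi,al,ah,bl,bh
             * becomes
             *     add_i32 lo,al,bl
             * with the trailing three args left unused.
             */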
3066 
3067         case INDEX_op_mulu2_i32:
3068             opc_new = INDEX_op_mul_i32;
3069             opc_new2 = INDEX_op_muluh_i32;
3070             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3071             goto do_mul2;
3072         case INDEX_op_muls2_i32:
3073             opc_new = INDEX_op_mul_i32;
3074             opc_new2 = INDEX_op_mulsh_i32;
3075             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3076             goto do_mul2;
3077         case INDEX_op_mulu2_i64:
3078             opc_new = INDEX_op_mul_i64;
3079             opc_new2 = INDEX_op_muluh_i64;
3080             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3081             goto do_mul2;
3082         case INDEX_op_muls2_i64:
3083             opc_new = INDEX_op_mul_i64;
3084             opc_new2 = INDEX_op_mulsh_i64;
3085             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3086             goto do_mul2;
3087         do_mul2:
3088             nb_iargs = 2;
3089             nb_oargs = 2;
3090             if (arg_temp(op->args[1])->state == TS_DEAD) {
3091                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3092                     /* Both parts of the operation are dead.  */
3093                     goto do_remove;
3094                 }
3095                 /* The high part of the operation is dead; generate the low. */
3096                 op->opc = opc = opc_new;
3097                 op->args[1] = op->args[2];
3098                 op->args[2] = op->args[3];
3099             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3100                 /* The low part of the operation is dead; generate the high. */
3101                 op->opc = opc = opc_new2;
3102                 op->args[0] = op->args[1];
3103                 op->args[1] = op->args[2];
3104                 op->args[2] = op->args[3];
3105             } else {
3106                 goto do_not_remove;
3107             }
3108             /* Mark the single-word operation live.  */
3109             nb_oargs = 1;
3110             goto do_not_remove;
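            /*
             * Editor's note, companion example: "mulu2_i32 lo,hi,a,b"
             * with hi dead becomes "mul_i32 lo,a,b"; with lo dead (and
             * muluh supported) it becomes "muluh_i32 hi,a,b".
             */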
3111 
3112         default:
3113             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3114             nb_iargs = def->nb_iargs;
3115             nb_oargs = def->nb_oargs;
3116 
3117             /* Test if the operation can be removed because all
3118                its outputs are dead. We assume that nb_oargs == 0
3119                implies side effects.  */
3120             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3121                 for (i = 0; i < nb_oargs; i++) {
3122                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3123                         goto do_not_remove;
3124                     }
3125                 }
3126                 goto do_remove;
3127             }
3128             goto do_not_remove;
3129 
3130         do_remove:
3131             tcg_op_remove(s, op);
3132             break;
3133 
3134         do_not_remove:
3135             for (i = 0; i < nb_oargs; i++) {
3136                 ts = arg_temp(op->args[i]);
3137 
3138                 /* Remember the preference of the uses that followed.  */
3139                 op->output_pref[i] = *la_temp_pref(ts);
3140 
3141                 /* Output args are dead.  */
3142                 if (ts->state & TS_DEAD) {
3143                     arg_life |= DEAD_ARG << i;
3144                 }
3145                 if (ts->state & TS_MEM) {
3146                     arg_life |= SYNC_ARG << i;
3147                 }
3148                 ts->state = TS_DEAD;
3149                 la_reset_pref(ts);
3150             }
3151 
3152             /* If end of basic block, update.  */
3153             if (def->flags & TCG_OPF_BB_EXIT) {
3154                 la_func_end(s, nb_globals, nb_temps);
3155             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3156                 la_bb_sync(s, nb_globals, nb_temps);
3157             } else if (def->flags & TCG_OPF_BB_END) {
3158                 la_bb_end(s, nb_globals, nb_temps);
3159             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3160                 la_global_sync(s, nb_globals);
3161                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3162                     la_cross_call(s, nb_temps);
3163                 }
3164             }
3165 
3166             /* Record arguments that die in this opcode.  */
3167             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3168                 ts = arg_temp(op->args[i]);
3169                 if (ts->state & TS_DEAD) {
3170                     arg_life |= DEAD_ARG << i;
3171                 }
3172             }
3173 
3174             /* Input arguments are live for preceding opcodes.  */
3175             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3176                 ts = arg_temp(op->args[i]);
3177                 if (ts->state & TS_DEAD) {
3178                     /* For operands that were dead, initially allow
3179                        all regs for the type.  */
3180                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3181                     ts->state &= ~TS_DEAD;
3182                 }
3183             }
3184 
3185             /* Incorporate constraints for this operand.  */
3186             switch (opc) {
3187             case INDEX_op_mov_i32:
3188             case INDEX_op_mov_i64:
3189                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3190                    have proper constraints.  That said, special-case
3191                    moves in order to propagate preferences backward.  */
3192                 if (IS_DEAD_ARG(1)) {
3193                     *la_temp_pref(arg_temp(op->args[0]))
3194                         = *la_temp_pref(arg_temp(op->args[1]));
3195                 }
3196                 break;
3197 
3198             default:
3199                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3200                     const TCGArgConstraint *ct = &def->args_ct[i];
3201                     TCGRegSet set, *pset;
3202 
3203                     ts = arg_temp(op->args[i]);
3204                     pset = la_temp_pref(ts);
3205                     set = *pset;
3206 
3207                     set &= ct->regs;
3208                     if (ct->ialias) {
3209                         set &= op->output_pref[ct->alias_index];
3210                     }
3211                     /* If the combination is not possible, restart.  */
3212                     if (set == 0) {
3213                         set = ct->regs;
3214                     }
3215                     *pset = set;
3216                 }
3217                 break;
3218             }
3219             break;
3220         }
3221         op->life = arg_life;
3222     }
3223 }
3224 
3225 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3226 static bool liveness_pass_2(TCGContext *s)
3227 {
3228     int nb_globals = s->nb_globals;
3229     int nb_temps, i;
3230     bool changes = false;
3231     TCGOp *op, *op_next;
3232 
3233     /* Create a temporary for each indirect global.  */
3234     for (i = 0; i < nb_globals; ++i) {
3235         TCGTemp *its = &s->temps[i];
3236         if (its->indirect_reg) {
3237             TCGTemp *dts = tcg_temp_alloc(s);
3238             dts->type = its->type;
3239             dts->base_type = its->base_type;
3240             its->state_ptr = dts;
3241         } else {
3242             its->state_ptr = NULL;
3243         }
3244         /* All globals begin dead.  */
3245         its->state = TS_DEAD;
3246     }
3247     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3248         TCGTemp *its = &s->temps[i];
3249         its->state_ptr = NULL;
3250         its->state = TS_DEAD;
3251     }
3252 
3253     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3254         TCGOpcode opc = op->opc;
3255         const TCGOpDef *def = &tcg_op_defs[opc];
3256         TCGLifeData arg_life = op->life;
3257         int nb_iargs, nb_oargs, call_flags;
3258         TCGTemp *arg_ts, *dir_ts;
3259 
3260         if (opc == INDEX_op_call) {
3261             nb_oargs = TCGOP_CALLO(op);
3262             nb_iargs = TCGOP_CALLI(op);
3263             call_flags = op->args[nb_oargs + nb_iargs + 1];
3264         } else {
3265             nb_iargs = def->nb_iargs;
3266             nb_oargs = def->nb_oargs;
3267 
3268             /* Set flags similar to those that calls require.  */
3269             if (def->flags & TCG_OPF_COND_BRANCH) {
3270                 /* Like reading globals: sync_globals */
3271                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3272             } else if (def->flags & TCG_OPF_BB_END) {
3273                 /* Like writing globals: save_globals */
3274                 call_flags = 0;
3275             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3276                 /* Like reading globals: sync_globals */
3277                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3278             } else {
3279                 /* No effect on globals.  */
3280                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3281                               TCG_CALL_NO_WRITE_GLOBALS);
3282             }
3283         }
3284 
3285         /* Make sure that input arguments are available.  */
3286         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3287             arg_ts = arg_temp(op->args[i]);
3288             if (arg_ts) {
3289                 dir_ts = arg_ts->state_ptr;
3290                 if (dir_ts && arg_ts->state == TS_DEAD) {
3291                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3292                                       ? INDEX_op_ld_i32
3293                                       : INDEX_op_ld_i64);
3294                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3295 
3296                     lop->args[0] = temp_arg(dir_ts);
3297                     lop->args[1] = temp_arg(arg_ts->mem_base);
3298                     lop->args[2] = arg_ts->mem_offset;
3299 
3300                     /* Loaded, but synced with memory.  */
3301                     arg_ts->state = TS_MEM;
3302                 }
3303             }
3304         }
3305 
3306         /* Perform input replacement, and mark inputs that became dead.
3307            No action is required except keeping the temp state up to date
3308            so that we reload when needed.  */
3309         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3310             arg_ts = arg_temp(op->args[i]);
3311             if (arg_ts) {
3312                 dir_ts = arg_ts->state_ptr;
3313                 if (dir_ts) {
3314                     op->args[i] = temp_arg(dir_ts);
3315                     changes = true;
3316                     if (IS_DEAD_ARG(i)) {
3317                         arg_ts->state = TS_DEAD;
3318                     }
3319                 }
3320             }
3321         }
3322 
3323         /* Liveness analysis should ensure that the following are
3324            all correct, for call sites and basic block end points.  */
3325         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3326             /* Nothing to do */
3327         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3328             for (i = 0; i < nb_globals; ++i) {
3329                 /* Liveness should see that globals are synced back,
3330                    that is, either TS_DEAD or TS_MEM.  */
3331                 arg_ts = &s->temps[i];
3332                 tcg_debug_assert(arg_ts->state_ptr == 0
3333                                  || arg_ts->state != 0);
3334             }
3335         } else {
3336             for (i = 0; i < nb_globals; ++i) {
3337                 /* Liveness should see that globals are saved back,
3338                    that is, TS_DEAD, waiting to be reloaded.  */
3339                 arg_ts = &s->temps[i];
3340                 tcg_debug_assert(arg_ts->state_ptr == 0
3341                                  || arg_ts->state == TS_DEAD);
3342             }
3343         }
3344 
3345         /* Outputs become available.  */
3346         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3347             arg_ts = arg_temp(op->args[0]);
3348             dir_ts = arg_ts->state_ptr;
3349             if (dir_ts) {
3350                 op->args[0] = temp_arg(dir_ts);
3351                 changes = true;
3352 
3353                 /* The output is now live and modified.  */
3354                 arg_ts->state = 0;
3355 
3356                 if (NEED_SYNC_ARG(0)) {
3357                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3358                                       ? INDEX_op_st_i32
3359                                       : INDEX_op_st_i64);
3360                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3361                     TCGTemp *out_ts = dir_ts;
3362 
3363                     if (IS_DEAD_ARG(0)) {
3364                         out_ts = arg_temp(op->args[1]);
3365                         arg_ts->state = TS_DEAD;
3366                         tcg_op_remove(s, op);
3367                     } else {
3368                         arg_ts->state = TS_MEM;
3369                     }
3370 
3371                     sop->args[0] = temp_arg(out_ts);
3372                     sop->args[1] = temp_arg(arg_ts->mem_base);
3373                     sop->args[2] = arg_ts->mem_offset;
3374                 } else {
3375                     tcg_debug_assert(!IS_DEAD_ARG(0));
3376                 }
3377             }
3378         } else {
3379             for (i = 0; i < nb_oargs; i++) {
3380                 arg_ts = arg_temp(op->args[i]);
3381                 dir_ts = arg_ts->state_ptr;
3382                 if (!dir_ts) {
3383                     continue;
3384                 }
3385                 op->args[i] = temp_arg(dir_ts);
3386                 changes = true;
3387 
3388                 /* The output is now live and modified.  */
3389                 arg_ts->state = 0;
3390 
3391                 /* Sync outputs upon their last write.  */
3392                 if (NEED_SYNC_ARG(i)) {
3393                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3394                                       ? INDEX_op_st_i32
3395                                       : INDEX_op_st_i64);
3396                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3397 
3398                     sop->args[0] = temp_arg(dir_ts);
3399                     sop->args[1] = temp_arg(arg_ts->mem_base);
3400                     sop->args[2] = arg_ts->mem_offset;
3401 
3402                     arg_ts->state = TS_MEM;
3403                 }
3404                 /* Drop outputs that are dead.  */
3405                 if (IS_DEAD_ARG(i)) {
3406                     arg_ts->state = TS_DEAD;
3407                 }
3408             }
3409         }
3410     }
3411 
3412     return changes;
3413 }
3414 
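/*
 * A sketch of the rewrite performed by liveness_pass_2, with made-up
 * temp names and offsets.  For an indirect global "x" whose direct
 * temporary is "x_d", living in memory at env + 0x10:
 *
 *     before:   add_i32  x, x, t1
 *
 *     after:    ld_i32   x_d, env, $0x10
 *               add_i32  x_d, x_d, t1
 *               st_i32   x_d, env, $0x10
 *
 * The load is inserted only when the input is still TS_DEAD in the
 * direct temp, and the store only when NEED_SYNC_ARG is set.
 */
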
3415 #ifdef CONFIG_DEBUG_TCG
3416 static void dump_regs(TCGContext *s)
3417 {
3418     TCGTemp *ts;
3419     int i;
3420     char buf[64];
3421 
3422     for (i = 0; i < s->nb_temps; i++) {
3423         ts = &s->temps[i];
3424         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3425         switch (ts->val_type) {
3426         case TEMP_VAL_REG:
3427             printf("%s", tcg_target_reg_names[ts->reg]);
3428             break;
3429         case TEMP_VAL_MEM:
3430             printf("%d(%s)", (int)ts->mem_offset,
3431                    tcg_target_reg_names[ts->mem_base->reg]);
3432             break;
3433         case TEMP_VAL_CONST:
3434             printf("$0x%" PRIx64, ts->val);
3435             break;
3436         case TEMP_VAL_DEAD:
3437             printf("D");
3438             break;
3439         default:
3440             printf("???");
3441             break;
3442         }
3443         printf("\n");
3444     }
3445 
3446     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3447         if (s->reg_to_temp[i] != NULL) {
3448             printf("%s: %s\n",
3449                    tcg_target_reg_names[i],
3450                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3451         }
3452     }
3453 }
3454 
3455 static void check_regs(TCGContext *s)
3456 {
3457     int reg;
3458     int k;
3459     TCGTemp *ts;
3460     char buf[64];
3461 
3462     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3463         ts = s->reg_to_temp[reg];
3464         if (ts != NULL) {
3465             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3466                 printf("Inconsistency for register %s:\n",
3467                        tcg_target_reg_names[reg]);
3468                 goto fail;
3469             }
3470         }
3471     }
3472     for (k = 0; k < s->nb_temps; k++) {
3473         ts = &s->temps[k];
3474         if (ts->val_type == TEMP_VAL_REG
3475             && ts->kind != TEMP_FIXED
3476             && s->reg_to_temp[ts->reg] != ts) {
3477             printf("Inconsistency for temp %s:\n",
3478                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3479         fail:
3480             printf("reg state:\n");
3481             dump_regs(s);
3482             tcg_abort();
3483         }
3484     }
3485 }
3486 #endif
3487 
3488 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3489 {
3490 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3491     /* Sparc64 stack is accessed with an offset of 2047 */
3492     s->current_frame_offset = (s->current_frame_offset +
3493                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3494         ~(sizeof(tcg_target_long) - 1);
3495 #endif
3496     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3497         s->frame_end) {
3498         tcg_abort();
3499     }
3500     ts->mem_offset = s->current_frame_offset;
3501     ts->mem_base = s->frame_temp;
3502     ts->mem_allocated = 1;
3503     s->current_frame_offset += sizeof(tcg_target_long);
3504 }
3505 
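/*
 * A self-contained sketch of the round-up used in temp_allocate_frame;
 * the helper name is ours and it is not used by the allocator.  For a
 * power-of-two alignment, example_align_up(20, 8) == 24, while an
 * already aligned example_align_up(24, 8) == 24.
 */
static __attribute__((unused))
intptr_t example_align_up(intptr_t off, intptr_t align)
{
    /* Add align-1, then clear the low bits.  */
    return (off + align - 1) & ~(align - 1);
}
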
3506 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3507 
3508 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3509    mark it free; otherwise mark it dead.  */
3510 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3511 {
3512     TCGTempVal new_type;
3513 
3514     switch (ts->kind) {
3515     case TEMP_FIXED:
3516         return;
3517     case TEMP_GLOBAL:
3518     case TEMP_LOCAL:
3519         new_type = TEMP_VAL_MEM;
3520         break;
3521     case TEMP_NORMAL:
3522         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3523         break;
3524     case TEMP_CONST:
3525         new_type = TEMP_VAL_CONST;
3526         break;
3527     default:
3528         g_assert_not_reached();
3529     }
3530     if (ts->val_type == TEMP_VAL_REG) {
3531         s->reg_to_temp[ts->reg] = NULL;
3532     }
3533     ts->val_type = new_type;
3534 }
3535 
3536 /* Mark a temporary as dead.  */
3537 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3538 {
3539     temp_free_or_dead(s, ts, 1);
3540 }
3541 
3542 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3543    register needs to be allocated to store a constant.  If 'free_or_dead'
3544    is non-zero, subsequently release the temporary; if it is positive, the
3545    temp is dead; if it is negative, the temp is free.  */
3546 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3547                       TCGRegSet preferred_regs, int free_or_dead)
3548 {
3549     if (!temp_readonly(ts) && !ts->mem_coherent) {
3550         if (!ts->mem_allocated) {
3551             temp_allocate_frame(s, ts);
3552         }
3553         switch (ts->val_type) {
3554         case TEMP_VAL_CONST:
3555             /* If we're going to free the temp immediately, then we won't
3556                require it later in a register, so attempt to store the
3557                constant to memory directly.  */
3558             if (free_or_dead
3559                 && tcg_out_sti(s, ts->type, ts->val,
3560                                ts->mem_base->reg, ts->mem_offset)) {
3561                 break;
3562             }
3563             temp_load(s, ts, tcg_target_available_regs[ts->type],
3564                       allocated_regs, preferred_regs);
3565             /* fallthrough */
3566 
3567         case TEMP_VAL_REG:
3568             tcg_out_st(s, ts->type, ts->reg,
3569                        ts->mem_base->reg, ts->mem_offset);
3570             break;
3571 
3572         case TEMP_VAL_MEM:
3573             break;
3574 
3575         case TEMP_VAL_DEAD:
3576         default:
3577             tcg_abort();
3578         }
3579         ts->mem_coherent = 1;
3580     }
3581     if (free_or_dead) {
3582         temp_free_or_dead(s, ts, free_or_dead);
3583     }
3584 }
3585 
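/*
 * Illustrative calls showing the 'free_or_dead' convention of
 * temp_sync, with the argument values taken from callers elsewhere
 * in this file:
 *
 *     temp_sync(s, ts, allocated_regs, 0,  0);    sync only, keep temp
 *     temp_sync(s, ts, allocated_regs, 0,  1);    sync, then mark dead
 *     temp_sync(s, ts, allocated_regs, 0, -1);    sync, then mark free
 *
 * tcg_reg_free below passes -1, since a spilled temp remains valid
 * in its memory slot.
 */
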
3586 /* free register 'reg' by spilling the corresponding temporary if necessary */
3587 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3588 {
3589     TCGTemp *ts = s->reg_to_temp[reg];
3590     if (ts != NULL) {
3591         temp_sync(s, ts, allocated_regs, 0, -1);
3592     }
3593 }
3594 
3595 /**
3596  * tcg_reg_alloc:
3597  * @required_regs: Set of registers in which we must allocate.
3598  * @allocated_regs: Set of registers which must be avoided.
3599  * @preferred_regs: Set of registers we should prefer.
3600  * @rev: True if we search the registers in "indirect" order.
3601  *
3602  * The allocated register must be in @required_regs & ~@allocated_regs,
3603  * but if we can put it in @preferred_regs we may save a move later.
3604  */
3605 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3606                             TCGRegSet allocated_regs,
3607                             TCGRegSet preferred_regs, bool rev)
3608 {
3609     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3610     TCGRegSet reg_ct[2];
3611     const int *order;
3612 
3613     reg_ct[1] = required_regs & ~allocated_regs;
3614     tcg_debug_assert(reg_ct[1] != 0);
3615     reg_ct[0] = reg_ct[1] & preferred_regs;
3616 
3617     /* Skip the preferred_regs option if it cannot be satisfied,
3618        or if the preference made no difference.  */
3619     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3620 
3621     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3622 
3623     /* Try free registers, preferences first.  */
3624     for (j = f; j < 2; j++) {
3625         TCGRegSet set = reg_ct[j];
3626 
3627         if (tcg_regset_single(set)) {
3628             /* One register in the set.  */
3629             TCGReg reg = tcg_regset_first(set);
3630             if (s->reg_to_temp[reg] == NULL) {
3631                 return reg;
3632             }
3633         } else {
3634             for (i = 0; i < n; i++) {
3635                 TCGReg reg = order[i];
3636                 if (s->reg_to_temp[reg] == NULL &&
3637                     tcg_regset_test_reg(set, reg)) {
3638                     return reg;
3639                 }
3640             }
3641         }
3642     }
3643 
3644     /* We must spill something.  */
3645     for (j = f; j < 2; j++) {
3646         TCGRegSet set = reg_ct[j];
3647 
3648         if (tcg_regset_single(set)) {
3649             /* One register in the set.  */
3650             TCGReg reg = tcg_regset_first(set);
3651             tcg_reg_free(s, reg, allocated_regs);
3652             return reg;
3653         } else {
3654             for (i = 0; i < n; i++) {
3655                 TCGReg reg = order[i];
3656                 if (tcg_regset_test_reg(set, reg)) {
3657                     tcg_reg_free(s, reg, allocated_regs);
3658                     return reg;
3659                 }
3660             }
3661         }
3662     }
3663 
3664     tcg_abort();
3665 }
3666 
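/*
 * A compressed sketch of the search above, ignoring the target's
 * allocation order and the single-register fast path; the helper name
 * is ours, for illustration only.  Free registers are always
 * preferred, and only when none exists do we spill one.
 */
static __attribute__((unused))
TCGReg example_reg_alloc(TCGContext *s, TCGRegSet set,
                         TCGRegSet allocated_regs)
{
    int i;

    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(set, i) && s->reg_to_temp[i] == NULL) {
            return i;                             /* free: no spill needed */
        }
    }
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(set, i)) {
            tcg_reg_free(s, i, allocated_regs);   /* spill the occupant */
            return i;
        }
    }
    tcg_abort();
}
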
3667 /* Make sure the temporary is in a register.  If needed, allocate the register
3668    from DESIRED while avoiding ALLOCATED.  */
3669 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3670                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3671 {
3672     TCGReg reg;
3673 
3674     switch (ts->val_type) {
3675     case TEMP_VAL_REG:
3676         return;
3677     case TEMP_VAL_CONST:
3678         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3679                             preferred_regs, ts->indirect_base);
3680         if (ts->type <= TCG_TYPE_I64) {
3681             tcg_out_movi(s, ts->type, reg, ts->val);
3682         } else {
3683             uint64_t val = ts->val;
3684             MemOp vece = MO_64;
3685 
3686             /*
3687              * Find the minimal vector element that matches the constant.
3688              * The targets will, in general, have to do this search anyway;
3689              * do it generically here.
3690              */
3691             if (val == dup_const(MO_8, val)) {
3692                 vece = MO_8;
3693             } else if (val == dup_const(MO_16, val)) {
3694                 vece = MO_16;
3695             } else if (val == dup_const(MO_32, val)) {
3696                 vece = MO_32;
3697             }
3698 
3699             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3700         }
3701         ts->mem_coherent = 0;
3702         break;
3703     case TEMP_VAL_MEM:
3704         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3705                             preferred_regs, ts->indirect_base);
3706         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3707         ts->mem_coherent = 1;
3708         break;
3709     case TEMP_VAL_DEAD:
3710     default:
3711         tcg_abort();
3712     }
3713     ts->reg = reg;
3714     ts->val_type = TEMP_VAL_REG;
3715     s->reg_to_temp[reg] = ts;
3716 }
3717 
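/*
 * Worked example for the minimal-VECE search in temp_load: the
 * constant 0x4040404040404040 equals dup_const(MO_8, 0x40) and is
 * emitted as a byte splat; 0x0001000100010001 first matches at
 * MO_16; a non-repeating value such as 0x0123456789abcdef stays
 * at MO_64.
 */
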
3718 /* Save a temporary to memory. 'allocated_regs' is used in case a
3719    temporary register needs to be allocated to store a constant.  */
3720 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3721 {
3722     /* The liveness analysis already ensures that globals are back
3723        in memory. Keep a tcg_debug_assert for safety. */
3724     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3725 }
3726 
3727 /* save globals to their canonical location and assume they can be
3728    modified by the following code. 'allocated_regs' is used in case a
3729    temporary register needs to be allocated to store a constant. */
3730 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3731 {
3732     int i, n;
3733 
3734     for (i = 0, n = s->nb_globals; i < n; i++) {
3735         temp_save(s, &s->temps[i], allocated_regs);
3736     }
3737 }
3738 
3739 /* sync globals to their canonical location and assume they can be
3740    read by the following code. 'allocated_regs' is used in case a
3741    temporary register needs to be allocated to store a constant. */
3742 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3743 {
3744     int i, n;
3745 
3746     for (i = 0, n = s->nb_globals; i < n; i++) {
3747         TCGTemp *ts = &s->temps[i];
3748         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3749                          || ts->kind == TEMP_FIXED
3750                          || ts->mem_coherent);
3751     }
3752 }
3753 
3754 /* at the end of a basic block, we assume all temporaries are dead and
3755    all globals are stored at their canonical location. */
3756 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3757 {
3758     int i;
3759 
3760     for (i = s->nb_globals; i < s->nb_temps; i++) {
3761         TCGTemp *ts = &s->temps[i];
3762 
3763         switch (ts->kind) {
3764         case TEMP_LOCAL:
3765             temp_save(s, ts, allocated_regs);
3766             break;
3767         case TEMP_NORMAL:
3768             /* The liveness analysis already ensures that temps are dead.
3769                Keep a tcg_debug_assert for safety. */
3770             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3771             break;
3772         case TEMP_CONST:
3773             /* Similarly, we should have freed any allocated register. */
3774             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3775             break;
3776         default:
3777             g_assert_not_reached();
3778         }
3779     }
3780 
3781     save_globals(s, allocated_regs);
3782 }
3783 
3784 /*
3785  * At a conditional branch, we assume all temporaries are dead and
3786  * all globals and local temps are synced to their location.
3787  */
3788 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3789 {
3790     sync_globals(s, allocated_regs);
3791 
3792     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3793         TCGTemp *ts = &s->temps[i];
3794         /*
3795          * The liveness analysis already ensures that temps are dead.
3796          * Keep tcg_debug_asserts for safety.
3797          */
3798         switch (ts->kind) {
3799         case TEMP_LOCAL:
3800             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3801             break;
3802         case TEMP_NORMAL:
3803             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3804             break;
3805         case TEMP_CONST:
3806             break;
3807         default:
3808             g_assert_not_reached();
3809         }
3810     }
3811 }
3812 
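/*
 * Rough summary of the two block boundaries above, per temp kind:
 *
 *                  bb_end (unconditional)      cbranch (conditional)
 *   TEMP_GLOBAL    saved to canonical slot     synced, may stay in reg
 *   TEMP_LOCAL     saved to canonical slot     synced, may stay in reg
 *   TEMP_NORMAL    must already be dead        must already be dead
 *   TEMP_CONST     register already freed      left as is
 */
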
3813 /*
3814  * Specialized code generation for INDEX_op_mov_* with a constant.
3815  */
3816 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3817                                   tcg_target_ulong val, TCGLifeData arg_life,
3818                                   TCGRegSet preferred_regs)
3819 {
3820     /* ENV should not be modified.  */
3821     tcg_debug_assert(!temp_readonly(ots));
3822 
3823     /* The movi is not explicitly generated here.  */
3824     if (ots->val_type == TEMP_VAL_REG) {
3825         s->reg_to_temp[ots->reg] = NULL;
3826     }
3827     ots->val_type = TEMP_VAL_CONST;
3828     ots->val = val;
3829     ots->mem_coherent = 0;
3830     if (NEED_SYNC_ARG(0)) {
3831         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3832     } else if (IS_DEAD_ARG(0)) {
3833         temp_dead(s, ots);
3834     }
3835 }
3836 
3837 /*
3838  * Specialized code generation for INDEX_op_mov_*.
3839  */
3840 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3841 {
3842     const TCGLifeData arg_life = op->life;
3843     TCGRegSet allocated_regs, preferred_regs;
3844     TCGTemp *ts, *ots;
3845     TCGType otype, itype;
3846 
3847     allocated_regs = s->reserved_regs;
3848     preferred_regs = op->output_pref[0];
3849     ots = arg_temp(op->args[0]);
3850     ts = arg_temp(op->args[1]);
3851 
3852     /* ENV should not be modified.  */
3853     tcg_debug_assert(!temp_readonly(ots));
3854 
3855     /* Note that otype != itype for no-op truncation.  */
3856     otype = ots->type;
3857     itype = ts->type;
3858 
3859     if (ts->val_type == TEMP_VAL_CONST) {
3860         /* propagate constant or generate sti */
3861         tcg_target_ulong val = ts->val;
3862         if (IS_DEAD_ARG(1)) {
3863             temp_dead(s, ts);
3864         }
3865         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3866         return;
3867     }
3868 
3869     /* If the source value is in memory we're going to be forced
3870        to have it in a register in order to perform the copy.  Copy
3871        the SOURCE value into its own register first, that way we
3872        don't have to reload SOURCE the next time it is used. */
3873     if (ts->val_type == TEMP_VAL_MEM) {
3874         temp_load(s, ts, tcg_target_available_regs[itype],
3875                   allocated_regs, preferred_regs);
3876     }
3877 
3878     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3879     if (IS_DEAD_ARG(0)) {
3880         /* mov to a non-saved dead register makes no sense (even with
3881            liveness analysis disabled). */
3882         tcg_debug_assert(NEED_SYNC_ARG(0));
3883         if (!ots->mem_allocated) {
3884             temp_allocate_frame(s, ots);
3885         }
3886         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3887         if (IS_DEAD_ARG(1)) {
3888             temp_dead(s, ts);
3889         }
3890         temp_dead(s, ots);
3891     } else {
3892         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3893             /* the mov can be suppressed */
3894             if (ots->val_type == TEMP_VAL_REG) {
3895                 s->reg_to_temp[ots->reg] = NULL;
3896             }
3897             ots->reg = ts->reg;
3898             temp_dead(s, ts);
3899         } else {
3900             if (ots->val_type != TEMP_VAL_REG) {
3901                 /* When allocating a new register, make sure to not spill the
3902                    input one. */
3903                 tcg_regset_set_reg(allocated_regs, ts->reg);
3904                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3905                                          allocated_regs, preferred_regs,
3906                                          ots->indirect_base);
3907             }
3908             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3909                 /*
3910                  * Cross register class move not supported.
3911                  * Store the source register into the destination slot
3912                  * and leave the destination temp as TEMP_VAL_MEM.
3913                  */
3914                 assert(!temp_readonly(ots));
3915                 if (!ots->mem_allocated) {
3916                     temp_allocate_frame(s, ots);
3917                 }
3918                 tcg_out_st(s, ts->type, ts->reg,
3919                            ots->mem_base->reg, ots->mem_offset);
3920                 ots->mem_coherent = 1;
3921                 temp_free_or_dead(s, ots, -1);
3922                 return;
3923             }
3924         }
3925         ots->val_type = TEMP_VAL_REG;
3926         ots->mem_coherent = 0;
3927         s->reg_to_temp[ots->reg] = ots;
3928         if (NEED_SYNC_ARG(0)) {
3929             temp_sync(s, ots, allocated_regs, 0, 0);
3930         }
3931     }
3932 }
3933 
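/*
 * Example of the mov-suppression path above, with a hypothetical
 * register name: for "mov_i32 t2, t1" where t1 is in %r3, dies here,
 * and is not TEMP_FIXED, no host instruction is emitted at all; t2
 * simply takes over %r3 and t1 is marked dead.  Only when t1 stays
 * live (or is fixed) do we allocate a second register and emit a
 * real host move.
 */
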
3934 /*
3935  * Specialized code generation for INDEX_op_dup_vec.
3936  */
3937 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3938 {
3939     const TCGLifeData arg_life = op->life;
3940     TCGRegSet dup_out_regs, dup_in_regs;
3941     TCGTemp *its, *ots;
3942     TCGType itype, vtype;
3943     intptr_t endian_fixup;
3944     unsigned vece;
3945     bool ok;
3946 
3947     ots = arg_temp(op->args[0]);
3948     its = arg_temp(op->args[1]);
3949 
3950     /* ENV should not be modified.  */
3951     tcg_debug_assert(!temp_readonly(ots));
3952 
3953     itype = its->type;
3954     vece = TCGOP_VECE(op);
3955     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3956 
3957     if (its->val_type == TEMP_VAL_CONST) {
3958         /* Propagate constant via movi -> dupi.  */
3959         tcg_target_ulong val = its->val;
3960         if (IS_DEAD_ARG(1)) {
3961             temp_dead(s, its);
3962         }
3963         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3964         return;
3965     }
3966 
3967     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3968     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3969 
3970     /* Allocate the output register now.  */
3971     if (ots->val_type != TEMP_VAL_REG) {
3972         TCGRegSet allocated_regs = s->reserved_regs;
3973 
3974         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3975             /* Make sure to not spill the input register. */
3976             tcg_regset_set_reg(allocated_regs, its->reg);
3977         }
3978         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3979                                  op->output_pref[0], ots->indirect_base);
3980         ots->val_type = TEMP_VAL_REG;
3981         ots->mem_coherent = 0;
3982         s->reg_to_temp[ots->reg] = ots;
3983     }
3984 
3985     switch (its->val_type) {
3986     case TEMP_VAL_REG:
3987         /*
3988          * The dup constraints must be broad, covering all possible VECE.
3989          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3990          * to fail, indicating that extra moves are required for that case.
3991          */
3992         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3993             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3994                 goto done;
3995             }
3996             /* Try again from memory or a vector input register.  */
3997         }
3998         if (!its->mem_coherent) {
3999             /*
4000              * The input register is not synced, and so an extra store
4001              * would be required to use memory.  Attempt an integer-vector
4002              * register move first.  We do not have a TCGRegSet for this.
4003              */
4004             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4005                 break;
4006             }
4007             /* Sync the temp back to its slot and load from there.  */
4008             temp_sync(s, its, s->reserved_regs, 0, 0);
4009         }
4010         /* fall through */
4011 
4012     case TEMP_VAL_MEM:
4013 #ifdef HOST_WORDS_BIGENDIAN
4014         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4015         endian_fixup -= 1 << vece;
4016 #else
4017         endian_fixup = 0;
4018 #endif
4019         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4020                              its->mem_offset + endian_fixup)) {
4021             goto done;
4022         }
4023         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4024         break;
4025 
4026     default:
4027         g_assert_not_reached();
4028     }
4029 
4030     /* We now have a vector input register, so dup must succeed. */
4031     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4032     tcg_debug_assert(ok);
4033 
4034  done:
4035     if (IS_DEAD_ARG(1)) {
4036         temp_dead(s, its);
4037     }
4038     if (NEED_SYNC_ARG(0)) {
4039         temp_sync(s, ots, s->reserved_regs, 0, 0);
4040     }
4041     if (IS_DEAD_ARG(0)) {
4042         temp_dead(s, ots);
4043     }
4044 }
4045 
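/*
 * Worked example for the big-endian fixup above: for an I64 input
 * in memory and vece == MO_8, the least significant byte sits at
 * offset 8 - (1 << MO_8) = 7 from the start of the slot on a
 * big-endian host, and at offset 0 on a little-endian host.
 */
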
4046 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4047 {
4048     const TCGLifeData arg_life = op->life;
4049     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4050     TCGRegSet i_allocated_regs;
4051     TCGRegSet o_allocated_regs;
4052     int i, k, nb_iargs, nb_oargs;
4053     TCGReg reg;
4054     TCGArg arg;
4055     const TCGArgConstraint *arg_ct;
4056     TCGTemp *ts;
4057     TCGArg new_args[TCG_MAX_OP_ARGS];
4058     int const_args[TCG_MAX_OP_ARGS];
4059 
4060     nb_oargs = def->nb_oargs;
4061     nb_iargs = def->nb_iargs;
4062 
4063     /* copy constants */
4064     memcpy(new_args + nb_oargs + nb_iargs,
4065            op->args + nb_oargs + nb_iargs,
4066            sizeof(TCGArg) * def->nb_cargs);
4067 
4068     i_allocated_regs = s->reserved_regs;
4069     o_allocated_regs = s->reserved_regs;
4070 
4071     /* satisfy input constraints */
4072     for (k = 0; k < nb_iargs; k++) {
4073         TCGRegSet i_preferred_regs, o_preferred_regs;
4074 
4075         i = def->args_ct[nb_oargs + k].sort_index;
4076         arg = op->args[i];
4077         arg_ct = &def->args_ct[i];
4078         ts = arg_temp(arg);
4079 
4080         if (ts->val_type == TEMP_VAL_CONST
4081             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4082             /* constant is OK for instruction */
4083             const_args[i] = 1;
4084             new_args[i] = ts->val;
4085             continue;
4086         }
4087 
4088         i_preferred_regs = o_preferred_regs = 0;
4089         if (arg_ct->ialias) {
4090             o_preferred_regs = op->output_pref[arg_ct->alias_index];
4091 
4092             /*
4093              * If the input is readonly, then it cannot also be an
4094              * output and aliased to itself.  If the input is not
4095              * dead after the instruction, we must allocate a new
4096              * register and move it.
4097              */
4098             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4099                 goto allocate_in_reg;
4100             }
4101 
4102             /*
4103              * Check if the current register has already been allocated
4104              * for another input aliased to an output.
4105              */
4106             if (ts->val_type == TEMP_VAL_REG) {
4107                 reg = ts->reg;
4108                 for (int k2 = 0; k2 < k; k2++) {
4109                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
4110                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4111                         goto allocate_in_reg;
4112                     }
4113                 }
4114             }
4115             i_preferred_regs = o_preferred_regs;
4116         }
4117 
4118         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4119         reg = ts->reg;
4120 
4121         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4122  allocate_in_reg:
4123             /*
4124              * Allocate a new register matching the constraint
4125              * and move the temporary register into it.
4126              */
4127             temp_load(s, ts, tcg_target_available_regs[ts->type],
4128                       i_allocated_regs, 0);
4129             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4130                                 o_preferred_regs, ts->indirect_base);
4131             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4132                 /*
4133                  * Cross register class move not supported.  Sync the
4134                  * temp back to its slot and load from there.
4135                  */
4136                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4137                 tcg_out_ld(s, ts->type, reg,
4138                            ts->mem_base->reg, ts->mem_offset);
4139             }
4140         }
4141         new_args[i] = reg;
4142         const_args[i] = 0;
4143         tcg_regset_set_reg(i_allocated_regs, reg);
4144     }
4145 
4146     /* mark dead temporaries and free the associated registers */
4147     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4148         if (IS_DEAD_ARG(i)) {
4149             temp_dead(s, arg_temp(op->args[i]));
4150         }
4151     }
4152 
4153     if (def->flags & TCG_OPF_COND_BRANCH) {
4154         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4155     } else if (def->flags & TCG_OPF_BB_END) {
4156         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4157     } else {
4158         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4159             /* XXX: permit generic clobber register list? */
4160             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4161                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4162                     tcg_reg_free(s, i, i_allocated_regs);
4163                 }
4164             }
4165         }
4166         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4167             /* sync globals if the op has side effects and might trigger
4168                an exception. */
4169             sync_globals(s, i_allocated_regs);
4170         }
4171 
4172         /* satisfy the output constraints */
4173         for (k = 0; k < nb_oargs; k++) {
4174             i = def->args_ct[k].sort_index;
4175             arg = op->args[i];
4176             arg_ct = &def->args_ct[i];
4177             ts = arg_temp(arg);
4178 
4179             /* ENV should not be modified.  */
4180             tcg_debug_assert(!temp_readonly(ts));
4181 
4182             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4183                 reg = new_args[arg_ct->alias_index];
4184             } else if (arg_ct->newreg) {
4185                 reg = tcg_reg_alloc(s, arg_ct->regs,
4186                                     i_allocated_regs | o_allocated_regs,
4187                                     op->output_pref[k], ts->indirect_base);
4188             } else {
4189                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4190                                     op->output_pref[k], ts->indirect_base);
4191             }
4192             tcg_regset_set_reg(o_allocated_regs, reg);
4193             if (ts->val_type == TEMP_VAL_REG) {
4194                 s->reg_to_temp[ts->reg] = NULL;
4195             }
4196             ts->val_type = TEMP_VAL_REG;
4197             ts->reg = reg;
4198             /*
4199              * Temp value is modified, so the value kept in memory is
4200              * potentially not the same.
4201              */
4202             ts->mem_coherent = 0;
4203             s->reg_to_temp[reg] = ts;
4204             new_args[i] = reg;
4205         }
4206     }
4207 
4208     /* emit instruction */
4209     if (def->flags & TCG_OPF_VECTOR) {
4210         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4211                        new_args, const_args);
4212     } else {
4213         tcg_out_op(s, op->opc, new_args, const_args);
4214     }
4215 
4216     /* move the outputs in the correct register if needed */
4217     for (i = 0; i < nb_oargs; i++) {
4218         ts = arg_temp(op->args[i]);
4219 
4220         /* ENV should not be modified.  */
4221         tcg_debug_assert(!temp_readonly(ts));
4222 
4223         if (NEED_SYNC_ARG(i)) {
4224             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4225         } else if (IS_DEAD_ARG(i)) {
4226             temp_dead(s, ts);
4227         }
4228     }
4229 }
4230 
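/*
 * Example of the input-aliasing logic above, with a made-up two
 * operand constraint: for "add_i32 t0, t1, t2" where output 0 is
 * aliased to input 1, a t1 that dies at this op can hand its
 * register directly to t0; if t1 remains live, the input is first
 * copied into a freshly allocated register so that the instruction
 * may clobber it.
 */
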
4231 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4232 {
4233     const TCGLifeData arg_life = op->life;
4234     TCGTemp *ots, *itsl, *itsh;
4235     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4236 
4237     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4238     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4239     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4240 
4241     ots = arg_temp(op->args[0]);
4242     itsl = arg_temp(op->args[1]);
4243     itsh = arg_temp(op->args[2]);
4244 
4245     /* ENV should not be modified.  */
4246     tcg_debug_assert(!temp_readonly(ots));
4247 
4248     /* Allocate the output register now.  */
4249     if (ots->val_type != TEMP_VAL_REG) {
4250         TCGRegSet allocated_regs = s->reserved_regs;
4251         TCGRegSet dup_out_regs =
4252             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4253 
4254         /* Make sure to not spill the input registers. */
4255         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4256             tcg_regset_set_reg(allocated_regs, itsl->reg);
4257         }
4258         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4259             tcg_regset_set_reg(allocated_regs, itsh->reg);
4260         }
4261 
4262         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4263                                  op->output_pref[0], ots->indirect_base);
4264         ots->val_type = TEMP_VAL_REG;
4265         ots->mem_coherent = 0;
4266         s->reg_to_temp[ots->reg] = ots;
4267     }
4268 
4269     /* Promote dup2 of immediates to dupi_vec. */
4270     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4271         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4272         MemOp vece = MO_64;
4273 
4274         if (val == dup_const(MO_8, val)) {
4275             vece = MO_8;
4276         } else if (val == dup_const(MO_16, val)) {
4277             vece = MO_16;
4278         } else if (val == dup_const(MO_32, val)) {
4279             vece = MO_32;
4280         }
4281 
4282         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4283         goto done;
4284     }
4285 
4286     /* If the two inputs form one 64-bit value, try dupm_vec. */
4287     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4288         if (!itsl->mem_coherent) {
4289             temp_sync(s, itsl, s->reserved_regs, 0, 0);
4290         }
4291         if (!itsh->mem_coherent) {
4292             temp_sync(s, itsh, s->reserved_regs, 0, 0);
4293         }
4294 #ifdef HOST_WORDS_BIGENDIAN
4295         TCGTemp *its = itsh;
4296 #else
4297         TCGTemp *its = itsl;
4298 #endif
4299         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4300                              its->mem_base->reg, its->mem_offset)) {
4301             goto done;
4302         }
4303     }
4304 
4305     /* Fall back to generic expansion. */
4306     return false;
4307 
4308  done:
4309     if (IS_DEAD_ARG(1)) {
4310         temp_dead(s, itsl);
4311     }
4312     if (IS_DEAD_ARG(2)) {
4313         temp_dead(s, itsh);
4314     }
4315     if (NEED_SYNC_ARG(0)) {
4316         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4317     } else if (IS_DEAD_ARG(0)) {
4318         temp_dead(s, ots);
4319     }
4320     return true;
4321 }
4322 
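/*
 * Example of the dup2 constant promotion above: itsl == 0x01010101
 * and itsh == 0x01010101 combine via deposit64 into
 * 0x0101010101010101, which matches dup_const(MO_8, 0x01) and is
 * emitted as a byte splat rather than a full 64-bit dupi.
 */
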
4323 #ifdef TCG_TARGET_STACK_GROWSUP
4324 #define STACK_DIR(x) (-(x))
4325 #else
4326 #define STACK_DIR(x) (x)
4327 #endif
4328 
4329 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4330 {
4331     const int nb_oargs = TCGOP_CALLO(op);
4332     const int nb_iargs = TCGOP_CALLI(op);
4333     const TCGLifeData arg_life = op->life;
4334     int flags, nb_regs, i;
4335     TCGReg reg;
4336     TCGArg arg;
4337     TCGTemp *ts;
4338     intptr_t stack_offset;
4339     size_t call_stack_size;
4340     tcg_insn_unit *func_addr;
4341     int allocate_args;
4342     TCGRegSet allocated_regs;
4343 
4344     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4345     flags = op->args[nb_oargs + nb_iargs + 1];
4346 
4347     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4348     if (nb_regs > nb_iargs) {
4349         nb_regs = nb_iargs;
4350     }
4351 
4352     /* assign stack slots first */
4353     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4354     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4355         ~(TCG_TARGET_STACK_ALIGN - 1);
4356     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4357     if (allocate_args) {
4358         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4359            preallocate call stack */
4360         tcg_abort();
4361     }
4362 
4363     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4364     for (i = nb_regs; i < nb_iargs; i++) {
4365         arg = op->args[nb_oargs + i];
4366 #ifdef TCG_TARGET_STACK_GROWSUP
4367         stack_offset -= sizeof(tcg_target_long);
4368 #endif
4369         if (arg != TCG_CALL_DUMMY_ARG) {
4370             ts = arg_temp(arg);
4371             temp_load(s, ts, tcg_target_available_regs[ts->type],
4372                       s->reserved_regs, 0);
4373             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4374         }
4375 #ifndef TCG_TARGET_STACK_GROWSUP
4376         stack_offset += sizeof(tcg_target_long);
4377 #endif
4378     }
4379 
4380     /* assign input registers */
4381     allocated_regs = s->reserved_regs;
4382     for (i = 0; i < nb_regs; i++) {
4383         arg = op->args[nb_oargs + i];
4384         if (arg != TCG_CALL_DUMMY_ARG) {
4385             ts = arg_temp(arg);
4386             reg = tcg_target_call_iarg_regs[i];
4387 
4388             if (ts->val_type == TEMP_VAL_REG) {
4389                 if (ts->reg != reg) {
4390                     tcg_reg_free(s, reg, allocated_regs);
4391                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4392                         /*
4393                          * Cross register class move not supported.  Sync the
4394                          * temp back to its slot and load from there.
4395                          */
4396                         temp_sync(s, ts, allocated_regs, 0, 0);
4397                         tcg_out_ld(s, ts->type, reg,
4398                                    ts->mem_base->reg, ts->mem_offset);
4399                     }
4400                 }
4401             } else {
4402                 TCGRegSet arg_set = 0;
4403 
4404                 tcg_reg_free(s, reg, allocated_regs);
4405                 tcg_regset_set_reg(arg_set, reg);
4406                 temp_load(s, ts, arg_set, allocated_regs, 0);
4407             }
4408 
4409             tcg_regset_set_reg(allocated_regs, reg);
4410         }
4411     }
4412 
4413     /* mark dead temporaries and free the associated registers */
4414     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4415         if (IS_DEAD_ARG(i)) {
4416             temp_dead(s, arg_temp(op->args[i]));
4417         }
4418     }
4419 
4420     /* clobber call registers */
4421     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4422         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4423             tcg_reg_free(s, i, allocated_regs);
4424         }
4425     }
4426 
4427     /* Save globals if they might be written by the helper, sync them if
4428        they might be read. */
4429     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4430         /* Nothing to do */
4431     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4432         sync_globals(s, allocated_regs);
4433     } else {
4434         save_globals(s, allocated_regs);
4435     }
4436 
4437     tcg_out_call(s, func_addr);
4438 
4439     /* assign output registers and emit moves if needed */
4440     for (i = 0; i < nb_oargs; i++) {
4441         arg = op->args[i];
4442         ts = arg_temp(arg);
4443 
4444         /* ENV should not be modified.  */
4445         tcg_debug_assert(!temp_readonly(ts));
4446 
4447         reg = tcg_target_call_oarg_regs[i];
4448         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4449         if (ts->val_type == TEMP_VAL_REG) {
4450             s->reg_to_temp[ts->reg] = NULL;
4451         }
4452         ts->val_type = TEMP_VAL_REG;
4453         ts->reg = reg;
4454         ts->mem_coherent = 0;
4455         s->reg_to_temp[reg] = ts;
4456         if (NEED_SYNC_ARG(i)) {
4457             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4458         } else if (IS_DEAD_ARG(i)) {
4459             temp_dead(s, ts);
4460         }
4461     }
4462 }
4463 
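/*
 * Worked example for the stack sizing in tcg_reg_alloc_call, assuming
 * an 8-byte tcg_target_long, 6 argument registers and 16-byte stack
 * alignment: 9 input arguments leave 3 for the stack, so
 * call_stack_size is 3 * 8 = 24, rounded up to 32, which fits in the
 * statically reserved TCG_STATIC_CALL_ARGS_SIZE area.
 */
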
4464 #ifdef CONFIG_PROFILER
4465 
4466 /* avoid copy/paste errors */
4467 #define PROF_ADD(to, from, field)                       \
4468     do {                                                \
4469         (to)->field += qatomic_read(&((from)->field));  \
4470     } while (0)
4471 
4472 #define PROF_MAX(to, from, field)                                       \
4473     do {                                                                \
4474         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4475         if (val__ > (to)->field) {                                      \
4476             (to)->field = val__;                                        \
4477         }                                                               \
4478     } while (0)
4479 
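/*
 * For instance, PROF_ADD(prof, orig, tb_count) expands to a single
 * qatomic_read of orig->tb_count accumulated into prof->tb_count, so
 * the snapshot below is a plain field-by-field sum (or max) across
 * all TCG contexts.
 */
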
4480 /* Pass in a zero'ed @prof */
4481 static inline
4482 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4483 {
4484     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4485     unsigned int i;
4486 
4487     for (i = 0; i < n_ctxs; i++) {
4488         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4489         const TCGProfile *orig = &s->prof;
4490 
4491         if (counters) {
4492             PROF_ADD(prof, orig, cpu_exec_time);
4493             PROF_ADD(prof, orig, tb_count1);
4494             PROF_ADD(prof, orig, tb_count);
4495             PROF_ADD(prof, orig, op_count);
4496             PROF_MAX(prof, orig, op_count_max);
4497             PROF_ADD(prof, orig, temp_count);
4498             PROF_MAX(prof, orig, temp_count_max);
4499             PROF_ADD(prof, orig, del_op_count);
4500             PROF_ADD(prof, orig, code_in_len);
4501             PROF_ADD(prof, orig, code_out_len);
4502             PROF_ADD(prof, orig, search_out_len);
4503             PROF_ADD(prof, orig, interm_time);
4504             PROF_ADD(prof, orig, code_time);
4505             PROF_ADD(prof, orig, la_time);
4506             PROF_ADD(prof, orig, opt_time);
4507             PROF_ADD(prof, orig, restore_count);
4508             PROF_ADD(prof, orig, restore_time);
4509         }
4510         if (table) {
4511             int i;
4512 
4513             for (i = 0; i < NB_OPS; i++) {
4514                 PROF_ADD(prof, orig, table_op_count[i]);
4515             }
4516         }
4517     }
4518 }
4519 
4520 #undef PROF_ADD
4521 #undef PROF_MAX
4522 
4523 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4524 {
4525     tcg_profile_snapshot(prof, true, false);
4526 }
4527 
4528 static void tcg_profile_snapshot_table(TCGProfile *prof)
4529 {
4530     tcg_profile_snapshot(prof, false, true);
4531 }
4532 
4533 void tcg_dump_op_count(void)
4534 {
4535     TCGProfile prof = {};
4536     int i;
4537 
4538     tcg_profile_snapshot_table(&prof);
4539     for (i = 0; i < NB_OPS; i++) {
4540         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4541                     prof.table_op_count[i]);
4542     }
4543 }
4544 
4545 int64_t tcg_cpu_exec_time(void)
4546 {
4547     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4548     unsigned int i;
4549     int64_t ret = 0;
4550 
4551     for (i = 0; i < n_ctxs; i++) {
4552         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4553         const TCGProfile *prof = &s->prof;
4554 
4555         ret += qatomic_read(&prof->cpu_exec_time);
4556     }
4557     return ret;
4558 }
4559 #else
4560 void tcg_dump_op_count(void)
4561 {
4562     qemu_printf("[TCG profiler not compiled]\n");
4563 }
4564 
4565 int64_t tcg_cpu_exec_time(void)
4566 {
4567     error_report("%s: TCG profiler not compiled", __func__);
4568     exit(EXIT_FAILURE);
4569 }
4570 #endif
4571 
4572 
4573 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4574 {
4575 #ifdef CONFIG_PROFILER
4576     TCGProfile *prof = &s->prof;
4577 #endif
4578     int i, num_insns;
4579     TCGOp *op;
4580 
4581 #ifdef CONFIG_PROFILER
4582     {
4583         int n = 0;
4584 
4585         QTAILQ_FOREACH(op, &s->ops, link) {
4586             n++;
4587         }
4588         qatomic_set(&prof->op_count, prof->op_count + n);
4589         if (n > prof->op_count_max) {
4590             qatomic_set(&prof->op_count_max, n);
4591         }
4592 
4593         n = s->nb_temps;
4594         qatomic_set(&prof->temp_count, prof->temp_count + n);
4595         if (n > prof->temp_count_max) {
4596             qatomic_set(&prof->temp_count_max, n);
4597         }
4598     }
4599 #endif
4600 
4601 #ifdef DEBUG_DISAS
4602     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4603                  && qemu_log_in_addr_range(tb->pc))) {
4604         FILE *logfile = qemu_log_lock();
4605         qemu_log("OP:\n");
4606         tcg_dump_ops(s, false);
4607         qemu_log("\n");
4608         qemu_log_unlock(logfile);
4609     }
4610 #endif
4611 
4612 #ifdef CONFIG_DEBUG_TCG
4613     /* Ensure all labels referenced have been emitted.  */
4614     {
4615         TCGLabel *l;
4616         bool error = false;
4617 
4618         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4619             if (unlikely(!l->present) && l->refs) {
4620                 qemu_log_mask(CPU_LOG_TB_OP,
4621                               "$L%d referenced but not present.\n", l->id);
4622                 error = true;
4623             }
4624         }
4625         assert(!error);
4626     }
4627 #endif
4628 
4629 #ifdef CONFIG_PROFILER
4630     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4631 #endif
4632 
4633 #ifdef USE_TCG_OPTIMIZATIONS
4634     tcg_optimize(s);
4635 #endif
4636 
4637 #ifdef CONFIG_PROFILER
4638     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4639     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4640 #endif
4641 
4642     reachable_code_pass(s);
4643     liveness_pass_1(s);
4644 
4645     if (s->nb_indirects > 0) {
4646 #ifdef DEBUG_DISAS
4647         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4648                      && qemu_log_in_addr_range(tb->pc))) {
4649             FILE *logfile = qemu_log_lock();
4650             qemu_log("OP before indirect lowering:\n");
4651             tcg_dump_ops(s, false);
4652             qemu_log("\n");
4653             qemu_log_unlock(logfile);
4654         }
4655 #endif
4656         /* Replace indirect temps with direct temps.  */
4657         if (liveness_pass_2(s)) {
4658             /* If changes were made, re-run liveness.  */
4659             liveness_pass_1(s);
4660         }
4661     }
4662 
4663 #ifdef CONFIG_PROFILER
4664     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4665 #endif
4666 
4667 #ifdef DEBUG_DISAS
4668     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4669                  && qemu_log_in_addr_range(tb->pc))) {
4670         FILE *logfile = qemu_log_lock();
4671         qemu_log("OP after optimization and liveness analysis:\n");
4672         tcg_dump_ops(s, true);
4673         qemu_log("\n");
4674         qemu_log_unlock(logfile);
4675     }
4676 #endif
4677 
4678     tcg_reg_alloc_start(s);
4679 
4680     /*
4681      * Reset the buffer pointers when restarting after overflow.
4682      * TODO: Move this into translate-all.c with the rest of the
4683      * buffer management.  Having only this done here is confusing.
4684      */
4685     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4686     s->code_ptr = s->code_buf;
4687 
4688 #ifdef TCG_TARGET_NEED_LDST_LABELS
4689     QSIMPLEQ_INIT(&s->ldst_labels);
4690 #endif
4691 #ifdef TCG_TARGET_NEED_POOL_LABELS
4692     s->pool_labels = NULL;
4693 #endif
4694 
4695     num_insns = -1;
4696     QTAILQ_FOREACH(op, &s->ops, link) {
4697         TCGOpcode opc = op->opc;
4698 
4699 #ifdef CONFIG_PROFILER
4700         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4701 #endif
4702 
4703         switch (opc) {
4704         case INDEX_op_mov_i32:
4705         case INDEX_op_mov_i64:
4706         case INDEX_op_mov_vec:
4707             tcg_reg_alloc_mov(s, op);
4708             break;
4709         case INDEX_op_dup_vec:
4710             tcg_reg_alloc_dup(s, op);
4711             break;
4712         case INDEX_op_insn_start:
4713             if (num_insns >= 0) {
4714                 size_t off = tcg_current_code_size(s);
4715                 s->gen_insn_end_off[num_insns] = off;
4716                 /* Assert that we do not overflow our stored offset.  */
4717                 assert(s->gen_insn_end_off[num_insns] == off);
4718             }
4719             num_insns++;
4720             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4721                 target_ulong a;
4722 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4723                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4724 #else
4725                 a = op->args[i];
4726 #endif
4727                 s->gen_insn_data[num_insns][i] = a;
4728             }
4729             break;
4730         case INDEX_op_discard:
4731             temp_dead(s, arg_temp(op->args[0]));
4732             break;
4733         case INDEX_op_set_label:
4734             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4735             tcg_out_label(s, arg_label(op->args[0]));
4736             break;
4737         case INDEX_op_call:
4738             tcg_reg_alloc_call(s, op);
4739             break;
4740         case INDEX_op_dup2_vec:
4741             if (tcg_reg_alloc_dup2(s, op)) {
4742                 break;
4743             }
4744             /* fall through */
4745         default:
4746             /* Sanity check that we've not introduced any unhandled opcodes. */
4747             tcg_debug_assert(tcg_op_supported(opc));
4748             /* Note: it would be much faster to have specialized
4749                register allocator functions for some common argument
4750                patterns. */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
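    /* The TB must have contained at least one insn_start.  Record the
       code size at the end of the final guest instruction.  */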
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
void tcg_dump_info(void)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

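    /* The 2.4 GHz figure below is only a fixed reference frequency for
       turning the profile clock ticks into a rough seconds estimate.  */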
    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
                " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    qemu_printf("avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    qemu_printf("deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    qemu_printf("avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    qemu_printf("avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    qemu_printf("avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    qemu_printf("cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    qemu_printf("cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    qemu_printf("cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    qemu_printf("cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    qemu_printf("  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    qemu_printf("  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    qemu_printf("optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    qemu_printf("liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    qemu_printf("cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    qemu_printf("  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE.  This both supplies the e_machine value
       to put into the ELF image and indicates support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
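
/* As an illustration only (this is not the code of any particular
   backend; the DebugFrame layout and its contents are invented for the
   sketch), steps (2) and (3) in a backend might look roughly like:

       typedef struct {
           DebugFrameHeader h;
           uint8_t fde_def_cfa[4];
           uint8_t fde_reg_ofs[6];
       } DebugFrame;

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           static const DebugFrame debug_frame = {
               .h.cie.len = ...,   (host-specific CIE/FDE contents)
           };
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   tcg_register_jit_int below then wraps the code buffer and this unwind
   info in a small in-memory ELF image and hands it to GDB.  */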

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

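/* GDB places a breakpoint in this function.  The noinline attribute and
   the empty asm statement keep the call from being optimized away.  */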
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

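/* Return the offset of STR within STRTAB.  STR must be present: there is
   no terminating check, so a missing string would walk off the end.  */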
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
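    /* A minimal hand-rolled DWARF compilation unit: one CU DIE and one
       subprogram DIE covering code_gen_buffer.  The field layout must
       match the abbreviation table in .da below.  */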
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; the buffer is allocated elsewhere.
               Therefore we mark .text as SHT_NOBITS (similar to .bss) so
               that readers will not look for contents.  We can record
               any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

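    /* The backend's debug_frame template leaves the FDE address range
       blank; patch it to cover the actual code buffer.  */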
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to debug the ELF image file creation; the result
       can be inspected with readelf, objdump, or similar utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Silence the unused-result warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

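    /* Per the GDB JIT interface: publish the symfile, link the entry into
       the descriptor, set the action, then call the registration hook on
       which GDB has set its breakpoint.  */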
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
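/* Hosts without vector support never emit vector opcodes, so this
   expander should be unreachable.  */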
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif