xref: /openbmc/qemu/tcg/tcg.c (revision 64c9a921)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Define this to use liveness analysis (better code). */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for the qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
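
/*
 * A worked layout example (illustrative numbers, not a requirement): a
 * 64 MB code_gen_buffer split into 8 regions with 4 KB pages gives
 * .stride == 8 MB and .size == 8 MB - 4 KB, since each region keeps one
 * trailing guard page.  Region i then starts at start_aligned + i * stride
 * and may hold up to .size bytes of translated code.
 */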

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
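
/*
 * Illustrative only (not compiled): on a target where
 * TCG_TARGET_INSN_UNIT_SIZE == 1, each tcg_outN() call above advances
 * code_ptr by N bytes, so on a little-endian host
 *
 *     tcg_out32(s, 0x12345678);
 *
 * emits the byte sequence 78 56 34 12 and is equivalent to four tcg_out8()
 * calls, while a target with 4-byte insn units stores the same value as a
 * single unit.
 */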

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
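
/*
 * Sketch of the relocation life-cycle (R_TYPE is a hypothetical,
 * target-specific relocation kind): a forward branch is emitted before
 * its label is bound, so the site is queued on the label,
 *
 *     tcg_out_reloc(s, s->code_ptr, R_TYPE, l, 0);  // record branch site
 *     ...
 *     tcg_out_label(s, l);                          // bind the label here
 *
 * and tcg_resolve_relocs() later calls patch_reloc() on every queued
 * site to fill in the final displacement.
 */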

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
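
/*
 * To illustrate the three passes over tcg-target-con-set.h above (the
 * constraint letters are an illustrative example): a line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * expands first to the enumerator c_o1_i2_r_r_ri, then to the array
 * entry { .args_ct_str = { "r", "r", "ri" } }, and finally back to
 * c_o1_i2_r_r_ri as a value returned from tcg_target_op_def(), so each
 * enumerator indexes its own entry in constraint_sets[].
 */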

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In a lookup, exactly one of the two .size fields is set to 0.
     * From the glib sources we see that @ap is always the lookup key; however
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
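
/*
 * Lookup keys therefore carry only a pointer, as tcg_tb_lookup() below
 * does (sketch; host_pc is a hypothetical address inside a TB):
 *
 *     struct tb_tc key = { .ptr = host_pc };   // .size == 0 marks a lookup
 *     tb = g_tree_lookup(rt->tree, &key);
 */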

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
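
/*
 * Example of the index computation above (illustrative numbers): with
 * region.stride == 8 MB, a pointer 20 MB past start_aligned falls in
 * region_idx == 2.  Pointers below start_aligned (the unaligned head of
 * region 0) clamp to index 0, and pointers beyond the last stride clamp
 * to region.n - 1.
 */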

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
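
/*
 * Worked example for the sizing loop above (illustrative numbers): with a
 * 1 GB buffer and max_cpus == 8, the first iteration (i == 8) yields
 * 1 GB / 64 == 16 MB per region, which is >= 2 MB, so 64 regions are used.
 * A 64 MB buffer steps down to i == 4 (64 MB / 32 == 2 MB) and uses 32.
 */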

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
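
/*
 * Illustrative relationship between the two views (assuming split W^X is
 * active, i.e. tcg_splitwx_diff != 0): the code buffer is mapped twice,
 *
 *     rx_ptr == rw_ptr + tcg_splitwx_diff
 *
 * so generated code is written through the writable mapping and executed
 * from the executable one; when tcg_splitwx_diff == 0 both views alias
 * the same mapping.
 */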

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
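
/*
 * Usage sketch for the pool allocator above: per-TB allocations are cheap
 * bump allocations from the current chunk and are released wholesale, e.g.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(*r));  // no individual free
 *     ...
 *     tcg_pool_reset(s);  // frees the large chunks, recycles the small ones
 */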

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
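
    /*
     * Illustrative result of the reversal above (hypothetical allocation
     * order {s0, s1, s2, c0, c1}, where the c* registers are
     * call-clobbered): n == 3 saved registers are reversed, so
     * indirect_reg_alloc_order becomes {s2, s1, s0, c0, c1}.
     */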

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
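
/*
 * Resulting layout (illustrative), with '|' marking icache-line
 * boundaries:
 *
 *     | TranslationBlock | pad | translated code ... |
 *
 * so a TB descriptor and the code it describes never share a cache line.
 */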

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        /* Signal overflow, starting over with fewer guest insns. */
        siglongjmp(s->jmp_trans, -2);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
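
/*
 * Illustrative layout for the 32-bit-host split above (hypothetical
 * global "reg" at offset 0x10, little-endian host): the i64 global
 * becomes two i32 halves, "reg_0" at offset 0x10 (low part) and "reg_1"
 * at offset 0x14 (high part); a big-endian host swaps the two offsets.
 */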

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
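
/*
 * Usage sketch: constants are interned per (type, value) via the
 * tcg_constant_* wrappers, so repeated requests return the same
 * read-only temp, e.g.
 *
 *     TCGv_i32 four = tcg_constant_i32(4);   // same TCGTemp on every call
 *
 * and, as noted in tcg_temp_free_internal(), freeing one is a no-op.
 */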

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
1811         return TCG_TARGET_HAS_sextract_i64;
1812     case INDEX_op_extract2_i64:
1813         return TCG_TARGET_HAS_extract2_i64;
1814     case INDEX_op_extrl_i64_i32:
1815         return TCG_TARGET_HAS_extrl_i64_i32;
1816     case INDEX_op_extrh_i64_i32:
1817         return TCG_TARGET_HAS_extrh_i64_i32;
1818     case INDEX_op_ext8s_i64:
1819         return TCG_TARGET_HAS_ext8s_i64;
1820     case INDEX_op_ext16s_i64:
1821         return TCG_TARGET_HAS_ext16s_i64;
1822     case INDEX_op_ext32s_i64:
1823         return TCG_TARGET_HAS_ext32s_i64;
1824     case INDEX_op_ext8u_i64:
1825         return TCG_TARGET_HAS_ext8u_i64;
1826     case INDEX_op_ext16u_i64:
1827         return TCG_TARGET_HAS_ext16u_i64;
1828     case INDEX_op_ext32u_i64:
1829         return TCG_TARGET_HAS_ext32u_i64;
1830     case INDEX_op_bswap16_i64:
1831         return TCG_TARGET_HAS_bswap16_i64;
1832     case INDEX_op_bswap32_i64:
1833         return TCG_TARGET_HAS_bswap32_i64;
1834     case INDEX_op_bswap64_i64:
1835         return TCG_TARGET_HAS_bswap64_i64;
1836     case INDEX_op_not_i64:
1837         return TCG_TARGET_HAS_not_i64;
1838     case INDEX_op_neg_i64:
1839         return TCG_TARGET_HAS_neg_i64;
1840     case INDEX_op_andc_i64:
1841         return TCG_TARGET_HAS_andc_i64;
1842     case INDEX_op_orc_i64:
1843         return TCG_TARGET_HAS_orc_i64;
1844     case INDEX_op_eqv_i64:
1845         return TCG_TARGET_HAS_eqv_i64;
1846     case INDEX_op_nand_i64:
1847         return TCG_TARGET_HAS_nand_i64;
1848     case INDEX_op_nor_i64:
1849         return TCG_TARGET_HAS_nor_i64;
1850     case INDEX_op_clz_i64:
1851         return TCG_TARGET_HAS_clz_i64;
1852     case INDEX_op_ctz_i64:
1853         return TCG_TARGET_HAS_ctz_i64;
1854     case INDEX_op_ctpop_i64:
1855         return TCG_TARGET_HAS_ctpop_i64;
1856     case INDEX_op_add2_i64:
1857         return TCG_TARGET_HAS_add2_i64;
1858     case INDEX_op_sub2_i64:
1859         return TCG_TARGET_HAS_sub2_i64;
1860     case INDEX_op_mulu2_i64:
1861         return TCG_TARGET_HAS_mulu2_i64;
1862     case INDEX_op_muls2_i64:
1863         return TCG_TARGET_HAS_muls2_i64;
1864     case INDEX_op_muluh_i64:
1865         return TCG_TARGET_HAS_muluh_i64;
1866     case INDEX_op_mulsh_i64:
1867         return TCG_TARGET_HAS_mulsh_i64;
1868 
1869     case INDEX_op_mov_vec:
1870     case INDEX_op_dup_vec:
1871     case INDEX_op_dupm_vec:
1872     case INDEX_op_ld_vec:
1873     case INDEX_op_st_vec:
1874     case INDEX_op_add_vec:
1875     case INDEX_op_sub_vec:
1876     case INDEX_op_and_vec:
1877     case INDEX_op_or_vec:
1878     case INDEX_op_xor_vec:
1879     case INDEX_op_cmp_vec:
1880         return have_vec;
1881     case INDEX_op_dup2_vec:
1882         return have_vec && TCG_TARGET_REG_BITS == 32;
1883     case INDEX_op_not_vec:
1884         return have_vec && TCG_TARGET_HAS_not_vec;
1885     case INDEX_op_neg_vec:
1886         return have_vec && TCG_TARGET_HAS_neg_vec;
1887     case INDEX_op_abs_vec:
1888         return have_vec && TCG_TARGET_HAS_abs_vec;
1889     case INDEX_op_andc_vec:
1890         return have_vec && TCG_TARGET_HAS_andc_vec;
1891     case INDEX_op_orc_vec:
1892         return have_vec && TCG_TARGET_HAS_orc_vec;
1893     case INDEX_op_mul_vec:
1894         return have_vec && TCG_TARGET_HAS_mul_vec;
1895     case INDEX_op_shli_vec:
1896     case INDEX_op_shri_vec:
1897     case INDEX_op_sari_vec:
1898         return have_vec && TCG_TARGET_HAS_shi_vec;
1899     case INDEX_op_shls_vec:
1900     case INDEX_op_shrs_vec:
1901     case INDEX_op_sars_vec:
1902         return have_vec && TCG_TARGET_HAS_shs_vec;
1903     case INDEX_op_shlv_vec:
1904     case INDEX_op_shrv_vec:
1905     case INDEX_op_sarv_vec:
1906         return have_vec && TCG_TARGET_HAS_shv_vec;
1907     case INDEX_op_rotli_vec:
1908         return have_vec && TCG_TARGET_HAS_roti_vec;
1909     case INDEX_op_rotls_vec:
1910         return have_vec && TCG_TARGET_HAS_rots_vec;
1911     case INDEX_op_rotlv_vec:
1912     case INDEX_op_rotrv_vec:
1913         return have_vec && TCG_TARGET_HAS_rotv_vec;
1914     case INDEX_op_ssadd_vec:
1915     case INDEX_op_usadd_vec:
1916     case INDEX_op_sssub_vec:
1917     case INDEX_op_ussub_vec:
1918         return have_vec && TCG_TARGET_HAS_sat_vec;
1919     case INDEX_op_smin_vec:
1920     case INDEX_op_umin_vec:
1921     case INDEX_op_smax_vec:
1922     case INDEX_op_umax_vec:
1923         return have_vec && TCG_TARGET_HAS_minmax_vec;
1924     case INDEX_op_bitsel_vec:
1925         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1926     case INDEX_op_cmpsel_vec:
1927         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1928 
1929     default:
1930         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1931         return true;
1932     }
1933 }
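
/*
 * Example (illustrative): expansion code can consult tcg_op_supported()
 * to choose between emitting an opcode directly and open-coding a
 * fallback, e.g.:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ...emit ctpop_i32 directly...
 *     } else {
 *         ...synthesize a population count from shifts and masks...
 *     }
 */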
1934 
1935 /* Note: we convert the 64-bit args to 32-bit ones and do some alignment
1936    and endian swapping. Maybe it would be better to do the alignment
1937    and endian swapping in tcg_reg_alloc_call(). */
1938 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1939 {
1940     int i, real_args, nb_rets, pi;
1941     unsigned sizemask, flags;
1942     TCGHelperInfo *info;
1943     TCGOp *op;
1944 
1945     info = g_hash_table_lookup(helper_table, (gpointer)func);
1946     flags = info->flags;
1947     sizemask = info->sizemask;
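
    /*
     * Layout of sizemask, as decoded below: bit pair 2n+1:2n describes
     * slot n, where slot 0 is the return value and slot i+1 is argument i.
     * The low bit of a pair means "64-bit", the high bit "signed".  For
     * example, a helper with signature i64 f(i32, i64) has
     * sizemask = (1 << 0) | (1 << 4) = 0x11.
     */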
1948 
1949 #ifdef CONFIG_PLUGIN
1950     /* detect non-plugin helpers */
1951     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1952         tcg_ctx->plugin_insn->calls_helpers = true;
1953     }
1954 #endif
1955 
1956 #if defined(__sparc__) && !defined(__arch64__) \
1957     && !defined(CONFIG_TCG_INTERPRETER)
1958     /* We have 64-bit values in one register, but need to pass them as
1959        two separate parameters.  Split them.  */
1960     int orig_sizemask = sizemask;
1961     int orig_nargs = nargs;
1962     TCGv_i64 retl, reth;
1963     TCGTemp *split_args[MAX_OPC_PARAM];
1964 
1965     retl = NULL;
1966     reth = NULL;
1967     if (sizemask != 0) {
1968         for (i = real_args = 0; i < nargs; ++i) {
1969             int is_64bit = sizemask & (1 << (i+1)*2);
1970             if (is_64bit) {
1971                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1972                 TCGv_i32 h = tcg_temp_new_i32();
1973                 TCGv_i32 l = tcg_temp_new_i32();
1974                 tcg_gen_extr_i64_i32(l, h, orig);
1975                 split_args[real_args++] = tcgv_i32_temp(h);
1976                 split_args[real_args++] = tcgv_i32_temp(l);
1977             } else {
1978                 split_args[real_args++] = args[i];
1979             }
1980         }
1981         nargs = real_args;
1982         args = split_args;
1983         sizemask = 0;
1984     }
1985 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1986     for (i = 0; i < nargs; ++i) {
1987         int is_64bit = sizemask & (1 << (i+1)*2);
1988         int is_signed = sizemask & (2 << (i+1)*2);
1989         if (!is_64bit) {
1990             TCGv_i64 temp = tcg_temp_new_i64();
1991             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1992             if (is_signed) {
1993                 tcg_gen_ext32s_i64(temp, orig);
1994             } else {
1995                 tcg_gen_ext32u_i64(temp, orig);
1996             }
1997             args[i] = tcgv_i64_temp(temp);
1998         }
1999     }
2000 #endif /* TCG_TARGET_EXTEND_ARGS */
2001 
2002     op = tcg_emit_op(INDEX_op_call);
2003 
2004     pi = 0;
2005     if (ret != NULL) {
2006 #if defined(__sparc__) && !defined(__arch64__) \
2007     && !defined(CONFIG_TCG_INTERPRETER)
2008         if (orig_sizemask & 1) {
2009             /* The 32-bit ABI is going to return the 64-bit value in
2010                the %o0/%o1 register pair.  Prepare for this by using
2011                two return temporaries, and reassemble below.  */
2012             retl = tcg_temp_new_i64();
2013             reth = tcg_temp_new_i64();
2014             op->args[pi++] = tcgv_i64_arg(reth);
2015             op->args[pi++] = tcgv_i64_arg(retl);
2016             nb_rets = 2;
2017         } else {
2018             op->args[pi++] = temp_arg(ret);
2019             nb_rets = 1;
2020         }
2021 #else
2022         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2023 #ifdef HOST_WORDS_BIGENDIAN
2024             op->args[pi++] = temp_arg(ret + 1);
2025             op->args[pi++] = temp_arg(ret);
2026 #else
2027             op->args[pi++] = temp_arg(ret);
2028             op->args[pi++] = temp_arg(ret + 1);
2029 #endif
2030             nb_rets = 2;
2031         } else {
2032             op->args[pi++] = temp_arg(ret);
2033             nb_rets = 1;
2034         }
2035 #endif
2036     } else {
2037         nb_rets = 0;
2038     }
2039     TCGOP_CALLO(op) = nb_rets;
2040 
2041     real_args = 0;
2042     for (i = 0; i < nargs; i++) {
2043         int is_64bit = sizemask & (1 << (i+1)*2);
2044         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2045 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2046             /* some targets want aligned 64-bit args */
2047             if (real_args & 1) {
2048                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2049                 real_args++;
2050             }
2051 #endif
2052            /* If the stack grows up, then we will be placing successive
2053               arguments at lower addresses, which means we need to
2054               reverse the order compared to how we would normally
2055               treat either big or little-endian.  For those arguments
2056               that will wind up in registers, this still works for
2057               HPPA (the only current STACK_GROWSUP target) since the
2058               argument registers are *also* allocated in decreasing
2059               order.  If another such target is added, this logic may
2060               have to get more complicated to differentiate between
2061               stack arguments and register arguments.  */
2062 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2063             op->args[pi++] = temp_arg(args[i] + 1);
2064             op->args[pi++] = temp_arg(args[i]);
2065 #else
2066             op->args[pi++] = temp_arg(args[i]);
2067             op->args[pi++] = temp_arg(args[i] + 1);
2068 #endif
2069             real_args += 2;
2070             continue;
2071         }
2072 
2073         op->args[pi++] = temp_arg(args[i]);
2074         real_args++;
2075     }
2076     op->args[pi++] = (uintptr_t)func;
2077     op->args[pi++] = flags;
2078     TCGOP_CALLI(op) = real_args;
2079 
2080     /* Make sure the fields didn't overflow.  */
2081     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2082     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2083 
2084 #if defined(__sparc__) && !defined(__arch64__) \
2085     && !defined(CONFIG_TCG_INTERPRETER)
2086     /* Free all of the parts we allocated above.  */
2087     for (i = real_args = 0; i < orig_nargs; ++i) {
2088         int is_64bit = orig_sizemask & (1 << (i+1)*2);
2089         if (is_64bit) {
2090             tcg_temp_free_internal(args[real_args++]);
2091             tcg_temp_free_internal(args[real_args++]);
2092         } else {
2093             real_args++;
2094         }
2095     }
2096     if (orig_sizemask & 1) {
2097         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2098            Note that describing these as TCGv_i64 eliminates an unnecessary
2099            zero-extension that tcg_gen_concat_i32_i64 would create.  */
2100         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2101         tcg_temp_free_i64(retl);
2102         tcg_temp_free_i64(reth);
2103     }
2104 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2105     for (i = 0; i < nargs; ++i) {
2106         int is_64bit = sizemask & (1 << (i+1)*2);
2107         if (!is_64bit) {
2108             tcg_temp_free_internal(args[i]);
2109         }
2110     }
2111 #endif /* TCG_TARGET_EXTEND_ARGS */
2112 }
2113 
2114 static void tcg_reg_alloc_start(TCGContext *s)
2115 {
2116     int i, n;
2117 
2118     for (i = 0, n = s->nb_temps; i < n; i++) {
2119         TCGTemp *ts = &s->temps[i];
2120         TCGTempVal val = TEMP_VAL_MEM;
2121 
2122         switch (ts->kind) {
2123         case TEMP_CONST:
2124             val = TEMP_VAL_CONST;
2125             break;
2126         case TEMP_FIXED:
2127             val = TEMP_VAL_REG;
2128             break;
2129         case TEMP_GLOBAL:
2130             break;
2131         case TEMP_NORMAL:
2132             val = TEMP_VAL_DEAD;
2133             /* fall through */
2134         case TEMP_LOCAL:
2135             ts->mem_allocated = 0;
2136             break;
2137         default:
2138             g_assert_not_reached();
2139         }
2140         ts->val_type = val;
2141     }
2142 
2143     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2144 }
2145 
2146 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2147                                  TCGTemp *ts)
2148 {
2149     int idx = temp_idx(ts);
2150 
2151     switch (ts->kind) {
2152     case TEMP_FIXED:
2153     case TEMP_GLOBAL:
2154         pstrcpy(buf, buf_size, ts->name);
2155         break;
2156     case TEMP_LOCAL:
2157         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2158         break;
2159     case TEMP_NORMAL:
2160         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2161         break;
2162     case TEMP_CONST:
2163         switch (ts->type) {
2164         case TCG_TYPE_I32:
2165             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2166             break;
2167 #if TCG_TARGET_REG_BITS > 32
2168         case TCG_TYPE_I64:
2169             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2170             break;
2171 #endif
2172         case TCG_TYPE_V64:
2173         case TCG_TYPE_V128:
2174         case TCG_TYPE_V256:
2175             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2176                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2177             break;
2178         default:
2179             g_assert_not_reached();
2180         }
2181         break;
2182     }
2183     return buf;
2184 }
2185 
2186 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2187                              int buf_size, TCGArg arg)
2188 {
2189     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2190 }
2191 
2192 /* Find helper name.  */
2193 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2194 {
2195     const char *ret = NULL;
2196     if (helper_table) {
2197         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2198         if (info) {
2199             ret = info->name;
2200         }
2201     }
2202     return ret;
2203 }
2204 
2205 static const char * const cond_name[] =
2206 {
2207     [TCG_COND_NEVER] = "never",
2208     [TCG_COND_ALWAYS] = "always",
2209     [TCG_COND_EQ] = "eq",
2210     [TCG_COND_NE] = "ne",
2211     [TCG_COND_LT] = "lt",
2212     [TCG_COND_GE] = "ge",
2213     [TCG_COND_LE] = "le",
2214     [TCG_COND_GT] = "gt",
2215     [TCG_COND_LTU] = "ltu",
2216     [TCG_COND_GEU] = "geu",
2217     [TCG_COND_LEU] = "leu",
2218     [TCG_COND_GTU] = "gtu"
2219 };
2220 
2221 static const char * const ldst_name[] =
2222 {
2223     [MO_UB]   = "ub",
2224     [MO_SB]   = "sb",
2225     [MO_LEUW] = "leuw",
2226     [MO_LESW] = "lesw",
2227     [MO_LEUL] = "leul",
2228     [MO_LESL] = "lesl",
2229     [MO_LEQ]  = "leq",
2230     [MO_BEUW] = "beuw",
2231     [MO_BESW] = "besw",
2232     [MO_BEUL] = "beul",
2233     [MO_BESL] = "besl",
2234     [MO_BEQ]  = "beq",
2235 };
2236 
2237 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2238 #ifdef TARGET_ALIGNED_ONLY
2239     [MO_UNALN >> MO_ASHIFT]    = "un+",
2240     [MO_ALIGN >> MO_ASHIFT]    = "",
2241 #else
2242     [MO_UNALN >> MO_ASHIFT]    = "",
2243     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2244 #endif
2245     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2246     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2247     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2248     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2249     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2250     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2251 };
2252 
2253 static inline bool tcg_regset_single(TCGRegSet d)
2254 {
2255     return (d & (d - 1)) == 0;
2256 }
2257 
2258 static inline TCGReg tcg_regset_first(TCGRegSet d)
2259 {
2260     if (TCG_TARGET_NB_REGS <= 32) {
2261         return ctz32(d);
2262     } else {
2263         return ctz64(d);
2264     }
2265 }
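
/*
 * Worked example: for a set with only bit 3 set (0b1000),
 * tcg_regset_single() computes 0b1000 & 0b0111 == 0 and returns true,
 * and tcg_regset_first() returns ctz(0b1000) == 3.  Note that the
 * singleton test also accepts the empty set, so callers must know the
 * set is non-empty before trusting tcg_regset_first().
 */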
2266 
2267 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2268 {
2269     char buf[128];
2270     TCGOp *op;
2271 
2272     QTAILQ_FOREACH(op, &s->ops, link) {
2273         int i, k, nb_oargs, nb_iargs, nb_cargs;
2274         const TCGOpDef *def;
2275         TCGOpcode c;
2276         int col = 0;
2277 
2278         c = op->opc;
2279         def = &tcg_op_defs[c];
2280 
2281         if (c == INDEX_op_insn_start) {
2282             nb_oargs = 0;
2283             col += qemu_log("\n ----");
2284 
2285             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2286                 target_ulong a;
2287 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2288                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2289 #else
2290                 a = op->args[i];
2291 #endif
2292                 col += qemu_log(" " TARGET_FMT_lx, a);
2293             }
2294         } else if (c == INDEX_op_call) {
2295             /* variable number of arguments */
2296             nb_oargs = TCGOP_CALLO(op);
2297             nb_iargs = TCGOP_CALLI(op);
2298             nb_cargs = def->nb_cargs;
2299 
2300             /* function name, flags, out args */
2301             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2302                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2303                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2304             for (i = 0; i < nb_oargs; i++) {
2305                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2306                                                        op->args[i]));
2307             }
2308             for (i = 0; i < nb_iargs; i++) {
2309                 TCGArg arg = op->args[nb_oargs + i];
2310                 const char *t = "<dummy>";
2311                 if (arg != TCG_CALL_DUMMY_ARG) {
2312                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2313                 }
2314                 col += qemu_log(",%s", t);
2315             }
2316         } else {
2317             col += qemu_log(" %s ", def->name);
2318 
2319             nb_oargs = def->nb_oargs;
2320             nb_iargs = def->nb_iargs;
2321             nb_cargs = def->nb_cargs;
2322 
2323             if (def->flags & TCG_OPF_VECTOR) {
2324                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2325                                 8 << TCGOP_VECE(op));
2326             }
2327 
2328             k = 0;
2329             for (i = 0; i < nb_oargs; i++) {
2330                 if (k != 0) {
2331                     col += qemu_log(",");
2332                 }
2333                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2334                                                       op->args[k++]));
2335             }
2336             for (i = 0; i < nb_iargs; i++) {
2337                 if (k != 0) {
2338                     col += qemu_log(",");
2339                 }
2340                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2341                                                       op->args[k++]));
2342             }
2343             switch (c) {
2344             case INDEX_op_brcond_i32:
2345             case INDEX_op_setcond_i32:
2346             case INDEX_op_movcond_i32:
2347             case INDEX_op_brcond2_i32:
2348             case INDEX_op_setcond2_i32:
2349             case INDEX_op_brcond_i64:
2350             case INDEX_op_setcond_i64:
2351             case INDEX_op_movcond_i64:
2352             case INDEX_op_cmp_vec:
2353             case INDEX_op_cmpsel_vec:
2354                 if (op->args[k] < ARRAY_SIZE(cond_name)
2355                     && cond_name[op->args[k]]) {
2356                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2357                 } else {
2358                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2359                 }
2360                 i = 1;
2361                 break;
2362             case INDEX_op_qemu_ld_i32:
2363             case INDEX_op_qemu_st_i32:
2364             case INDEX_op_qemu_st8_i32:
2365             case INDEX_op_qemu_ld_i64:
2366             case INDEX_op_qemu_st_i64:
2367                 {
2368                     TCGMemOpIdx oi = op->args[k++];
2369                     MemOp mop = get_memop(oi);
2370                     unsigned ix = get_mmuidx(oi);
2371 
2372                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2373                         col += qemu_log(",$0x%x,%u", mop, ix);
2374                     } else {
2375                         const char *s_al, *s_op;
2376                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2377                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2378                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2379                     }
2380                     i = 1;
2381                 }
2382                 break;
2383             default:
2384                 i = 0;
2385                 break;
2386             }
2387             switch (c) {
2388             case INDEX_op_set_label:
2389             case INDEX_op_br:
2390             case INDEX_op_brcond_i32:
2391             case INDEX_op_brcond_i64:
2392             case INDEX_op_brcond2_i32:
2393                 col += qemu_log("%s$L%d", k ? "," : "",
2394                                 arg_label(op->args[k])->id);
2395                 i++, k++;
2396                 break;
2397             default:
2398                 break;
2399             }
2400             for (; i < nb_cargs; i++, k++) {
2401                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2402             }
2403         }
2404 
2405         if (have_prefs || op->life) {
2406 
2407             QemuLogFile *logfile;
2408 
2409             rcu_read_lock();
2410             logfile = qatomic_rcu_read(&qemu_logfile);
2411             if (logfile) {
2412                 for (; col < 40; ++col) {
2413                     putc(' ', logfile->fd);
2414                 }
2415             }
2416             rcu_read_unlock();
2417         }
2418 
2419         if (op->life) {
2420             unsigned life = op->life;
2421 
2422             if (life & (SYNC_ARG * 3)) {
2423                 qemu_log("  sync:");
2424                 for (i = 0; i < 2; ++i) {
2425                     if (life & (SYNC_ARG << i)) {
2426                         qemu_log(" %d", i);
2427                     }
2428                 }
2429             }
2430             life /= DEAD_ARG;
2431             if (life) {
2432                 qemu_log("  dead:");
2433                 for (i = 0; life; ++i, life >>= 1) {
2434                     if (life & 1) {
2435                         qemu_log(" %d", i);
2436                     }
2437                 }
2438             }
2439         }
2440 
2441         if (have_prefs) {
2442             for (i = 0; i < nb_oargs; ++i) {
2443                 TCGRegSet set = op->output_pref[i];
2444 
2445                 if (i == 0) {
2446                     qemu_log("  pref=");
2447                 } else {
2448                     qemu_log(",");
2449                 }
2450                 if (set == 0) {
2451                     qemu_log("none");
2452                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2453                     qemu_log("all");
2454 #ifdef CONFIG_DEBUG_TCG
2455                 } else if (tcg_regset_single(set)) {
2456                     TCGReg reg = tcg_regset_first(set);
2457                     qemu_log("%s", tcg_target_reg_names[reg]);
2458 #endif
2459                 } else if (TCG_TARGET_NB_REGS <= 32) {
2460                     qemu_log("%#x", (uint32_t)set);
2461                 } else {
2462                     qemu_log("%#" PRIx64, (uint64_t)set);
2463                 }
2464             }
2465         }
2466 
2467         qemu_log("\n");
2468     }
2469 }
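
/*
 * The dump produced above looks roughly like this (illustrative only;
 * exact operand names depend on the target and the logging flags):
 *
 *      ---- 0000000000401000
 *      mov_i32 tmp0,ecx                          dead: 1
 *      brcond_i32 tmp0,$0x0,eq,$L1               dead: 0
 */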
2470 
2471 /* we give more priority to constraints with less registers */
2472 static int get_constraint_priority(const TCGOpDef *def, int k)
2473 {
2474     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2475     int n;
2476 
2477     if (arg_ct->oalias) {
2478         /* an alias is equivalent to a single register */
2479         n = 1;
2480     } else {
2481         n = ctpop64(arg_ct->regs);
2482     }
2483     return TCG_TARGET_NB_REGS - n + 1;
2484 }
2485 
2486 /* sort from highest priority to lowest */
2487 static void sort_constraints(TCGOpDef *def, int start, int n)
2488 {
2489     int i, j;
2490     TCGArgConstraint *a = def->args_ct;
2491 
2492     for (i = 0; i < n; i++) {
2493         a[start + i].sort_index = start + i;
2494     }
2495     if (n <= 1) {
2496         return;
2497     }
2498     for (i = 0; i < n - 1; i++) {
2499         for (j = i + 1; j < n; j++) {
2500             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2501             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2502             if (p1 < p2) {
2503                 int tmp = a[start + i].sort_index;
2504                 a[start + i].sort_index = a[start + j].sort_index;
2505                 a[start + j].sort_index = tmp;
2506             }
2507         }
2508     }
2509 }
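
/*
 * Worked example: with 16 target registers, a constraint accepting any
 * register gets priority 16 - 16 + 1 = 1, while a single-register
 * constraint (or an output alias) gets 16 - 1 + 1 = 16; the exchange
 * sort above therefore places the most restrictive operands first in
 * sort_index order.
 */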
2510 
2511 static void process_op_defs(TCGContext *s)
2512 {
2513     TCGOpcode op;
2514 
2515     for (op = 0; op < NB_OPS; op++) {
2516         TCGOpDef *def = &tcg_op_defs[op];
2517         const TCGTargetOpDef *tdefs;
2518         int i, nb_args;
2519 
2520         if (def->flags & TCG_OPF_NOT_PRESENT) {
2521             continue;
2522         }
2523 
2524         nb_args = def->nb_iargs + def->nb_oargs;
2525         if (nb_args == 0) {
2526             continue;
2527         }
2528 
2529         /*
2530          * Macro magic should make it impossible, but double-check that
2531          * the array index is in range.  Since the signedness of an enum
2532          * is implementation-defined, force the result to unsigned.
2533          */
2534         unsigned con_set = tcg_target_op_def(op);
2535         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2536         tdefs = &constraint_sets[con_set];
2537 
2538         for (i = 0; i < nb_args; i++) {
2539             const char *ct_str = tdefs->args_ct_str[i];
2540             /* Incomplete TCGTargetOpDef entry. */
2541             tcg_debug_assert(ct_str != NULL);
2542 
2543             while (*ct_str != '\0') {
2544                 switch(*ct_str) {
2545                 case '0' ... '9':
2546                     {
2547                         int oarg = *ct_str - '0';
2548                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2549                         tcg_debug_assert(oarg < def->nb_oargs);
2550                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2551                         def->args_ct[i] = def->args_ct[oarg];
2552                         /* The output sets oalias.  */
2553                         def->args_ct[oarg].oalias = true;
2554                         def->args_ct[oarg].alias_index = i;
2555                         /* The input sets ialias. */
2556                         def->args_ct[i].ialias = true;
2557                         def->args_ct[i].alias_index = oarg;
2558                     }
2559                     ct_str++;
2560                     break;
2561                 case '&':
2562                     def->args_ct[i].newreg = true;
2563                     ct_str++;
2564                     break;
2565                 case 'i':
2566                     def->args_ct[i].ct |= TCG_CT_CONST;
2567                     ct_str++;
2568                     break;
2569 
2570                 /* Include all of the target-specific constraints. */
2571 
2572 #undef CONST
2573 #define CONST(CASE, MASK) \
2574     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2575 #define REGS(CASE, MASK) \
2576     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2577 
2578 #include "tcg-target-con-str.h"
2579 
2580 #undef REGS
2581 #undef CONST
2582                 default:
2583                     /* Typo in TCGTargetOpDef constraint. */
2584                     g_assert_not_reached();
2585                 }
2586             }
2587         }
2588 
2589         /* TCGTargetOpDef entry with too much information? */
2590         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2591 
2592         /* sort the constraints (XXX: this is just a heuristic) */
2593         sort_constraints(def, 0, def->nb_oargs);
2594         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2595     }
2596 }
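
/*
 * Example (illustrative): for a three-operand op on a two-address
 * target, a constraint set such as { "r", "0", "ri" } parses as: output
 * 0 in any register, input 1 aliased to output 0 (the oalias/ialias
 * pair above), and input 2 in a register or a constant (TCG_CT_CONST).
 */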
2597 
2598 void tcg_op_remove(TCGContext *s, TCGOp *op)
2599 {
2600     TCGLabel *label;
2601 
2602     switch (op->opc) {
2603     case INDEX_op_br:
2604         label = arg_label(op->args[0]);
2605         label->refs--;
2606         break;
2607     case INDEX_op_brcond_i32:
2608     case INDEX_op_brcond_i64:
2609         label = arg_label(op->args[3]);
2610         label->refs--;
2611         break;
2612     case INDEX_op_brcond2_i32:
2613         label = arg_label(op->args[5]);
2614         label->refs--;
2615         break;
2616     default:
2617         break;
2618     }
2619 
2620     QTAILQ_REMOVE(&s->ops, op, link);
2621     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2622     s->nb_ops--;
2623 
2624 #ifdef CONFIG_PROFILER
2625     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2626 #endif
2627 }
2628 
2629 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2630 {
2631     TCGContext *s = tcg_ctx;
2632     TCGOp *op;
2633 
2634     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2635         op = tcg_malloc(sizeof(TCGOp));
2636     } else {
2637         op = QTAILQ_FIRST(&s->free_ops);
2638         QTAILQ_REMOVE(&s->free_ops, op, link);
2639     }
2640     memset(op, 0, offsetof(TCGOp, link));
2641     op->opc = opc;
2642     s->nb_ops++;
2643 
2644     return op;
2645 }
2646 
2647 TCGOp *tcg_emit_op(TCGOpcode opc)
2648 {
2649     TCGOp *op = tcg_op_alloc(opc);
2650     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2651     return op;
2652 }
2653 
2654 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2655 {
2656     TCGOp *new_op = tcg_op_alloc(opc);
2657     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2658     return new_op;
2659 }
2660 
2661 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2662 {
2663     TCGOp *new_op = tcg_op_alloc(opc);
2664     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2665     return new_op;
2666 }
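
/*
 * Example (sketch): a pass that wants to spill temp TS to its backing
 * slot before OP could fill in a store like so, mirroring the loads
 * that liveness_pass_2 inserts below:
 *
 *     TCGOp *st = tcg_op_insert_before(s, op, INDEX_op_st_i32);
 *     st->args[0] = temp_arg(ts);
 *     st->args[1] = temp_arg(ts->mem_base);
 *     st->args[2] = ts->mem_offset;
 */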
2667 
2668 /* Reachability analysis: remove unreachable code.  */
2669 static void reachable_code_pass(TCGContext *s)
2670 {
2671     TCGOp *op, *op_next;
2672     bool dead = false;
2673 
2674     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2675         bool remove = dead;
2676         TCGLabel *label;
2677         int call_flags;
2678 
2679         switch (op->opc) {
2680         case INDEX_op_set_label:
2681             label = arg_label(op->args[0]);
2682             if (label->refs == 0) {
2683                 /*
2684                  * While there is an occasional backward branch, virtually
2685                  * all branches generated by the translators are forward.
2686                  * This means that, by the time we reach a label, we will
2687                  * generally have already removed every reference it will
2688                  * ever receive, and there is little to be gained by iterating.
2689                  */
2690                 remove = true;
2691             } else {
2692                 /* Once we see a label, insns become live again.  */
2693                 dead = false;
2694                 remove = false;
2695 
2696                 /*
2697                  * Optimization can fold conditional branches to unconditional.
2698                  * If we find a label with one reference which is preceded by
2699                  * an unconditional branch to it, remove both.  This had to
2700                  * wait until the dead code in between them was removed.
2701                  */
2702                 if (label->refs == 1) {
2703                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2704                     if (op_prev->opc == INDEX_op_br &&
2705                         label == arg_label(op_prev->args[0])) {
2706                         tcg_op_remove(s, op_prev);
2707                         remove = true;
2708                     }
2709                 }
2710             }
2711             break;
2712 
2713         case INDEX_op_br:
2714         case INDEX_op_exit_tb:
2715         case INDEX_op_goto_ptr:
2716             /* Unconditional branches; everything following is dead.  */
2717             dead = true;
2718             break;
2719 
2720         case INDEX_op_call:
2721             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2722             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2723             if (call_flags & TCG_CALL_NO_RETURN) {
2724                 dead = true;
2725             }
2726             break;
2727 
2728         case INDEX_op_insn_start:
2729             /* Never remove -- we need to keep these for unwind.  */
2730             remove = false;
2731             break;
2732 
2733         default:
2734             break;
2735         }
2736 
2737         if (remove) {
2738             tcg_op_remove(s, op);
2739         }
2740     }
2741 }
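
/*
 * Example of the effect above (illustrative): given
 *
 *     br $L1
 *     mov_i32 t0,t1       <- unreachable, removed as dead
 *     set_label $L1       <- now directly preceded by a br that holds
 *                            its only reference: both are removed.
 */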
2742 
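/*
 * Temp state bits for the liveness passes: TS_DEAD means the value is
 * not live in a register, TS_MEM means the canonical copy is (or must
 * be put) in memory.  TS_DEAD | TS_MEM thus describes a temp whose only
 * valid copy is its backing slot.
 */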
2743 #define TS_DEAD  1
2744 #define TS_MEM   2
2745 
2746 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2747 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2748 
2749 /* For liveness_pass_1, the register preferences for a given temp.  */
2750 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2751 {
2752     return ts->state_ptr;
2753 }
2754 
2755 /* For liveness_pass_1, reset the preferences for a given temp to the
2756  * maximal regset for its type.
2757  */
2758 static inline void la_reset_pref(TCGTemp *ts)
2759 {
2760     *la_temp_pref(ts)
2761         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2762 }
2763 
2764 /* liveness analysis: end of function: all temps are dead, and globals
2765    should be in memory. */
2766 static void la_func_end(TCGContext *s, int ng, int nt)
2767 {
2768     int i;
2769 
2770     for (i = 0; i < ng; ++i) {
2771         s->temps[i].state = TS_DEAD | TS_MEM;
2772         la_reset_pref(&s->temps[i]);
2773     }
2774     for (i = ng; i < nt; ++i) {
2775         s->temps[i].state = TS_DEAD;
2776         la_reset_pref(&s->temps[i]);
2777     }
2778 }
2779 
2780 /* liveness analysis: end of basic block: all temps are dead, globals
2781    and local temps should be in memory. */
2782 static void la_bb_end(TCGContext *s, int ng, int nt)
2783 {
2784     int i;
2785 
2786     for (i = 0; i < nt; ++i) {
2787         TCGTemp *ts = &s->temps[i];
2788         int state;
2789 
2790         switch (ts->kind) {
2791         case TEMP_FIXED:
2792         case TEMP_GLOBAL:
2793         case TEMP_LOCAL:
2794             state = TS_DEAD | TS_MEM;
2795             break;
2796         case TEMP_NORMAL:
2797         case TEMP_CONST:
2798             state = TS_DEAD;
2799             break;
2800         default:
2801             g_assert_not_reached();
2802         }
2803         ts->state = state;
2804         la_reset_pref(ts);
2805     }
2806 }
2807 
2808 /* liveness analysis: sync globals back to memory.  */
2809 static void la_global_sync(TCGContext *s, int ng)
2810 {
2811     int i;
2812 
2813     for (i = 0; i < ng; ++i) {
2814         int state = s->temps[i].state;
2815         s->temps[i].state = state | TS_MEM;
2816         if (state == TS_DEAD) {
2817             /* If the global was previously dead, reset prefs.  */
2818             la_reset_pref(&s->temps[i]);
2819         }
2820     }
2821 }
2822 
2823 /*
2824  * liveness analysis: conditional branch: all temps are dead,
2825  * globals and local temps should be synced.
2826  */
2827 static void la_bb_sync(TCGContext *s, int ng, int nt)
2828 {
2829     la_global_sync(s, ng);
2830 
2831     for (int i = ng; i < nt; ++i) {
2832         TCGTemp *ts = &s->temps[i];
2833         int state;
2834 
2835         switch (ts->kind) {
2836         case TEMP_LOCAL:
2837             state = ts->state;
2838             ts->state = state | TS_MEM;
2839             if (state != TS_DEAD) {
2840                 continue;
2841             }
2842             break;
2843         case TEMP_NORMAL:
2844             s->temps[i].state = TS_DEAD;
2845             break;
2846         case TEMP_CONST:
2847             continue;
2848         default:
2849             g_assert_not_reached();
2850         }
2851         la_reset_pref(&s->temps[i]);
2852     }
2853 }
2854 
2855 /* liveness analysis: sync globals back to memory and kill.  */
2856 static void la_global_kill(TCGContext *s, int ng)
2857 {
2858     int i;
2859 
2860     for (i = 0; i < ng; i++) {
2861         s->temps[i].state = TS_DEAD | TS_MEM;
2862         la_reset_pref(&s->temps[i]);
2863     }
2864 }
2865 
2866 /* liveness analysis: note live temporaries crossing calls.  */
2867 static void la_cross_call(TCGContext *s, int nt)
2868 {
2869     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2870     int i;
2871 
2872     for (i = 0; i < nt; i++) {
2873         TCGTemp *ts = &s->temps[i];
2874         if (!(ts->state & TS_DEAD)) {
2875             TCGRegSet *pset = la_temp_pref(ts);
2876             TCGRegSet set = *pset;
2877 
2878             set &= mask;
2879             /* If the combination is not possible, restart.  */
2880             if (set == 0) {
2881                 set = tcg_target_available_regs[ts->type] & mask;
2882             }
2883             *pset = set;
2884         }
2885     }
2886 }
2887 
2888 /* Liveness analysis: update the opc_arg_life array to tell whether a
2889    given input argument is dead. Instructions updating dead
2890    temporaries are removed. */
2891 static void liveness_pass_1(TCGContext *s)
2892 {
2893     int nb_globals = s->nb_globals;
2894     int nb_temps = s->nb_temps;
2895     TCGOp *op, *op_prev;
2896     TCGRegSet *prefs;
2897     int i;
2898 
2899     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2900     for (i = 0; i < nb_temps; ++i) {
2901         s->temps[i].state_ptr = prefs + i;
2902     }
2903 
2904     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2905     la_func_end(s, nb_globals, nb_temps);
2906 
2907     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2908         int nb_iargs, nb_oargs;
2909         TCGOpcode opc_new, opc_new2;
2910         bool have_opc_new2;
2911         TCGLifeData arg_life = 0;
2912         TCGTemp *ts;
2913         TCGOpcode opc = op->opc;
2914         const TCGOpDef *def = &tcg_op_defs[opc];
2915 
2916         switch (opc) {
2917         case INDEX_op_call:
2918             {
2919                 int call_flags;
2920                 int nb_call_regs;
2921 
2922                 nb_oargs = TCGOP_CALLO(op);
2923                 nb_iargs = TCGOP_CALLI(op);
2924                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2925 
2926                 /* pure functions can be removed if their result is unused */
2927                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2928                     for (i = 0; i < nb_oargs; i++) {
2929                         ts = arg_temp(op->args[i]);
2930                         if (ts->state != TS_DEAD) {
2931                             goto do_not_remove_call;
2932                         }
2933                     }
2934                     goto do_remove;
2935                 }
2936             do_not_remove_call:
2937 
2938                 /* Output args are dead.  */
2939                 for (i = 0; i < nb_oargs; i++) {
2940                     ts = arg_temp(op->args[i]);
2941                     if (ts->state & TS_DEAD) {
2942                         arg_life |= DEAD_ARG << i;
2943                     }
2944                     if (ts->state & TS_MEM) {
2945                         arg_life |= SYNC_ARG << i;
2946                     }
2947                     ts->state = TS_DEAD;
2948                     la_reset_pref(ts);
2949 
2950                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2951                     op->output_pref[i] = 0;
2952                 }
2953 
2954                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2955                                     TCG_CALL_NO_READ_GLOBALS))) {
2956                     la_global_kill(s, nb_globals);
2957                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2958                     la_global_sync(s, nb_globals);
2959                 }
2960 
2961                 /* Record arguments that die in this helper.  */
2962                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2963                     ts = arg_temp(op->args[i]);
2964                     if (ts && ts->state & TS_DEAD) {
2965                         arg_life |= DEAD_ARG << i;
2966                     }
2967                 }
2968 
2969                 /* For all live registers, remove call-clobbered prefs.  */
2970                 la_cross_call(s, nb_temps);
2971 
2972                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2973 
2974                 /* Input arguments are live for preceding opcodes.  */
2975                 for (i = 0; i < nb_iargs; i++) {
2976                     ts = arg_temp(op->args[i + nb_oargs]);
2977                     if (ts && ts->state & TS_DEAD) {
2978                         /* For those arguments that die, and will be allocated
2979                          * in registers, clear the register set for that arg,
2980                          * to be filled in below.  For args that will be on
2981                          * the stack, reset to any available reg.
2982                          */
2983                         *la_temp_pref(ts)
2984                             = (i < nb_call_regs ? 0 :
2985                                tcg_target_available_regs[ts->type]);
2986                         ts->state &= ~TS_DEAD;
2987                     }
2988                 }
2989 
2990                 /* For each input argument, add its input register to prefs.
2991                    If a temp is used once, this produces a single set bit.  */
2992                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2993                     ts = arg_temp(op->args[i + nb_oargs]);
2994                     if (ts) {
2995                         tcg_regset_set_reg(*la_temp_pref(ts),
2996                                            tcg_target_call_iarg_regs[i]);
2997                     }
2998                 }
2999             }
3000             break;
3001         case INDEX_op_insn_start:
3002             break;
3003         case INDEX_op_discard:
3004             /* mark the temporary as dead */
3005             ts = arg_temp(op->args[0]);
3006             ts->state = TS_DEAD;
3007             la_reset_pref(ts);
3008             break;
3009 
3010         case INDEX_op_add2_i32:
3011             opc_new = INDEX_op_add_i32;
3012             goto do_addsub2;
3013         case INDEX_op_sub2_i32:
3014             opc_new = INDEX_op_sub_i32;
3015             goto do_addsub2;
3016         case INDEX_op_add2_i64:
3017             opc_new = INDEX_op_add_i64;
3018             goto do_addsub2;
3019         case INDEX_op_sub2_i64:
3020             opc_new = INDEX_op_sub_i64;
3021         do_addsub2:
3022             nb_iargs = 4;
3023             nb_oargs = 2;
3024             /* Test if the high part of the operation is dead, but not
3025                the low part.  The result can be optimized to a simple
3026                add or sub.  This happens often for an x86_64 guest when
3027                the CPU mode is set to 32 bit.  */
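            /* For example (illustrative):
                   add2_i32 lo,hi,al,ah,bl,bh    with hi dead
               becomes
                   add_i32 lo,al,bl                                    */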
3028             if (arg_temp(op->args[1])->state == TS_DEAD) {
3029                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3030                     goto do_remove;
3031                 }
3032                 /* Replace the opcode and adjust the args in place,
3033                    leaving 3 unused args at the end.  */
3034                 op->opc = opc = opc_new;
3035                 op->args[1] = op->args[2];
3036                 op->args[2] = op->args[4];
3037                 /* Fall through and mark the single-word operation live.  */
3038                 nb_iargs = 2;
3039                 nb_oargs = 1;
3040             }
3041             goto do_not_remove;
3042 
3043         case INDEX_op_mulu2_i32:
3044             opc_new = INDEX_op_mul_i32;
3045             opc_new2 = INDEX_op_muluh_i32;
3046             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3047             goto do_mul2;
3048         case INDEX_op_muls2_i32:
3049             opc_new = INDEX_op_mul_i32;
3050             opc_new2 = INDEX_op_mulsh_i32;
3051             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3052             goto do_mul2;
3053         case INDEX_op_mulu2_i64:
3054             opc_new = INDEX_op_mul_i64;
3055             opc_new2 = INDEX_op_muluh_i64;
3056             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3057             goto do_mul2;
3058         case INDEX_op_muls2_i64:
3059             opc_new = INDEX_op_mul_i64;
3060             opc_new2 = INDEX_op_mulsh_i64;
3061             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3062             goto do_mul2;
3063         do_mul2:
3064             nb_iargs = 2;
3065             nb_oargs = 2;
3066             if (arg_temp(op->args[1])->state == TS_DEAD) {
3067                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3068                     /* Both parts of the operation are dead.  */
3069                     goto do_remove;
3070                 }
3071                 /* The high part of the operation is dead; generate the low. */
3072                 op->opc = opc = opc_new;
3073                 op->args[1] = op->args[2];
3074                 op->args[2] = op->args[3];
3075             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3076                 /* The low part of the operation is dead; generate the high. */
3077                 op->opc = opc = opc_new2;
3078                 op->args[0] = op->args[1];
3079                 op->args[1] = op->args[2];
3080                 op->args[2] = op->args[3];
3081             } else {
3082                 goto do_not_remove;
3083             }
3084             /* Mark the single-word operation live.  */
3085             nb_oargs = 1;
3086             goto do_not_remove;
3087 
3088         default:
3089             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3090             nb_iargs = def->nb_iargs;
3091             nb_oargs = def->nb_oargs;
3092 
3093             /* Test if the operation can be removed because all
3094                its outputs are dead. We assume that nb_oargs == 0
3095                implies side effects */
3096             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3097                 for (i = 0; i < nb_oargs; i++) {
3098                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3099                         goto do_not_remove;
3100                     }
3101                 }
3102                 goto do_remove;
3103             }
3104             goto do_not_remove;
3105 
3106         do_remove:
3107             tcg_op_remove(s, op);
3108             break;
3109 
3110         do_not_remove:
3111             for (i = 0; i < nb_oargs; i++) {
3112                 ts = arg_temp(op->args[i]);
3113 
3114                 /* Remember the preference of the uses that followed.  */
3115                 op->output_pref[i] = *la_temp_pref(ts);
3116 
3117                 /* Output args are dead.  */
3118                 if (ts->state & TS_DEAD) {
3119                     arg_life |= DEAD_ARG << i;
3120                 }
3121                 if (ts->state & TS_MEM) {
3122                     arg_life |= SYNC_ARG << i;
3123                 }
3124                 ts->state = TS_DEAD;
3125                 la_reset_pref(ts);
3126             }
3127 
3128             /* If end of basic block, update.  */
3129             if (def->flags & TCG_OPF_BB_EXIT) {
3130                 la_func_end(s, nb_globals, nb_temps);
3131             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3132                 la_bb_sync(s, nb_globals, nb_temps);
3133             } else if (def->flags & TCG_OPF_BB_END) {
3134                 la_bb_end(s, nb_globals, nb_temps);
3135             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3136                 la_global_sync(s, nb_globals);
3137                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3138                     la_cross_call(s, nb_temps);
3139                 }
3140             }
3141 
3142             /* Record arguments that die in this opcode.  */
3143             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3144                 ts = arg_temp(op->args[i]);
3145                 if (ts->state & TS_DEAD) {
3146                     arg_life |= DEAD_ARG << i;
3147                 }
3148             }
3149 
3150             /* Input arguments are live for preceding opcodes.  */
3151             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3152                 ts = arg_temp(op->args[i]);
3153                 if (ts->state & TS_DEAD) {
3154                     /* For operands that were dead, initially allow
3155                        all regs for the type.  */
3156                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3157                     ts->state &= ~TS_DEAD;
3158                 }
3159             }
3160 
3161             /* Incorporate constraints for this operand.  */
3162             switch (opc) {
3163             case INDEX_op_mov_i32:
3164             case INDEX_op_mov_i64:
3165                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3166                    have proper constraints.  That said, special case
3167                    moves to propagate preferences backward.  */
3168                 if (IS_DEAD_ARG(1)) {
3169                     *la_temp_pref(arg_temp(op->args[0]))
3170                         = *la_temp_pref(arg_temp(op->args[1]));
3171                 }
3172                 break;
3173 
3174             default:
3175                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3176                     const TCGArgConstraint *ct = &def->args_ct[i];
3177                     TCGRegSet set, *pset;
3178 
3179                     ts = arg_temp(op->args[i]);
3180                     pset = la_temp_pref(ts);
3181                     set = *pset;
3182 
3183                     set &= ct->regs;
3184                     if (ct->ialias) {
3185                         set &= op->output_pref[ct->alias_index];
3186                     }
3187                     /* If the combination is not possible, restart.  */
3188                     if (set == 0) {
3189                         set = ct->regs;
3190                     }
3191                     *pset = set;
3192                 }
3193                 break;
3194             }
3195             break;
3196         }
3197         op->life = arg_life;
3198     }
3199 }
3200 
3201 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3202 static bool liveness_pass_2(TCGContext *s)
3203 {
3204     int nb_globals = s->nb_globals;
3205     int nb_temps, i;
3206     bool changes = false;
3207     TCGOp *op, *op_next;
3208 
3209     /* Create a temporary for each indirect global.  */
3210     for (i = 0; i < nb_globals; ++i) {
3211         TCGTemp *its = &s->temps[i];
3212         if (its->indirect_reg) {
3213             TCGTemp *dts = tcg_temp_alloc(s);
3214             dts->type = its->type;
3215             dts->base_type = its->base_type;
3216             its->state_ptr = dts;
3217         } else {
3218             its->state_ptr = NULL;
3219         }
3220         /* All globals begin dead.  */
3221         its->state = TS_DEAD;
3222     }
3223     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3224         TCGTemp *its = &s->temps[i];
3225         its->state_ptr = NULL;
3226         its->state = TS_DEAD;
3227     }
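
    /*
     * Example (illustrative): for an indirect global "reg" living
     * behind env, the rewrite below turns
     *
     *     add_i32 reg,reg,t0
     * into
     *     ld_i32 tmpN,env,$off         <- inserted when reg was dead
     *     add_i32 tmpN,tmpN,t0
     *
     * with a store back to env inserted wherever a sync is required.
     */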
3228 
3229     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3230         TCGOpcode opc = op->opc;
3231         const TCGOpDef *def = &tcg_op_defs[opc];
3232         TCGLifeData arg_life = op->life;
3233         int nb_iargs, nb_oargs, call_flags;
3234         TCGTemp *arg_ts, *dir_ts;
3235 
3236         if (opc == INDEX_op_call) {
3237             nb_oargs = TCGOP_CALLO(op);
3238             nb_iargs = TCGOP_CALLI(op);
3239             call_flags = op->args[nb_oargs + nb_iargs + 1];
3240         } else {
3241             nb_iargs = def->nb_iargs;
3242             nb_oargs = def->nb_oargs;
3243 
3244             /* Set flags similar to those that calls require.  */
3245             if (def->flags & TCG_OPF_COND_BRANCH) {
3246                 /* Like reading globals: sync_globals */
3247                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3248             } else if (def->flags & TCG_OPF_BB_END) {
3249                 /* Like writing globals: save_globals */
3250                 call_flags = 0;
3251             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3252                 /* Like reading globals: sync_globals */
3253                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3254             } else {
3255                 /* No effect on globals.  */
3256                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3257                               TCG_CALL_NO_WRITE_GLOBALS);
3258             }
3259         }
3260 
3261         /* Make sure that input arguments are available.  */
3262         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3263             arg_ts = arg_temp(op->args[i]);
3264             if (arg_ts) {
3265                 dir_ts = arg_ts->state_ptr;
3266                 if (dir_ts && arg_ts->state == TS_DEAD) {
3267                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3268                                       ? INDEX_op_ld_i32
3269                                       : INDEX_op_ld_i64);
3270                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3271 
3272                     lop->args[0] = temp_arg(dir_ts);
3273                     lop->args[1] = temp_arg(arg_ts->mem_base);
3274                     lop->args[2] = arg_ts->mem_offset;
3275 
3276                     /* Loaded, but synced with memory.  */
3277                     arg_ts->state = TS_MEM;
3278                 }
3279             }
3280         }
3281 
3282         /* Perform input replacement, and mark inputs that became dead.
3283            No action is required except keeping temp_state up to date
3284            so that we reload when needed.  */
3285         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3286             arg_ts = arg_temp(op->args[i]);
3287             if (arg_ts) {
3288                 dir_ts = arg_ts->state_ptr;
3289                 if (dir_ts) {
3290                     op->args[i] = temp_arg(dir_ts);
3291                     changes = true;
3292                     if (IS_DEAD_ARG(i)) {
3293                         arg_ts->state = TS_DEAD;
3294                     }
3295                 }
3296             }
3297         }
3298 
3299         /* Liveness analysis should ensure that the following are
3300            all correct, for call sites and basic block end points.  */
3301         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3302             /* Nothing to do */
3303         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3304             for (i = 0; i < nb_globals; ++i) {
3305                 /* Liveness should see that globals are synced back,
3306                    that is, either TS_DEAD or TS_MEM.  */
3307                 arg_ts = &s->temps[i];
3308                 tcg_debug_assert(arg_ts->state_ptr == 0
3309                                  || arg_ts->state != 0);
3310             }
3311         } else {
3312             for (i = 0; i < nb_globals; ++i) {
3313                 /* Liveness should see that globals are saved back,
3314                    that is, TS_DEAD, waiting to be reloaded.  */
3315                 arg_ts = &s->temps[i];
3316                 tcg_debug_assert(arg_ts->state_ptr == 0
3317                                  || arg_ts->state == TS_DEAD);
3318             }
3319         }
3320 
3321         /* Outputs become available.  */
3322         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3323             arg_ts = arg_temp(op->args[0]);
3324             dir_ts = arg_ts->state_ptr;
3325             if (dir_ts) {
3326                 op->args[0] = temp_arg(dir_ts);
3327                 changes = true;
3328 
3329                 /* The output is now live and modified.  */
3330                 arg_ts->state = 0;
3331 
3332                 if (NEED_SYNC_ARG(0)) {
3333                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3334                                       ? INDEX_op_st_i32
3335                                       : INDEX_op_st_i64);
3336                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3337                     TCGTemp *out_ts = dir_ts;
3338 
3339                     if (IS_DEAD_ARG(0)) {
3340                         out_ts = arg_temp(op->args[1]);
3341                         arg_ts->state = TS_DEAD;
3342                         tcg_op_remove(s, op);
3343                     } else {
3344                         arg_ts->state = TS_MEM;
3345                     }
3346 
3347                     sop->args[0] = temp_arg(out_ts);
3348                     sop->args[1] = temp_arg(arg_ts->mem_base);
3349                     sop->args[2] = arg_ts->mem_offset;
3350                 } else {
3351                     tcg_debug_assert(!IS_DEAD_ARG(0));
3352                 }
3353             }
3354         } else {
3355             for (i = 0; i < nb_oargs; i++) {
3356                 arg_ts = arg_temp(op->args[i]);
3357                 dir_ts = arg_ts->state_ptr;
3358                 if (!dir_ts) {
3359                     continue;
3360                 }
3361                 op->args[i] = temp_arg(dir_ts);
3362                 changes = true;
3363 
3364                 /* The output is now live and modified.  */
3365                 arg_ts->state = 0;
3366 
3367                 /* Sync outputs upon their last write.  */
3368                 if (NEED_SYNC_ARG(i)) {
3369                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3370                                       ? INDEX_op_st_i32
3371                                       : INDEX_op_st_i64);
3372                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3373 
3374                     sop->args[0] = temp_arg(dir_ts);
3375                     sop->args[1] = temp_arg(arg_ts->mem_base);
3376                     sop->args[2] = arg_ts->mem_offset;
3377 
3378                     arg_ts->state = TS_MEM;
3379                 }
3380                 /* Drop outputs that are dead.  */
3381                 if (IS_DEAD_ARG(i)) {
3382                     arg_ts->state = TS_DEAD;
3383                 }
3384             }
3385         }
3386     }
3387 
3388     return changes;
3389 }
3390 
3391 #ifdef CONFIG_DEBUG_TCG
3392 static void dump_regs(TCGContext *s)
3393 {
3394     TCGTemp *ts;
3395     int i;
3396     char buf[64];
3397 
3398     for (i = 0; i < s->nb_temps; i++) {
3399         ts = &s->temps[i];
3400         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3401         switch (ts->val_type) {
3402         case TEMP_VAL_REG:
3403             printf("%s", tcg_target_reg_names[ts->reg]);
3404             break;
3405         case TEMP_VAL_MEM:
3406             printf("%d(%s)", (int)ts->mem_offset,
3407                    tcg_target_reg_names[ts->mem_base->reg]);
3408             break;
3409         case TEMP_VAL_CONST:
3410             printf("$0x%" PRIx64, ts->val);
3411             break;
3412         case TEMP_VAL_DEAD:
3413             printf("D");
3414             break;
3415         default:
3416             printf("???");
3417             break;
3418         }
3419         printf("\n");
3420     }
3421 
3422     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3423         if (s->reg_to_temp[i] != NULL) {
3424             printf("%s: %s\n",
3425                    tcg_target_reg_names[i],
3426                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3427         }
3428     }
3429 }
3430 
3431 static void check_regs(TCGContext *s)
3432 {
3433     int reg;
3434     int k;
3435     TCGTemp *ts;
3436     char buf[64];
3437 
3438     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3439         ts = s->reg_to_temp[reg];
3440         if (ts != NULL) {
3441             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3442                 printf("Inconsistency for register %s:\n",
3443                        tcg_target_reg_names[reg]);
3444                 goto fail;
3445             }
3446         }
3447     }
3448     for (k = 0; k < s->nb_temps; k++) {
3449         ts = &s->temps[k];
3450         if (ts->val_type == TEMP_VAL_REG
3451             && ts->kind != TEMP_FIXED
3452             && s->reg_to_temp[ts->reg] != ts) {
3453             printf("Inconsistency for temp %s:\n",
3454                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3455         fail:
3456             printf("reg state:\n");
3457             dump_regs(s);
3458             tcg_abort();
3459         }
3460     }
3461 }
3462 #endif
3463 
3464 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3465 {
3466 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3467     /* Sparc64 stack is accessed with offset of 2047 */
3468     s->current_frame_offset = (s->current_frame_offset +
3469                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3470         ~(sizeof(tcg_target_long) - 1);
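    /* E.g. with an 8-byte tcg_target_long, a frame offset of 20 rounds
       up to 24 here.  */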
3471 #endif
3472     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3473         s->frame_end) {
3474         tcg_abort();
3475     }
3476     ts->mem_offset = s->current_frame_offset;
3477     ts->mem_base = s->frame_temp;
3478     ts->mem_allocated = 1;
3479     s->current_frame_offset += sizeof(tcg_target_long);
3480 }
3481 
3482 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3483 
3484 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3485    mark it free; otherwise mark it dead.  */
3486 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3487 {
3488     TCGTempVal new_type;
3489 
3490     switch (ts->kind) {
3491     case TEMP_FIXED:
3492         return;
3493     case TEMP_GLOBAL:
3494     case TEMP_LOCAL:
3495         new_type = TEMP_VAL_MEM;
3496         break;
3497     case TEMP_NORMAL:
3498         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3499         break;
3500     case TEMP_CONST:
3501         new_type = TEMP_VAL_CONST;
3502         break;
3503     default:
3504         g_assert_not_reached();
3505     }
3506     if (ts->val_type == TEMP_VAL_REG) {
3507         s->reg_to_temp[ts->reg] = NULL;
3508     }
3509     ts->val_type = new_type;
3510 }
3511 
3512 /* Mark a temporary as dead.  */
3513 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3514 {
3515     temp_free_or_dead(s, ts, 1);
3516 }
3517 
3518 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3519    register needs to be allocated to store a constant.  If 'free_or_dead'
3520    is non-zero, subsequently release the temporary; if it is positive, the
3521    temp is dead; if it is negative, the temp is free.  */
3522 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3523                       TCGRegSet preferred_regs, int free_or_dead)
3524 {
3525     if (!temp_readonly(ts) && !ts->mem_coherent) {
3526         if (!ts->mem_allocated) {
3527             temp_allocate_frame(s, ts);
3528         }
3529         switch (ts->val_type) {
3530         case TEMP_VAL_CONST:
3531             /* If we're going to free the temp immediately, then we won't
3532                require it later in a register, so attempt to store the
3533                constant to memory directly.  */
3534             if (free_or_dead
3535                 && tcg_out_sti(s, ts->type, ts->val,
3536                                ts->mem_base->reg, ts->mem_offset)) {
3537                 break;
3538             }
3539             temp_load(s, ts, tcg_target_available_regs[ts->type],
3540                       allocated_regs, preferred_regs);
3541             /* fallthrough */
3542 
3543         case TEMP_VAL_REG:
3544             tcg_out_st(s, ts->type, ts->reg,
3545                        ts->mem_base->reg, ts->mem_offset);
3546             break;
3547 
3548         case TEMP_VAL_MEM:
3549             break;
3550 
3551         case TEMP_VAL_DEAD:
3552         default:
3553             tcg_abort();
3554         }
3555         ts->mem_coherent = 1;
3556     }
3557     if (free_or_dead) {
3558         temp_free_or_dead(s, ts, free_or_dead);
3559     }
3560 }
3561 
3562 /* free register 'reg' by spilling the corresponding temporary if necessary */
3563 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3564 {
3565     TCGTemp *ts = s->reg_to_temp[reg];
3566     if (ts != NULL) {
3567         temp_sync(s, ts, allocated_regs, 0, -1);
3568     }
3569 }
3570 
3571 /**
3572  * tcg_reg_alloc:
3573  * @required_regs: Set of registers in which we must allocate.
3574  * @allocated_regs: Set of registers which must be avoided.
3575  * @preferred_regs: Set of registers we should prefer.
3576  * @rev: True if we search the registers in "indirect" order.
3577  *
3578  * The allocated register must be in @required_regs & ~@allocated_regs,
3579  * but if we can put it in @preferred_regs we may save a move later.
3580  * but if we can put it in @preferred_regs we may save a move later.
 */
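/* Worked example (hypothetical registers): with required_regs =
   {R0,R1,R2}, allocated_regs = {R0} and preferred_regs = {R2}, we get
   reg_ct[1] = {R1,R2} and reg_ct[0] = {R2}; the loops below first try
   to find a free register in {R2}, then in {R1,R2}, and only then
   spill.  */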
3581 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3582                             TCGRegSet allocated_regs,
3583                             TCGRegSet preferred_regs, bool rev)
3584 {
3585     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3586     TCGRegSet reg_ct[2];
3587     const int *order;
3588 
3589     reg_ct[1] = required_regs & ~allocated_regs;
3590     tcg_debug_assert(reg_ct[1] != 0);
3591     reg_ct[0] = reg_ct[1] & preferred_regs;
3592 
3593     /* Skip the preferred_regs option if it cannot be satisfied,
3594        or if the preference made no difference.  */
3595     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3596 
3597     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3598 
3599     /* Try free registers, preferences first.  */
3600     for (j = f; j < 2; j++) {
3601         TCGRegSet set = reg_ct[j];
3602 
3603         if (tcg_regset_single(set)) {
3604             /* One register in the set.  */
3605             TCGReg reg = tcg_regset_first(set);
3606             if (s->reg_to_temp[reg] == NULL) {
3607                 return reg;
3608             }
3609         } else {
3610             for (i = 0; i < n; i++) {
3611                 TCGReg reg = order[i];
3612                 if (s->reg_to_temp[reg] == NULL &&
3613                     tcg_regset_test_reg(set, reg)) {
3614                     return reg;
3615                 }
3616             }
3617         }
3618     }
3619 
3620     /* We must spill something.  */
3621     for (j = f; j < 2; j++) {
3622         TCGRegSet set = reg_ct[j];
3623 
3624         if (tcg_regset_single(set)) {
3625             /* One register in the set.  */
3626             TCGReg reg = tcg_regset_first(set);
3627             tcg_reg_free(s, reg, allocated_regs);
3628             return reg;
3629         } else {
3630             for (i = 0; i < n; i++) {
3631                 TCGReg reg = order[i];
3632                 if (tcg_regset_test_reg(set, reg)) {
3633                     tcg_reg_free(s, reg, allocated_regs);
3634                     return reg;
3635                 }
3636             }
3637         }
3638     }
3639 
3640     tcg_abort();
3641 }
3642 
3643 /* Make sure the temporary is in a register.  If needed, allocate the register
3644    from DESIRED while avoiding ALLOCATED.  */
3645 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3646                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3647 {
3648     TCGReg reg;
3649 
3650     switch (ts->val_type) {
3651     case TEMP_VAL_REG:
3652         return;
3653     case TEMP_VAL_CONST:
3654         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3655                             preferred_regs, ts->indirect_base);
3656         if (ts->type <= TCG_TYPE_I64) {
3657             tcg_out_movi(s, ts->type, reg, ts->val);
3658         } else {
3659             uint64_t val = ts->val;
3660             MemOp vece = MO_64;
3661 
3662             /*
3663              * Find the minimal vector element that matches the constant.
3664              * The targets will, in general, have to do this search anyway,
3665              * so do it generically here.
3666              */
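            /* E.g. (illustration) 0x4242424242424242 matches
               dup_const(MO_8, 0x42) and yields vece = MO_8, while
               0x0001000200030004 matches no smaller width and stays
               MO_64.  */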
3667             if (val == dup_const(MO_8, val)) {
3668                 vece = MO_8;
3669             } else if (val == dup_const(MO_16, val)) {
3670                 vece = MO_16;
3671             } else if (val == dup_const(MO_32, val)) {
3672                 vece = MO_32;
3673             }
3674 
3675             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3676         }
3677         ts->mem_coherent = 0;
3678         break;
3679     case TEMP_VAL_MEM:
3680         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3681                             preferred_regs, ts->indirect_base);
3682         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3683         ts->mem_coherent = 1;
3684         break;
3685     case TEMP_VAL_DEAD:
3686     default:
3687         tcg_abort();
3688     }
3689     ts->reg = reg;
3690     ts->val_type = TEMP_VAL_REG;
3691     s->reg_to_temp[reg] = ts;
3692 }
3693 
3694 /* Save a temporary to memory. 'allocated_regs' is used in case a
3695    temporary registers needs to be allocated to store a constant.  */
3696    temporary register needs to be allocated to store a constant.  */
3697 {
3698     /* The liveness analysis already ensures that globals are back
3699        in memory. Keep an tcg_debug_assert for safety. */
3700        in memory. Keep a tcg_debug_assert for safety. */
3701 }
3702 
3703 /* save globals to their canonical location and assume they can be
3704    modified by the following code. 'allocated_regs' is used in case a
3705    temporary register needs to be allocated to store a constant. */
3706 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3707 {
3708     int i, n;
3709 
3710     for (i = 0, n = s->nb_globals; i < n; i++) {
3711         temp_save(s, &s->temps[i], allocated_regs);
3712     }
3713 }
3714 
3715 /* sync globals to their canonical location and assume they can be
3716    read by the following code. 'allocated_regs' is used in case a
3717    temporary register needs to be allocated to store a constant. */
3718 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3719 {
3720     int i, n;
3721 
3722     for (i = 0, n = s->nb_globals; i < n; i++) {
3723         TCGTemp *ts = &s->temps[i];
3724         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3725                          || ts->kind == TEMP_FIXED
3726                          || ts->mem_coherent);
3727     }
3728 }
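/* Contrast between the two (summary): save_globals leaves each global
   valid only in memory, so later uses must reload it, while sync_globals
   may keep a register copy as long as the memory image is coherent.  */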
3729 
3730 /* at the end of a basic block, we assume all temporaries are dead and
3731    all globals are stored at their canonical location. */
3732 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3733 {
3734     int i;
3735 
3736     for (i = s->nb_globals; i < s->nb_temps; i++) {
3737         TCGTemp *ts = &s->temps[i];
3738 
3739         switch (ts->kind) {
3740         case TEMP_LOCAL:
3741             temp_save(s, ts, allocated_regs);
3742             break;
3743         case TEMP_NORMAL:
3744             /* The liveness analysis already ensures that temps are dead.
3745                Keep a tcg_debug_assert for safety. */
3746             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3747             break;
3748         case TEMP_CONST:
3749             /* Similarly, we should have freed any allocated register. */
3750             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3751             break;
3752         default:
3753             g_assert_not_reached();
3754         }
3755     }
3756 
3757     save_globals(s, allocated_regs);
3758 }
3759 
3760 /*
3761  * At a conditional branch, we assume all temporaries are dead and
3762  * all globals and local temps are synced to their location.
3763  */
3764 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3765 {
3766     sync_globals(s, allocated_regs);
3767 
3768     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3769         TCGTemp *ts = &s->temps[i];
3770         /*
3771          * The liveness analysis already ensures that temps are dead.
3772          * Keep tcg_debug_asserts for safety.
3773          */
3774         switch (ts->kind) {
3775         case TEMP_LOCAL:
3776             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3777             break;
3778         case TEMP_NORMAL:
3779             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3780             break;
3781         case TEMP_CONST:
3782             break;
3783         default:
3784             g_assert_not_reached();
3785         }
3786     }
3787 }
3788 
3789 /*
3790  * Specialized code generation for INDEX_op_mov_* with a constant.
3791  */
3792 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3793                                   tcg_target_ulong val, TCGLifeData arg_life,
3794                                   TCGRegSet preferred_regs)
3795 {
3796     /* ENV should not be modified.  */
3797     tcg_debug_assert(!temp_readonly(ots));
3798 
3799     /* The movi is not explicitly generated here.  */
3800     if (ots->val_type == TEMP_VAL_REG) {
3801         s->reg_to_temp[ots->reg] = NULL;
3802     }
3803     ots->val_type = TEMP_VAL_CONST;
3804     ots->val = val;
3805     ots->mem_coherent = 0;
3806     if (NEED_SYNC_ARG(0)) {
3807         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3808     } else if (IS_DEAD_ARG(0)) {
3809         temp_dead(s, ots);
3810     }
3811 }
3812 
3813 /*
3814  * Specialized code generation for INDEX_op_mov_*.
3815  */
3816 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3817 {
3818     const TCGLifeData arg_life = op->life;
3819     TCGRegSet allocated_regs, preferred_regs;
3820     TCGTemp *ts, *ots;
3821     TCGType otype, itype;
3822 
3823     allocated_regs = s->reserved_regs;
3824     preferred_regs = op->output_pref[0];
3825     ots = arg_temp(op->args[0]);
3826     ts = arg_temp(op->args[1]);
3827 
3828     /* ENV should not be modified.  */
3829     tcg_debug_assert(!temp_readonly(ots));
3830 
3831     /* Note that otype != itype for no-op truncation.  */
3832     otype = ots->type;
3833     itype = ts->type;
3834 
3835     if (ts->val_type == TEMP_VAL_CONST) {
3836         /* propagate constant or generate sti */
3837         tcg_target_ulong val = ts->val;
3838         if (IS_DEAD_ARG(1)) {
3839             temp_dead(s, ts);
3840         }
3841         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3842         return;
3843     }
3844 
3845     /* If the source value is in memory we're going to be forced
3846        to have it in a register in order to perform the copy.  Copy
3847        the SOURCE value into its own register first, that way we
3848        don't have to reload SOURCE the next time it is used. */
3849     if (ts->val_type == TEMP_VAL_MEM) {
3850         temp_load(s, ts, tcg_target_available_regs[itype],
3851                   allocated_regs, preferred_regs);
3852     }
3853 
3854     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3855     if (IS_DEAD_ARG(0)) {
3856         /* mov to a non-saved dead register makes no sense (even with
3857            liveness analysis disabled). */
3858         tcg_debug_assert(NEED_SYNC_ARG(0));
3859         if (!ots->mem_allocated) {
3860             temp_allocate_frame(s, ots);
3861         }
3862         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3863         if (IS_DEAD_ARG(1)) {
3864             temp_dead(s, ts);
3865         }
3866         temp_dead(s, ots);
3867     } else {
3868         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3869             /* the mov can be suppressed */
3870             if (ots->val_type == TEMP_VAL_REG) {
3871                 s->reg_to_temp[ots->reg] = NULL;
3872             }
3873             ots->reg = ts->reg;
3874             temp_dead(s, ts);
3875         } else {
3876             if (ots->val_type != TEMP_VAL_REG) {
3877                 /* When allocating a new register, make sure to not spill the
3878                    input one. */
3879                 tcg_regset_set_reg(allocated_regs, ts->reg);
3880                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3881                                          allocated_regs, preferred_regs,
3882                                          ots->indirect_base);
3883             }
3884             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3885                 /*
3886                  * Cross register class move not supported.
3887                  * Store the source register into the destination slot
3888                  * and leave the destination temp as TEMP_VAL_MEM.
3889                  */
3890                 assert(!temp_readonly(ots));
3891                 if (!ots->mem_allocated) {
3892                     temp_allocate_frame(s, ots);
3893                 }
3894                 tcg_out_st(s, ts->type, ts->reg,
3895                            ots->mem_base->reg, ots->mem_offset);
3896                 ots->mem_coherent = 1;
3897                 temp_free_or_dead(s, ots, -1);
3898                 return;
3899             }
3900         }
3901         ots->val_type = TEMP_VAL_REG;
3902         ots->mem_coherent = 0;
3903         s->reg_to_temp[ots->reg] = ots;
3904         if (NEED_SYNC_ARG(0)) {
3905             temp_sync(s, ots, allocated_regs, 0, 0);
3906         }
3907     }
3908 }
3909 
3910 /*
3911  * Specialized code generation for INDEX_op_dup_vec.
3912  */
3913 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3914 {
3915     const TCGLifeData arg_life = op->life;
3916     TCGRegSet dup_out_regs, dup_in_regs;
3917     TCGTemp *its, *ots;
3918     TCGType itype, vtype;
3919     intptr_t endian_fixup;
3920     unsigned vece;
3921     bool ok;
3922 
3923     ots = arg_temp(op->args[0]);
3924     its = arg_temp(op->args[1]);
3925 
3926     /* ENV should not be modified.  */
3927     tcg_debug_assert(!temp_readonly(ots));
3928 
3929     itype = its->type;
3930     vece = TCGOP_VECE(op);
3931     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3932 
3933     if (its->val_type == TEMP_VAL_CONST) {
3934         /* Propagate constant via movi -> dupi.  */
3935         tcg_target_ulong val = its->val;
3936         if (IS_DEAD_ARG(1)) {
3937             temp_dead(s, its);
3938         }
3939         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3940         return;
3941     }
3942 
3943     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3944     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3945 
3946     /* Allocate the output register now.  */
3947     if (ots->val_type != TEMP_VAL_REG) {
3948         TCGRegSet allocated_regs = s->reserved_regs;
3949 
3950         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3951             /* Make sure to not spill the input register. */
3952             tcg_regset_set_reg(allocated_regs, its->reg);
3953         }
3954         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3955                                  op->output_pref[0], ots->indirect_base);
3956         ots->val_type = TEMP_VAL_REG;
3957         ots->mem_coherent = 0;
3958         s->reg_to_temp[ots->reg] = ots;
3959     }
3960 
3961     switch (its->val_type) {
3962     case TEMP_VAL_REG:
3963         /*
3964          * The dup constraints must be broad, covering all possible VECE.
3965          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3966          * to fail, indicating that extra moves are required for that case.
3967          */
3968         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3969             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3970                 goto done;
3971             }
3972             /* Try again from memory or a vector input register.  */
3973         }
3974         if (!its->mem_coherent) {
3975             /*
3976              * The input register is not synced, and so an extra store
3977              * would be required to use memory.  Attempt an integer-vector
3978              * register move first.  We do not have a TCGRegSet for this.
3979              */
3980             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3981                 break;
3982             }
3983             /* Sync the temp back to its slot and load from there.  */
3984             temp_sync(s, its, s->reserved_regs, 0, 0);
3985         }
3986         /* fall through */
3987 
3988     case TEMP_VAL_MEM:
3989 #ifdef HOST_WORDS_BIGENDIAN
3990         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3991         endian_fixup -= 1 << vece;
3992 #else
3993         endian_fixup = 0;
3994 #endif
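        /* E.g. on a big-endian host with itype == TCG_TYPE_I64 and
           vece == MO_8, endian_fixup is 8 - 1 = 7, addressing the least
           significant byte of the value.  */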
3995         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3996                              its->mem_offset + endian_fixup)) {
3997             goto done;
3998         }
3999         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4000         break;
4001 
4002     default:
4003         g_assert_not_reached();
4004     }
4005 
4006     /* We now have a vector input register, so dup must succeed. */
4007     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4008     tcg_debug_assert(ok);
4009 
4010  done:
4011     if (IS_DEAD_ARG(1)) {
4012         temp_dead(s, its);
4013     }
4014     if (NEED_SYNC_ARG(0)) {
4015         temp_sync(s, ots, s->reserved_regs, 0, 0);
4016     }
4017     if (IS_DEAD_ARG(0)) {
4018         temp_dead(s, ots);
4019     }
4020 }
4021 
4022 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4023 {
4024     const TCGLifeData arg_life = op->life;
4025     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4026     TCGRegSet i_allocated_regs;
4027     TCGRegSet o_allocated_regs;
4028     int i, k, nb_iargs, nb_oargs;
4029     TCGReg reg;
4030     TCGArg arg;
4031     const TCGArgConstraint *arg_ct;
4032     TCGTemp *ts;
4033     TCGArg new_args[TCG_MAX_OP_ARGS];
4034     int const_args[TCG_MAX_OP_ARGS];
4035 
4036     nb_oargs = def->nb_oargs;
4037     nb_iargs = def->nb_iargs;
4038 
4039     /* copy constants */
4040     memcpy(new_args + nb_oargs + nb_iargs,
4041            op->args + nb_oargs + nb_iargs,
4042            sizeof(TCGArg) * def->nb_cargs);
4043 
4044     i_allocated_regs = s->reserved_regs;
4045     o_allocated_regs = s->reserved_regs;
4046 
4047     /* satisfy input constraints */
4048     for (k = 0; k < nb_iargs; k++) {
4049         TCGRegSet i_preferred_regs, o_preferred_regs;
4050 
4051         i = def->args_ct[nb_oargs + k].sort_index;
4052         arg = op->args[i];
4053         arg_ct = &def->args_ct[i];
4054         ts = arg_temp(arg);
4055 
4056         if (ts->val_type == TEMP_VAL_CONST
4057             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4058             /* constant is OK for instruction */
4059             const_args[i] = 1;
4060             new_args[i] = ts->val;
4061             continue;
4062         }
4063 
4064         i_preferred_regs = o_preferred_regs = 0;
4065         if (arg_ct->ialias) {
4066             o_preferred_regs = op->output_pref[arg_ct->alias_index];
4067 
4068             /*
4069              * If the input is readonly, then it cannot also be an
4070              * output and aliased to itself.  If the input is not
4071              * dead after the instruction, we must allocate a new
4072              * register and move it.
4073              */
4074             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4075                 goto allocate_in_reg;
4076             }
4077 
4078             /*
4079              * Check if the current register has already been allocated
4080              * for another input aliased to an output.
4081              */
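            /* I.e. if two inputs that alias outputs currently share a
               register, the later one is moved to a fresh register so
               that each output can overwrite its own input in place.  */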
4082             if (ts->val_type == TEMP_VAL_REG) {
4083                 reg = ts->reg;
4084                 for (int k2 = 0; k2 < k; k2++) {
4085                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
4086                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4087                         goto allocate_in_reg;
4088                     }
4089                 }
4090             }
4091             i_preferred_regs = o_preferred_regs;
4092         }
4093 
4094         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4095         reg = ts->reg;
4096 
4097         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4098  allocate_in_reg:
4099             /*
4100              * Allocate a new register matching the constraint
4101              * and move the temporary register into it.
4102              */
4103             temp_load(s, ts, tcg_target_available_regs[ts->type],
4104                       i_allocated_regs, 0);
4105             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4106                                 o_preferred_regs, ts->indirect_base);
4107             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4108                 /*
4109                  * Cross register class move not supported.  Sync the
4110                  * temp back to its slot and load from there.
4111                  */
4112                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4113                 tcg_out_ld(s, ts->type, reg,
4114                            ts->mem_base->reg, ts->mem_offset);
4115             }
4116         }
4117         new_args[i] = reg;
4118         const_args[i] = 0;
4119         tcg_regset_set_reg(i_allocated_regs, reg);
4120     }
4121 
4122     /* mark dead temporaries and free the associated registers */
4123     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4124         if (IS_DEAD_ARG(i)) {
4125             temp_dead(s, arg_temp(op->args[i]));
4126         }
4127     }
4128 
4129     if (def->flags & TCG_OPF_COND_BRANCH) {
4130         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4131     } else if (def->flags & TCG_OPF_BB_END) {
4132         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4133     } else {
4134         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4135             /* XXX: permit generic clobber register list? */
4136             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4137                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4138                     tcg_reg_free(s, i, i_allocated_regs);
4139                 }
4140             }
4141         }
4142         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4143             /* sync globals if the op has side effects and might trigger
4144                an exception. */
4145             sync_globals(s, i_allocated_regs);
4146         }
4147 
4148         /* satisfy the output constraints */
4149         for (k = 0; k < nb_oargs; k++) {
4150             i = def->args_ct[k].sort_index;
4151             arg = op->args[i];
4152             arg_ct = &def->args_ct[i];
4153             ts = arg_temp(arg);
4154 
4155             /* ENV should not be modified.  */
4156             tcg_debug_assert(!temp_readonly(ts));
4157 
4158             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4159                 reg = new_args[arg_ct->alias_index];
4160             } else if (arg_ct->newreg) {
4161                 reg = tcg_reg_alloc(s, arg_ct->regs,
4162                                     i_allocated_regs | o_allocated_regs,
4163                                     op->output_pref[k], ts->indirect_base);
4164             } else {
4165                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4166                                     op->output_pref[k], ts->indirect_base);
4167             }
4168             tcg_regset_set_reg(o_allocated_regs, reg);
4169             if (ts->val_type == TEMP_VAL_REG) {
4170                 s->reg_to_temp[ts->reg] = NULL;
4171             }
4172             ts->val_type = TEMP_VAL_REG;
4173             ts->reg = reg;
4174             /*
4175              * Temp value is modified, so the value kept in memory is
4176              * potentially not the same.
4177              */
4178             ts->mem_coherent = 0;
4179             s->reg_to_temp[reg] = ts;
4180             new_args[i] = reg;
4181         }
4182     }
4183 
4184     /* emit instruction */
4185     if (def->flags & TCG_OPF_VECTOR) {
4186         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4187                        new_args, const_args);
4188     } else {
4189         tcg_out_op(s, op->opc, new_args, const_args);
4190     }
4191 
4192     /* move the outputs in the correct register if needed */
4193     for (i = 0; i < nb_oargs; i++) {
4194         ts = arg_temp(op->args[i]);
4195 
4196         /* ENV should not be modified.  */
4197         tcg_debug_assert(!temp_readonly(ts));
4198 
4199         if (NEED_SYNC_ARG(i)) {
4200             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4201         } else if (IS_DEAD_ARG(i)) {
4202             temp_dead(s, ts);
4203         }
4204     }
4205 }
4206 
4207 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4208 {
4209     const TCGLifeData arg_life = op->life;
4210     TCGTemp *ots, *itsl, *itsh;
4211     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4212 
4213     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4214     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4215     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4216 
4217     ots = arg_temp(op->args[0]);
4218     itsl = arg_temp(op->args[1]);
4219     itsh = arg_temp(op->args[2]);
4220 
4221     /* ENV should not be modified.  */
4222     tcg_debug_assert(!temp_readonly(ots));
4223 
4224     /* Allocate the output register now.  */
4225     if (ots->val_type != TEMP_VAL_REG) {
4226         TCGRegSet allocated_regs = s->reserved_regs;
4227         TCGRegSet dup_out_regs =
4228             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4229 
4230         /* Make sure to not spill the input registers. */
4231         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4232             tcg_regset_set_reg(allocated_regs, itsl->reg);
4233         }
4234         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4235             tcg_regset_set_reg(allocated_regs, itsh->reg);
4236         }
4237 
4238         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4239                                  op->output_pref[0], ots->indirect_base);
4240         ots->val_type = TEMP_VAL_REG;
4241         ots->mem_coherent = 0;
4242         s->reg_to_temp[ots->reg] = ots;
4243     }
4244 
4245     /* Promote dup2 of immediates to dupi_vec. */
4246     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4247         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4248         MemOp vece = MO_64;
4249 
4250         if (val == dup_const(MO_8, val)) {
4251             vece = MO_8;
4252         } else if (val == dup_const(MO_16, val)) {
4253             vece = MO_16;
4254         } else if (val == dup_const(MO_32, val)) {
4255             vece = MO_32;
4256         }
4257 
4258         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4259         goto done;
4260     }
4261 
4262     /* If the two inputs form one 64-bit value, try dupm_vec. */
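    /* "Form one value" here means the two parts are the consecutive
       halves of a single TCG_TYPE_I64 temp, so that temp's memory slot
       holds the complete 64-bit value to duplicate.  */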
4263     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4264         if (!itsl->mem_coherent) {
4265             temp_sync(s, itsl, s->reserved_regs, 0, 0);
4266         }
4267         if (!itsh->mem_coherent) {
4268             temp_sync(s, itsh, s->reserved_regs, 0, 0);
4269         }
4270 #ifdef HOST_WORDS_BIGENDIAN
4271         TCGTemp *its = itsh;
4272 #else
4273         TCGTemp *its = itsl;
4274 #endif
4275         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4276                              its->mem_base->reg, its->mem_offset)) {
4277             goto done;
4278         }
4279     }
4280 
4281     /* Fall back to generic expansion. */
4282     return false;
4283 
4284  done:
4285     if (IS_DEAD_ARG(1)) {
4286         temp_dead(s, itsl);
4287     }
4288     if (IS_DEAD_ARG(2)) {
4289         temp_dead(s, itsh);
4290     }
4291     if (NEED_SYNC_ARG(0)) {
4292         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4293     } else if (IS_DEAD_ARG(0)) {
4294         temp_dead(s, ots);
4295     }
4296     return true;
4297 }
4298 
4299 #ifdef TCG_TARGET_STACK_GROWSUP
4300 #define STACK_DIR(x) (-(x))
4301 #else
4302 #define STACK_DIR(x) (x)
4303 #endif
4304 
4305 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4306 {
4307     const int nb_oargs = TCGOP_CALLO(op);
4308     const int nb_iargs = TCGOP_CALLI(op);
4309     const TCGLifeData arg_life = op->life;
4310     int flags, nb_regs, i;
4311     TCGReg reg;
4312     TCGArg arg;
4313     TCGTemp *ts;
4314     intptr_t stack_offset;
4315     size_t call_stack_size;
4316     tcg_insn_unit *func_addr;
4317     int allocate_args;
4318     TCGRegSet allocated_regs;
4319 
4320     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4321     flags = op->args[nb_oargs + nb_iargs + 1];
4322 
4323     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4324     if (nb_regs > nb_iargs) {
4325         nb_regs = nb_iargs;
4326     }
4327 
4328     /* assign stack slots first */
4329     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4330     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4331         ~(TCG_TARGET_STACK_ALIGN - 1);
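    /* E.g. three stack words of 8 bytes with a 16-byte
       TCG_TARGET_STACK_ALIGN round up from 24 to 32 bytes here.  */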
4332     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4333     if (allocate_args) {
4334         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4335            preallocate call stack */
4336         tcg_abort();
4337     }
4338 
4339     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4340     for (i = nb_regs; i < nb_iargs; i++) {
4341         arg = op->args[nb_oargs + i];
4342 #ifdef TCG_TARGET_STACK_GROWSUP
4343         stack_offset -= sizeof(tcg_target_long);
4344 #endif
4345         if (arg != TCG_CALL_DUMMY_ARG) {
4346             ts = arg_temp(arg);
4347             temp_load(s, ts, tcg_target_available_regs[ts->type],
4348                       s->reserved_regs, 0);
4349             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4350         }
4351 #ifndef TCG_TARGET_STACK_GROWSUP
4352         stack_offset += sizeof(tcg_target_long);
4353 #endif
4354     }
4355 
4356     /* assign input registers */
4357     allocated_regs = s->reserved_regs;
4358     for (i = 0; i < nb_regs; i++) {
4359         arg = op->args[nb_oargs + i];
4360         if (arg != TCG_CALL_DUMMY_ARG) {
4361             ts = arg_temp(arg);
4362             reg = tcg_target_call_iarg_regs[i];
4363 
4364             if (ts->val_type == TEMP_VAL_REG) {
4365                 if (ts->reg != reg) {
4366                     tcg_reg_free(s, reg, allocated_regs);
4367                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4368                         /*
4369                          * Cross register class move not supported.  Sync the
4370                          * temp back to its slot and load from there.
4371                          */
4372                         temp_sync(s, ts, allocated_regs, 0, 0);
4373                         tcg_out_ld(s, ts->type, reg,
4374                                    ts->mem_base->reg, ts->mem_offset);
4375                     }
4376                 }
4377             } else {
4378                 TCGRegSet arg_set = 0;
4379 
4380                 tcg_reg_free(s, reg, allocated_regs);
4381                 tcg_regset_set_reg(arg_set, reg);
4382                 temp_load(s, ts, arg_set, allocated_regs, 0);
4383             }
4384 
4385             tcg_regset_set_reg(allocated_regs, reg);
4386         }
4387     }
4388 
4389     /* mark dead temporaries and free the associated registers */
4390     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4391         if (IS_DEAD_ARG(i)) {
4392             temp_dead(s, arg_temp(op->args[i]));
4393         }
4394     }
4395 
4396     /* clobber call registers */
4397     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4398         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4399             tcg_reg_free(s, i, allocated_regs);
4400         }
4401     }
4402 
4403     /* Save globals if they might be written by the helper, sync them if
4404        they might be read. */
4405     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4406         /* Nothing to do */
4407     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4408         sync_globals(s, allocated_regs);
4409     } else {
4410         save_globals(s, allocated_regs);
4411     }
4412 
4413     tcg_out_call(s, func_addr);
4414 
4415     /* assign output registers and emit moves if needed */
4416     for (i = 0; i < nb_oargs; i++) {
4417         arg = op->args[i];
4418         ts = arg_temp(arg);
4419 
4420         /* ENV should not be modified.  */
4421         tcg_debug_assert(!temp_readonly(ts));
4422 
4423         reg = tcg_target_call_oarg_regs[i];
4424         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4425         if (ts->val_type == TEMP_VAL_REG) {
4426             s->reg_to_temp[ts->reg] = NULL;
4427         }
4428         ts->val_type = TEMP_VAL_REG;
4429         ts->reg = reg;
4430         ts->mem_coherent = 0;
4431         s->reg_to_temp[reg] = ts;
4432         if (NEED_SYNC_ARG(i)) {
4433             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4434         } else if (IS_DEAD_ARG(i)) {
4435             temp_dead(s, ts);
4436         }
4437     }
4438 }
4439 
4440 #ifdef CONFIG_PROFILER
4441 
4442 /* avoid copy/paste errors */
4443 #define PROF_ADD(to, from, field)                       \
4444     do {                                                \
4445         (to)->field += qatomic_read(&((from)->field));  \
4446     } while (0)
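/* E.g. PROF_ADD(prof, orig, tb_count) effectively expands to
       prof->tb_count += qatomic_read(&(orig->tb_count));
   so each field name is written only once at the call site.  */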
4447 
4448 #define PROF_MAX(to, from, field)                                       \
4449     do {                                                                \
4450         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4451         if (val__ > (to)->field) {                                      \
4452             (to)->field = val__;                                        \
4453         }                                                               \
4454     } while (0)
4455 
4456 /* Pass in a zeroed @prof */
4457 static inline
4458 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4459 {
4460     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4461     unsigned int i;
4462 
4463     for (i = 0; i < n_ctxs; i++) {
4464         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4465         const TCGProfile *orig = &s->prof;
4466 
4467         if (counters) {
4468             PROF_ADD(prof, orig, cpu_exec_time);
4469             PROF_ADD(prof, orig, tb_count1);
4470             PROF_ADD(prof, orig, tb_count);
4471             PROF_ADD(prof, orig, op_count);
4472             PROF_MAX(prof, orig, op_count_max);
4473             PROF_ADD(prof, orig, temp_count);
4474             PROF_MAX(prof, orig, temp_count_max);
4475             PROF_ADD(prof, orig, del_op_count);
4476             PROF_ADD(prof, orig, code_in_len);
4477             PROF_ADD(prof, orig, code_out_len);
4478             PROF_ADD(prof, orig, search_out_len);
4479             PROF_ADD(prof, orig, interm_time);
4480             PROF_ADD(prof, orig, code_time);
4481             PROF_ADD(prof, orig, la_time);
4482             PROF_ADD(prof, orig, opt_time);
4483             PROF_ADD(prof, orig, restore_count);
4484             PROF_ADD(prof, orig, restore_time);
4485         }
4486         if (table) {
4487             int i;
4488 
4489             for (i = 0; i < NB_OPS; i++) {
4490                 PROF_ADD(prof, orig, table_op_count[i]);
4491             }
4492         }
4493     }
4494 }
4495 
4496 #undef PROF_ADD
4497 #undef PROF_MAX
4498 
4499 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4500 {
4501     tcg_profile_snapshot(prof, true, false);
4502 }
4503 
4504 static void tcg_profile_snapshot_table(TCGProfile *prof)
4505 {
4506     tcg_profile_snapshot(prof, false, true);
4507 }
4508 
4509 void tcg_dump_op_count(void)
4510 {
4511     TCGProfile prof = {};
4512     int i;
4513 
4514     tcg_profile_snapshot_table(&prof);
4515     for (i = 0; i < NB_OPS; i++) {
4516         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4517                     prof.table_op_count[i]);
4518     }
4519 }
4520 
4521 int64_t tcg_cpu_exec_time(void)
4522 {
4523     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4524     unsigned int i;
4525     int64_t ret = 0;
4526 
4527     for (i = 0; i < n_ctxs; i++) {
4528         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4529         const TCGProfile *prof = &s->prof;
4530 
4531         ret += qatomic_read(&prof->cpu_exec_time);
4532     }
4533     return ret;
4534 }
4535 #else
4536 void tcg_dump_op_count(void)
4537 {
4538     qemu_printf("[TCG profiler not compiled]\n");
4539 }
4540 
4541 int64_t tcg_cpu_exec_time(void)
4542 {
4543     error_report("%s: TCG profiler not compiled", __func__);
4544     exit(EXIT_FAILURE);
4545 }
4546 #endif
4547 
4548 
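/* Overview of the pipeline below: optimize the op stream, drop
   unreachable ops, run liveness, optionally lower indirect temps to
   direct ones (re-running liveness if that changed anything), then walk
   the ops once, allocating registers and emitting host code, and
   finally resolve ldst/pool labels and relocations and flush the
   instruction cache.  */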
4549 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4550 {
4551 #ifdef CONFIG_PROFILER
4552     TCGProfile *prof = &s->prof;
4553 #endif
4554     int i, num_insns;
4555     TCGOp *op;
4556 
4557 #ifdef CONFIG_PROFILER
4558     {
4559         int n = 0;
4560 
4561         QTAILQ_FOREACH(op, &s->ops, link) {
4562             n++;
4563         }
4564         qatomic_set(&prof->op_count, prof->op_count + n);
4565         if (n > prof->op_count_max) {
4566             qatomic_set(&prof->op_count_max, n);
4567         }
4568 
4569         n = s->nb_temps;
4570         qatomic_set(&prof->temp_count, prof->temp_count + n);
4571         if (n > prof->temp_count_max) {
4572             qatomic_set(&prof->temp_count_max, n);
4573         }
4574     }
4575 #endif
4576 
4577 #ifdef DEBUG_DISAS
4578     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4579                  && qemu_log_in_addr_range(tb->pc))) {
4580         FILE *logfile = qemu_log_lock();
4581         qemu_log("OP:\n");
4582         tcg_dump_ops(s, false);
4583         qemu_log("\n");
4584         qemu_log_unlock(logfile);
4585     }
4586 #endif
4587 
4588 #ifdef CONFIG_DEBUG_TCG
4589     /* Ensure all labels referenced have been emitted.  */
4590     {
4591         TCGLabel *l;
4592         bool error = false;
4593 
4594         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4595             if (unlikely(!l->present) && l->refs) {
4596                 qemu_log_mask(CPU_LOG_TB_OP,
4597                               "$L%d referenced but not present.\n", l->id);
4598                 error = true;
4599             }
4600         }
4601         assert(!error);
4602     }
4603 #endif
4604 
4605 #ifdef CONFIG_PROFILER
4606     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4607 #endif
4608 
4609 #ifdef USE_TCG_OPTIMIZATIONS
4610     tcg_optimize(s);
4611 #endif
4612 
4613 #ifdef CONFIG_PROFILER
4614     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4615     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4616 #endif
4617 
4618     reachable_code_pass(s);
4619     liveness_pass_1(s);
4620 
4621     if (s->nb_indirects > 0) {
4622 #ifdef DEBUG_DISAS
4623         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4624                      && qemu_log_in_addr_range(tb->pc))) {
4625             FILE *logfile = qemu_log_lock();
4626             qemu_log("OP before indirect lowering:\n");
4627             tcg_dump_ops(s, false);
4628             qemu_log("\n");
4629             qemu_log_unlock(logfile);
4630         }
4631 #endif
4632         /* Replace indirect temps with direct temps.  */
4633         if (liveness_pass_2(s)) {
4634             /* If changes were made, re-run liveness.  */
4635             liveness_pass_1(s);
4636         }
4637     }
4638 
4639 #ifdef CONFIG_PROFILER
4640     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4641 #endif
4642 
4643 #ifdef DEBUG_DISAS
4644     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4645                  && qemu_log_in_addr_range(tb->pc))) {
4646         FILE *logfile = qemu_log_lock();
4647         qemu_log("OP after optimization and liveness analysis:\n");
4648         tcg_dump_ops(s, true);
4649         qemu_log("\n");
4650         qemu_log_unlock(logfile);
4651     }
4652 #endif
4653 
4654     tcg_reg_alloc_start(s);
4655 
4656     /*
4657      * Reset the buffer pointers when restarting after overflow.
4658      * TODO: Move this into translate-all.c with the rest of the
4659      * buffer management.  Having only this done here is confusing.
4660      */
4661     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4662     s->code_ptr = s->code_buf;
4663 
4664 #ifdef TCG_TARGET_NEED_LDST_LABELS
4665     QSIMPLEQ_INIT(&s->ldst_labels);
4666 #endif
4667 #ifdef TCG_TARGET_NEED_POOL_LABELS
4668     s->pool_labels = NULL;
4669 #endif
4670 
4671     num_insns = -1;
4672     QTAILQ_FOREACH(op, &s->ops, link) {
4673         TCGOpcode opc = op->opc;
4674 
4675 #ifdef CONFIG_PROFILER
4676         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4677 #endif
4678 
4679         switch (opc) {
4680         case INDEX_op_mov_i32:
4681         case INDEX_op_mov_i64:
4682         case INDEX_op_mov_vec:
4683             tcg_reg_alloc_mov(s, op);
4684             break;
4685         case INDEX_op_dup_vec:
4686             tcg_reg_alloc_dup(s, op);
4687             break;
4688         case INDEX_op_insn_start:
4689             if (num_insns >= 0) {
4690                 size_t off = tcg_current_code_size(s);
4691                 s->gen_insn_end_off[num_insns] = off;
4692                 /* Assert that we do not overflow our stored offset.  */
4693                 assert(s->gen_insn_end_off[num_insns] == off);
4694             }
4695             num_insns++;
4696             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4697                 target_ulong a;
4698 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4699                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4700 #else
4701                 a = op->args[i];
4702 #endif
4703                 s->gen_insn_data[num_insns][i] = a;
4704             }
4705             break;
4706         case INDEX_op_discard:
4707             temp_dead(s, arg_temp(op->args[0]));
4708             break;
4709         case INDEX_op_set_label:
4710             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4711             tcg_out_label(s, arg_label(op->args[0]));
4712             break;
4713         case INDEX_op_call:
4714             tcg_reg_alloc_call(s, op);
4715             break;
4716         case INDEX_op_dup2_vec:
4717             if (tcg_reg_alloc_dup2(s, op)) {
4718                 break;
4719             }
4720             /* fall through */
4721         default:
4722             /* Sanity check that we've not introduced any unhandled opcodes. */
4723             tcg_debug_assert(tcg_op_supported(opc));
4724             /* Note: it would be much faster to have specialized
4725                register allocator functions for some common argument
4726                patterns. */
4727             tcg_reg_alloc_op(s, op);
4728             break;
4729         }
4730 #ifdef CONFIG_DEBUG_TCG
4731         check_regs(s);
4732 #endif
4733         /* Test for (pending) buffer overflow.  The assumption is that any
4734            one operation beginning below the high water mark cannot overrun
4735            the buffer completely.  Thus we can test for overflow after
4736            generating code without having to check during generation.  */
4737         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4738             return -1;
4739         }
4740         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4741         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4742             return -2;
4743         }
4744     }
4745     tcg_debug_assert(num_insns >= 0);
4746     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4747 
4748     /* Generate TB finalization at the end of block */
4749 #ifdef TCG_TARGET_NEED_LDST_LABELS
4750     i = tcg_out_ldst_finalize(s);
4751     if (i < 0) {
4752         return i;
4753     }
4754 #endif
4755 #ifdef TCG_TARGET_NEED_POOL_LABELS
4756     i = tcg_out_pool_finalize(s);
4757     if (i < 0) {
4758         return i;
4759     }
4760 #endif
4761     if (!tcg_resolve_relocs(s)) {
4762         return -2;
4763     }
4764 
4765 #ifndef CONFIG_TCG_INTERPRETER
4766     /* flush instruction cache */
4767     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4768                         (uintptr_t)s->code_buf,
4769                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4770 #endif
4771 
4772     return tcg_current_code_size(s);
4773 }
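/* A negative return reports "out of space" to the caller, tb_gen_code()
   in accel/tcg/translate-all.c: -1 means the code buffer is (nearly)
   full, -2 that this one TB outgrew what gen_insn_end_off can index.
   A sketch of the caller's reaction (simplified, not verbatim):

       gen_code_size = tcg_gen_code(tcg_ctx, tb);
       if (unlikely(gen_code_size < 0)) {
           if (gen_code_size == -1) {
               tb_flush(cpu);      // start over with an empty buffer
           } else {
               max_insns /= 2;     // retry this TB with fewer guest insns
           }
           // ... retranslate ...
       }
 */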
4774 
4775 #ifdef CONFIG_PROFILER
4776 void tcg_dump_info(void)
4777 {
4778     TCGProfile prof = {};
4779     const TCGProfile *s;
4780     int64_t tb_count;
4781     int64_t tb_div_count;
4782     int64_t tot;
4783 
4784     tcg_profile_snapshot_counters(&prof);
4785     s = &prof;
4786     tb_count = s->tb_count;
4787     tb_div_count = tb_count ? tb_count : 1;
4788     tot = s->interm_time + s->code_time;
4789 
4790     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4791                 tot, tot / 2.4e9);
4792     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4793                 " %0.1f%%)\n",
4794                 tb_count, s->tb_count1 - tb_count,
4795                 (double)(s->tb_count1 - s->tb_count)
4796                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4797     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4798                 (double)s->op_count / tb_div_count, s->op_count_max);
4799     qemu_printf("deleted ops/TB      %0.2f\n",
4800                 (double)s->del_op_count / tb_div_count);
4801     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4802                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4803     qemu_printf("avg host code/TB    %0.1f\n",
4804                 (double)s->code_out_len / tb_div_count);
4805     qemu_printf("avg search data/TB  %0.1f\n",
4806                 (double)s->search_out_len / tb_div_count);
4807 
4808     qemu_printf("cycles/op           %0.1f\n",
4809                 s->op_count ? (double)tot / s->op_count : 0);
4810     qemu_printf("cycles/in byte      %0.1f\n",
4811                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4812     qemu_printf("cycles/out byte     %0.1f\n",
4813                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4814     qemu_printf("cycles/search byte  %0.1f\n",
4815                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4816     if (tot == 0) {
4817         tot = 1;
4818     }
4819     qemu_printf("  gen_interm time   %0.1f%%\n",
4820                 (double)s->interm_time / tot * 100.0);
4821     qemu_printf("  gen_code time     %0.1f%%\n",
4822                 (double)s->code_time / tot * 100.0);
4823     qemu_printf("optim./code time    %0.1f%%\n",
4824                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4825                 * 100.0);
4826     qemu_printf("liveness/code time  %0.1f%%\n",
4827                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4828     qemu_printf("cpu_restore count   %" PRId64 "\n",
4829                 s->restore_count);
4830     qemu_printf("  avg cycles        %0.1f\n",
4831                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4832 }
4833 #else
4834 void tcg_dump_info(void)
4835 {
4836     qemu_printf("[TCG profiler not compiled]\n");
4837 }
4838 #endif
4839 
4840 #ifdef ELF_HOST_MACHINE
4841 /* In order to use this feature, the backend needs to do three things:
4842 
4843    (1) Define ELF_HOST_MACHINE, which both supplies the e_machine
4844        value placed in the ELF image and signals support for the feature.
4845 
4846    (2) Define tcg_register_jit.  This should create a buffer containing
4847        the contents of a .debug_frame section that describes the post-
4848        prologue unwind info for the tcg machine.
4849 
4850    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4851 */
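/* As an illustration, a backend's tcg_register_jit() is typically a
   thin wrapper (a minimal sketch, not any backend's verbatim code):

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           static const DebugFrame debug_frame = {
               ...  // CIE/FDE bytes describing the prologue's unwind info
           };
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   where DebugFrame is a backend-local structure built around the
   DebugFrameHeader defined near the top of this file.  */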
4852 
4853 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4854 typedef enum {
4855     JIT_NOACTION = 0,
4856     JIT_REGISTER_FN,
4857     JIT_UNREGISTER_FN
4858 } jit_actions_t;
4859 
4860 struct jit_code_entry {
4861     struct jit_code_entry *next_entry;
4862     struct jit_code_entry *prev_entry;
4863     const void *symfile_addr;
4864     uint64_t symfile_size;
4865 };
4866 
4867 struct jit_descriptor {
4868     uint32_t version;
4869     uint32_t action_flag;
4870     struct jit_code_entry *relevant_entry;
4871     struct jit_code_entry *first_entry;
4872 };
4873 
4874 void __jit_debug_register_code(void) __attribute__((noinline));
4875 void __jit_debug_register_code(void)
4876 {
4877     asm("");
4878 }
4879 
4880 /* Must statically initialize the version, because GDB may check
4881    the version before we can set it.  */
4882 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
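/* That is: version = 1, action_flag = JIT_NOACTION, and no entries yet.  */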
4883 
4884 /* End GDB interface.  */
4885 
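/* Return the offset of STR within the string table STRTAB.  STR must be
   present, as the loop has no other way to terminate.  For instance,
   with the img->str table defined below, find_string(img->str, ".text")
   yields 1, the offset just past the leading NUL.  */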
4886 static int find_string(const char *strtab, const char *str)
4887 {
4888     const char *p = strtab + 1;
4889 
4890     while (1) {
4891         if (strcmp(p, str) == 0) {
4892             return p - strtab;
4893         }
4894         p += strlen(p) + 1;
4895     }
4896 }
4897 
4898 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4899                                  const void *debug_frame,
4900                                  size_t debug_frame_size)
4901 {
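    /* A minimal, hand-rolled DWARF .debug_info section: one compile-unit
       DIE spanning the buffer plus one subprogram DIE for
       code_gen_buffer, laid out to match the abbreviation table in
       "da" below.  */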
4902     struct __attribute__((packed)) DebugInfo {
4903         uint32_t  len;
4904         uint16_t  version;
4905         uint32_t  abbrev;
4906         uint8_t   ptr_size;
4907         uint8_t   cu_die;
4908         uint16_t  cu_lang;
4909         uintptr_t cu_low_pc;
4910         uintptr_t cu_high_pc;
4911         uint8_t   fn_die;
4912         char      fn_name[16];
4913         uintptr_t fn_low_pc;
4914         uintptr_t fn_high_pc;
4915         uint8_t   cu_eoc;
4916     };
4917 
4918     struct ElfImage {
4919         ElfW(Ehdr) ehdr;
4920         ElfW(Phdr) phdr;
4921         ElfW(Shdr) shdr[7];
4922         ElfW(Sym)  sym[2];
4923         struct DebugInfo di;
4924         uint8_t    da[24];
4925         char       str[80];
4926     };
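    /* This is a complete ELF file held in one struct; the only
       variable-sized piece, .debug_frame, is appended directly after it
       (hence shdr[4].sh_offset == sizeof(struct ElfImage) below).  */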
4927 
4928     struct ElfImage *img;
4929 
4930     static const struct ElfImage img_template = {
4931         .ehdr = {
4932             .e_ident[EI_MAG0] = ELFMAG0,
4933             .e_ident[EI_MAG1] = ELFMAG1,
4934             .e_ident[EI_MAG2] = ELFMAG2,
4935             .e_ident[EI_MAG3] = ELFMAG3,
4936             .e_ident[EI_CLASS] = ELF_CLASS,
4937             .e_ident[EI_DATA] = ELF_DATA,
4938             .e_ident[EI_VERSION] = EV_CURRENT,
4939             .e_type = ET_EXEC,
4940             .e_machine = ELF_HOST_MACHINE,
4941             .e_version = EV_CURRENT,
4942             .e_phoff = offsetof(struct ElfImage, phdr),
4943             .e_shoff = offsetof(struct ElfImage, shdr),
4944             .e_ehsize = sizeof(ElfW(Ehdr)),
4945             .e_phentsize = sizeof(ElfW(Phdr)),
4946             .e_phnum = 1,
4947             .e_shentsize = sizeof(ElfW(Shdr)),
4948             .e_shnum = ARRAY_SIZE(img->shdr),
4949             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4950 #ifdef ELF_HOST_FLAGS
4951             .e_flags = ELF_HOST_FLAGS,
4952 #endif
4953 #ifdef ELF_OSABI
4954             .e_ident[EI_OSABI] = ELF_OSABI,
4955 #endif
4956         },
4957         .phdr = {
4958             .p_type = PT_LOAD,
4959             .p_flags = PF_X,
4960         },
4961         .shdr = {
4962             [0] = { .sh_type = SHT_NULL },
4963             /* Trick: The contents of code_gen_buffer are not present in
4964                this fake ELF file; that got allocated elsewhere.  Therefore
4965                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4966                will not look for contents.  We can record any address.  */
4967             [1] = { /* .text */
4968                 .sh_type = SHT_NOBITS,
4969                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4970             },
4971             [2] = { /* .debug_info */
4972                 .sh_type = SHT_PROGBITS,
4973                 .sh_offset = offsetof(struct ElfImage, di),
4974                 .sh_size = sizeof(struct DebugInfo),
4975             },
4976             [3] = { /* .debug_abbrev */
4977                 .sh_type = SHT_PROGBITS,
4978                 .sh_offset = offsetof(struct ElfImage, da),
4979                 .sh_size = sizeof(img->da),
4980             },
4981             [4] = { /* .debug_frame */
4982                 .sh_type = SHT_PROGBITS,
4983                 .sh_offset = sizeof(struct ElfImage),
4984             },
4985             [5] = { /* .symtab */
4986                 .sh_type = SHT_SYMTAB,
4987                 .sh_offset = offsetof(struct ElfImage, sym),
4988                 .sh_size = sizeof(img->sym),
4989                 .sh_info = 1,
4990                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4991                 .sh_entsize = sizeof(ElfW(Sym)),
4992             },
4993             [6] = { /* .strtab */
4994                 .sh_type = SHT_STRTAB,
4995                 .sh_offset = offsetof(struct ElfImage, str),
4996                 .sh_size = sizeof(img->str),
4997             }
4998         },
4999         .sym = {
5000             [1] = { /* code_gen_buffer */
5001                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5002                 .st_shndx = 1,
5003             }
5004         },
5005         .di = {
5006             .len = sizeof(struct DebugInfo) - 4,
5007             .version = 2,
5008             .ptr_size = sizeof(void *),
5009             .cu_die = 1,
5010             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5011             .fn_die = 2,
5012             .fn_name = "code_gen_buffer"
5013         },
5014         .da = {
5015             1,          /* abbrev number (the cu) */
5016             0x11, 1,    /* DW_TAG_compile_unit, has children */
5017             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5018             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5019             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5020             0, 0,       /* end of abbrev */
5021             2,          /* abbrev number (the fn) */
5022             0x2e, 0,    /* DW_TAG_subprogram, no children */
5023             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5024             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5025             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5026             0, 0,       /* end of abbrev */
5027             0           /* no more abbrev */
5028         },
5029         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5030                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5031     };
5032 
5033     /* We only need a single jit entry; statically allocate it.  */
5034     static struct jit_code_entry one_entry;
5035 
5036     uintptr_t buf = (uintptr_t)buf_ptr;
5037     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5038     DebugFrameHeader *dfh;
5039 
5040     img = g_malloc(img_size);
5041     *img = img_template;
5042 
5043     img->phdr.p_vaddr = buf;
5044     img->phdr.p_paddr = buf;
5045     img->phdr.p_memsz = buf_size;
5046 
5047     img->shdr[1].sh_name = find_string(img->str, ".text");
5048     img->shdr[1].sh_addr = buf;
5049     img->shdr[1].sh_size = buf_size;
5050 
5051     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5052     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5053 
5054     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5055     img->shdr[4].sh_size = debug_frame_size;
5056 
5057     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5058     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5059 
5060     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5061     img->sym[1].st_value = buf;
5062     img->sym[1].st_size = buf_size;
5063 
5064     img->di.cu_low_pc = buf;
5065     img->di.cu_high_pc = buf + buf_size;
5066     img->di.fn_low_pc = buf;
5067     img->di.fn_high_pc = buf + buf_size;
5068 
5069     dfh = (DebugFrameHeader *)(img + 1);
5070     memcpy(dfh, debug_frame, debug_frame_size);
5071     dfh->fde.func_start = buf;
5072     dfh->fde.func_len = buf_size;
5073 
5074 #ifdef DEBUG_JIT
5075     /* Enable this block to debug creation of the ELF image, which can
5076        then be inspected with readelf, objdump, or similar utilities.  */
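    /* For example (illustrative invocations, adjust to taste):
           readelf --sections --syms /tmp/qemu.jit
           objdump --dwarf=frames /tmp/qemu.jit
     */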
5077     {
5078         FILE *f = fopen("/tmp/qemu.jit", "w+b");
5079         if (f) {
5080             if (fwrite(img, img_size, 1, f) != 1) {
5081                 /* Check only to silence fwrite's unused-result warning.  */
5082             }
5083             fclose(f);
5084         }
5085     }
5086 #endif
5087 
5088     one_entry.symfile_addr = img;
5089     one_entry.symfile_size = img_size;
5090 
5091     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5092     __jit_debug_descriptor.relevant_entry = &one_entry;
5093     __jit_debug_descriptor.first_entry = &one_entry;
5094     __jit_debug_register_code();
5095 }
5096 #else
5097 /* No support for the feature.  Provide the entry point expected by exec.c,
5098    and implement the internal function we declared earlier.  */
5099 
5100 static void tcg_register_jit_int(const void *buf, size_t size,
5101                                  const void *debug_frame,
5102                                  size_t debug_frame_size)
5103 {
5104 }
5105 
5106 void tcg_register_jit(const void *buf, size_t buf_size)
5107 {
5108 }
5109 #endif /* ELF_HOST_MACHINE */
5110 
5111 #if !TCG_TARGET_MAYBE_vec
5112 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5113 {
5114     g_assert_not_reached();
5115 }
5116 #endif
5117