xref: /openbmc/qemu/tcg/tcg.c (revision 31eb7ddd)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
/* Forward declarations for functions defined in tcg-target.inc.c and
   used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE header placed at the start of the debug-frame data handed to the
 * JIT registration code.
 * NOTE(review): field names follow the DWARF .debug_frame Common
 * Information Entry layout; confirm encodings against the DWARF spec.
 */
typedef struct {
    /* Forced to pointer-size alignment by the attribute. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    /* One byte only: room for an empty (NUL-only) augmentation string. */
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * Fixed-size leading part of an FDE; packed so that no padding is
 * inserted between the 32-bit fields and the pointer-sized ones.
 * NOTE(review): presumably matches the DWARF Frame Description Entry
 * header -- confirm against the DWARF spec.
 */
typedef struct QEMU_PACKED {
    /* Forced to pointer-size alignment by the attribute. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    /* Start and length of the code range described (host-pointer sized). */
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* A CIE immediately followed by an FDE header, packed with no padding. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
/* Forward declarations for functions defined and used in tcg-target.inc.c. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
/*
 * Vector code emitters.  When the backend may support vectors
 * (TCG_TARGET_MAYBE_vec) these are only declared here; otherwise they are
 * replaced by inline stubs that must never be reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stub: no vector support, so calling this is a programming error. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: no vector support, so calling this is a programming error. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: no vector support, so calling this is a programming error. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
/* Stub: no vector support, so calling this is a programming error. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
/*
 * Number of bytes kept in reserve at the end of a region: a context's
 * code_gen_highwater is set this far below the end of its region.
 */
#define TCG_HIGHWATER 1024

/* Array of registered TCG contexts, and the number of entries in use. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Handle for the "env" global; assigned by tcg_context_init(). */
TCGv_env cpu_env = 0;
163 
/*
 * Per-region container of translation blocks.  All accesses to the tree
 * in this file are made with the lock held.
 */
struct tcg_region_tree {
    QemuMutex lock;
    /* Created with tb_tc_cmp as comparator; keys are &tb->tc, values the TB. */
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
169 
170 /*
171  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
172  * dynamically allocate from as demand dictates. Given appropriate region
173  * sizing, this minimizes flushes even when some TCG threads generate a lot
174  * more code than others.
175  */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of the buffer, as given (may be unaligned) */
    void *start_aligned; /* start rounded up to the host page size */
    void *end;           /* page-aligned end, minus the final guard page */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
191 
/* Region bookkeeping for code_gen_buffer; filled in by tcg_region_init(). */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* sizeof(struct tcg_region_tree) rounded up to the dcache line size. */
static size_t tree_size;
/*
 * NOTE(review): not written in this part of the file; presumably filled in
 * by the backend's tcg_target_init() -- confirm.
 */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Tested by tcg_context_init() to locate the first call-clobbered register. */
static TCGRegSet tcg_target_call_clobber_regs;
202 
203 #if TCG_TARGET_INSN_UNIT_SIZE == 1
204 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
205 {
206     *s->code_ptr++ = v;
207 }
208 
209 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
210                                                       uint8_t v)
211 {
212     *p = v;
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.inc.c"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static void tcg_region_tree_reset_all(void)
506 {
507     size_t i;
508 
509     tcg_region_tree_lock_all();
510     for (i = 0; i < region.n; i++) {
511         struct tcg_region_tree *rt = region_trees + i * tree_size;
512 
513         /* Increment the refcount first so that destroy acts as a reset */
514         g_tree_ref(rt->tree);
515         g_tree_destroy(rt->tree);
516     }
517     tcg_region_tree_unlock_all();
518 }
519 
520 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
521 {
522     void *start, *end;
523 
524     start = region.start_aligned + curr_region * region.stride;
525     end = start + region.size;
526 
527     if (curr_region == 0) {
528         start = region.start;
529     }
530     if (curr_region == region.n - 1) {
531         end = region.end;
532     }
533 
534     *pstart = start;
535     *pend = end;
536 }
537 
538 static void tcg_region_assign(TCGContext *s, size_t curr_region)
539 {
540     void *start, *end;
541 
542     tcg_region_bounds(curr_region, &start, &end);
543 
544     s->code_gen_buffer = start;
545     s->code_gen_ptr = start;
546     s->code_gen_buffer_size = end - start;
547     s->code_gen_highwater = end - TCG_HIGHWATER;
548 }
549 
550 static bool tcg_region_alloc__locked(TCGContext *s)
551 {
552     if (region.current == region.n) {
553         return true;
554     }
555     tcg_region_assign(s, region.current);
556     region.current++;
557     return false;
558 }
559 
560 /*
561  * Request a new region once the one in use has filled up.
562  * Returns true on error.
563  */
564 static bool tcg_region_alloc(TCGContext *s)
565 {
566     bool err;
567     /* read the region size now; alloc__locked will overwrite it on success */
568     size_t size_full = s->code_gen_buffer_size;
569 
570     qemu_mutex_lock(&region.lock);
571     err = tcg_region_alloc__locked(s);
572     if (!err) {
573         region.agg_size_full += size_full - TCG_HIGHWATER;
574     }
575     qemu_mutex_unlock(&region.lock);
576     return err;
577 }
578 
579 /*
580  * Perform a context's first region allocation.
581  * This function does _not_ increment region.agg_size_full.
582  */
583 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
584 {
585     return tcg_region_alloc__locked(s);
586 }
587 
588 /* Call from a safe-work context */
589 void tcg_region_reset_all(void)
590 {
591     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
592     unsigned int i;
593 
594     qemu_mutex_lock(&region.lock);
595     region.current = 0;
596     region.agg_size_full = 0;
597 
598     for (i = 0; i < n_ctxs; i++) {
599         TCGContext *s = atomic_read(&tcg_ctxs[i]);
600         bool err = tcg_region_initial_alloc__locked(s);
601 
602         g_assert(!err);
603     }
604     qemu_mutex_unlock(&region.lock);
605 
606     tcg_region_tree_reset_all();
607 }
608 
609 #ifdef CONFIG_USER_ONLY
/* User-mode builds always use exactly one region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
614 #else
615 /*
616  * It is likely that some vCPUs will translate more code than others, so we
617  * first try to set more regions than max_cpus, with those regions being of
618  * reasonable size. If that's not possible we make do by evenly dividing
619  * the code_gen_buffer among the vCPUs.
620  */
621 static size_t tcg_n_regions(void)
622 {
623     size_t i;
624 
625     /* Use a single region if all we have is one vCPU thread */
626 #if !defined(CONFIG_USER_ONLY)
627     MachineState *ms = MACHINE(qdev_get_machine());
628     unsigned int max_cpus = ms->smp.max_cpus;
629 #endif
630     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
631         return 1;
632     }
633 
634     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
635     for (i = 8; i > 0; i--) {
636         size_t regions_per_thread = i;
637         size_t region_size;
638 
639         region_size = tcg_init_ctx.code_gen_buffer_size;
640         region_size /= max_cpus * regions_per_thread;
641 
642         if (region_size >= 2 * 1024u * 1024) {
643             return max_cpus * regions_per_thread;
644         }
645     }
646     /* If we can't, then just allocate one region per vCPU thread */
647     return max_cpus;
648 }
649 #endif
650 
651 /*
652  * Initializes region partitioning.
653  *
654  * Called at init time from the parent thread (i.e. the one calling
655  * tcg_context_init), after the target's TCG globals have been set.
656  *
657  * Region partitioning works by splitting code_gen_buffer into separate regions,
658  * and then assigning regions to TCG threads so that the threads can translate
659  * code in parallel without synchronization.
660  *
661  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
662  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
663  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
664  * must have been parsed before calling this function, since it calls
665  * qemu_tcg_mttcg_enabled().
666  *
667  * In user-mode we use a single region.  Having multiple regions in user-mode
668  * is not supported, because the number of vCPU threads (recall that each thread
669  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
670  * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
674  *
675  * However, this user-mode limitation is unlikely to be a significant problem
676  * in practice. Multi-threaded guests share most if not all of their translated
677  * code, which makes parallel code generation less appealing than in softmmu.
678  */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    /* The aligned start must still lie inside the buffer. */
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* Usable bytes per region exclude the trailing guard page ... */
    region.size = region_size - page_size;
    /* ... whereas the distance between region starts includes it. */
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        /* The guard page is the page starting at the end of region i. */
        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        /* Note: region.lock is not taken around this call. */
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
738 
739 /*
740  * All TCG threads except the parent (i.e. the one that called tcg_context_init
741  * and registered the target's TCG globals) must register with this function
742  * before initiating translation.
743  *
744  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
745  * of tcg_region_init() for the reasoning behind this.
746  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
750  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
753  */
754 #ifdef CONFIG_USER_ONLY
/* User-mode: the calling thread simply uses the shared initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
759 #else
760 void tcg_register_thread(void)
761 {
762     MachineState *ms = MACHINE(qdev_get_machine());
763     TCGContext *s = g_malloc(sizeof(*s));
764     unsigned int i, n;
765     bool err;
766 
767     *s = tcg_init_ctx;
768 
769     /* Relink mem_base.  */
770     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
771         if (tcg_init_ctx.temps[i].mem_base) {
772             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
773             tcg_debug_assert(b >= 0 && b < n);
774             s->temps[i].mem_base = &s->temps[b];
775         }
776     }
777 
778     /* Claim an entry in tcg_ctxs */
779     n = atomic_fetch_inc(&n_tcg_ctxs);
780     g_assert(n < ms->smp.max_cpus);
781     atomic_set(&tcg_ctxs[n], s);
782 
783     tcg_ctx = s;
784     qemu_mutex_lock(&region.lock);
785     err = tcg_region_initial_alloc__locked(tcg_ctx);
786     g_assert(!err);
787     qemu_mutex_unlock(&region.lock);
788 }
789 #endif /* !CONFIG_USER_ONLY */
790 
791 /*
792  * Returns the size (in bytes) of all translated code (i.e. from all regions)
793  * currently in the cache.
794  * See also: tcg_code_capacity()
795  * Do not confuse with tcg_current_code_size(); that one applies to a single
796  * TCG context.
797  */
798 size_t tcg_code_size(void)
799 {
800     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
801     unsigned int i;
802     size_t total;
803 
804     qemu_mutex_lock(&region.lock);
805     total = region.agg_size_full;
806     for (i = 0; i < n_ctxs; i++) {
807         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
808         size_t size;
809 
810         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
811         g_assert(size <= s->code_gen_buffer_size);
812         total += size;
813     }
814     qemu_mutex_unlock(&region.lock);
815     return total;
816 }
817 
818 /*
819  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
820  * regions.
821  * See also: tcg_code_size()
822  */
823 size_t tcg_code_capacity(void)
824 {
825     size_t guard_size, capacity;
826 
827     /* no need for synchronization; these variables are set at init time */
828     guard_size = region.stride - region.size;
829     capacity = region.end + guard_size - region.start;
830     capacity -= region.n * (guard_size + TCG_HIGHWATER);
831     return capacity;
832 }
833 
834 size_t tcg_tb_phys_invalidate_count(void)
835 {
836     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
837     unsigned int i;
838     size_t total = 0;
839 
840     for (i = 0; i < n_ctxs; i++) {
841         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
842 
843         total += atomic_read(&s->tb_phys_invalidate_count);
844     }
845     return total;
846 }
847 
/*
 * pool based memory allocation
 *
 * Returns a pointer to @size bytes owned by context @s.
 *
 * Requests larger than TCG_POOL_CHUNK_SIZE get a dedicated pool that is
 * pushed onto the pool_first_large list (freed by tcg_pool_reset()).
 * Smaller requests move on to the next chunk of the regular chain,
 * allocating a new chunk only when the chain has no further entry, and
 * return the start of that chunk; pool_cur/pool_end are updated to describe
 * the space remaining in it.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        /* The current-chunk state is deliberately left untouched here. */
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            /* No current chunk: reuse the first one if the chain exists. */
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            /* Also entered via goto when the chain is still empty. */
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                /* Append after the current chunk, or start the chain. */
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                /* A chunk is already linked after the current one: reuse it. */
                p = p->next;
            }
        }
    }
    /* The request is served from the very start of the selected chunk. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
888 
889 void tcg_pool_reset(TCGContext *s)
890 {
891     TCGPool *p, *t;
892     for (p = s->pool_first_large; p; p = t) {
893         t = p->next;
894         g_free(p);
895     }
896     s->pool_first_large = NULL;
897     s->pool_cur = s->pool_end = NULL;
898     s->pool_current = NULL;
899 }
900 
/* Description of one helper function; helper_table is keyed by .func. */
typedef struct TCGHelperInfo {
    void *func;         /* address of the helper; used as the hash key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
907 
908 #include "exec/helper-proto.h"
909 
/* Table of all helpers; its entries come from the included header. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its entry in all_helpers[]. */
static GHashTable *helper_table;

/*
 * Copy of tcg_target_reg_alloc_order with its leading run of
 * non-call-clobbered registers reversed; built by tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
919 
/*
 * One-time initialisation of the initial TCG context @s.
 *
 * Zeroes the context, carves per-opcode constraint storage out of two
 * shared arrays, registers all helpers in helper_table, initialises the
 * backend, builds indirect_reg_alloc_order, sets up tcg_ctxs[] and finally
 * creates the "env" global bound to TCG_AREG0.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    /* Give each opcode its slice of the two arrays allocated above. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* First find n, the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* Entries [0, n) are copied in reverse ... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ... and the remaining entries are copied unchanged. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    /* Entries are claimed later, one per call to tcg_register_thread(). */
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not be marked reserved before it is bound to "env". */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
999 
1000 /*
1001  * Allocate TBs right before their corresponding translated code, making
1002  * sure that TBs and code are on different cache lines.
1003  */
1004 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1005 {
1006     uintptr_t align = qemu_icache_linesize;
1007     TranslationBlock *tb;
1008     void *next;
1009 
1010  retry:
1011     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1012     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1013 
1014     if (unlikely(next > s->code_gen_highwater)) {
1015         if (tcg_region_alloc(s)) {
1016             return NULL;
1017         }
1018         goto retry;
1019     }
1020     atomic_set(&s->code_gen_ptr, next);
1021     s->data_gen_ptr = NULL;
1022     return tb;
1023 }
1024 
1025 void tcg_prologue_init(TCGContext *s)
1026 {
1027     size_t prologue_size, total_size;
1028     void *buf0, *buf1;
1029 
1030     /* Put the prologue at the beginning of code_gen_buffer.  */
1031     buf0 = s->code_gen_buffer;
1032     total_size = s->code_gen_buffer_size;
1033     s->code_ptr = buf0;
1034     s->code_buf = buf0;
1035     s->data_gen_ptr = NULL;
1036     s->code_gen_prologue = buf0;
1037 
1038     /* Compute a high-water mark, at which we voluntarily flush the buffer
1039        and start over.  The size here is arbitrary, significantly larger
1040        than we expect the code generation for any one opcode to require.  */
1041     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1042 
1043 #ifdef TCG_TARGET_NEED_POOL_LABELS
1044     s->pool_labels = NULL;
1045 #endif
1046 
1047     /* Generate the prologue.  */
1048     tcg_target_qemu_prologue(s);
1049 
1050 #ifdef TCG_TARGET_NEED_POOL_LABELS
1051     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1052     {
1053         int result = tcg_out_pool_finalize(s);
1054         tcg_debug_assert(result == 0);
1055     }
1056 #endif
1057 
1058     buf1 = s->code_ptr;
1059     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1060 
1061     /* Deduct the prologue from the buffer.  */
1062     prologue_size = tcg_current_code_size(s);
1063     s->code_gen_ptr = buf1;
1064     s->code_gen_buffer = buf1;
1065     s->code_buf = buf1;
1066     total_size -= prologue_size;
1067     s->code_gen_buffer_size = total_size;
1068 
1069     tcg_register_jit(s->code_gen_buffer, total_size);
1070 
1071 #ifdef DEBUG_DISAS
1072     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1073         qemu_log_lock();
1074         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1075         if (s->data_gen_ptr) {
1076             size_t code_size = s->data_gen_ptr - buf0;
1077             size_t data_size = prologue_size - code_size;
1078             size_t i;
1079 
1080             log_disas(buf0, code_size);
1081 
1082             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1083                 if (sizeof(tcg_target_ulong) == 8) {
1084                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1085                              (uintptr_t)s->data_gen_ptr + i,
1086                              *(uint64_t *)(s->data_gen_ptr + i));
1087                 } else {
1088                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1089                              (uintptr_t)s->data_gen_ptr + i,
1090                              *(uint32_t *)(s->data_gen_ptr + i));
1091                 }
1092             }
1093         } else {
1094             log_disas(buf0, prologue_size);
1095         }
1096         qemu_log("\n");
1097         qemu_log_flush();
1098         qemu_log_unlock();
1099     }
1100 #endif
1101 
1102     /* Assert that goto_ptr is implemented completely.  */
1103     if (TCG_TARGET_HAS_goto_ptr) {
1104         tcg_debug_assert(s->code_gen_epilogue != NULL);
1105     }
1106 }
1107 
1108 void tcg_func_start(TCGContext *s)
1109 {
1110     tcg_pool_reset(s);
1111     s->nb_temps = s->nb_globals;
1112 
1113     /* No temps have been previously allocated for size or locality.  */
1114     memset(s->free_temps, 0, sizeof(s->free_temps));
1115 
1116     s->nb_ops = 0;
1117     s->nb_labels = 0;
1118     s->current_frame_offset = s->frame_start;
1119 
1120 #ifdef CONFIG_DEBUG_TCG
1121     s->goto_tb_issue_mask = 0;
1122 #endif
1123 
1124     QTAILQ_INIT(&s->ops);
1125     QTAILQ_INIT(&s->free_ops);
1126     QSIMPLEQ_INIT(&s->labels);
1127 }
1128 
1129 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1130 {
1131     int n = s->nb_temps++;
1132     tcg_debug_assert(n < TCG_MAX_TEMPS);
1133     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1134 }
1135 
1136 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1137 {
1138     TCGTemp *ts;
1139 
1140     tcg_debug_assert(s->nb_globals == s->nb_temps);
1141     s->nb_globals++;
1142     ts = tcg_temp_alloc(s);
1143     ts->temp_global = 1;
1144 
1145     return ts;
1146 }
1147 
1148 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1149                                             TCGReg reg, const char *name)
1150 {
1151     TCGTemp *ts;
1152 
1153     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1154         tcg_abort();
1155     }
1156 
1157     ts = tcg_global_alloc(s);
1158     ts->base_type = type;
1159     ts->type = type;
1160     ts->fixed_reg = 1;
1161     ts->reg = reg;
1162     ts->name = name;
1163     tcg_regset_set_reg(s->reserved_regs, reg);
1164 
1165     return ts;
1166 }
1167 
1168 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1169 {
1170     s->frame_start = start;
1171     s->frame_end = start + size;
1172     s->frame_temp
1173         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1174 }
1175 
1176 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1177                                      intptr_t offset, const char *name)
1178 {
1179     TCGContext *s = tcg_ctx;
1180     TCGTemp *base_ts = tcgv_ptr_temp(base);
1181     TCGTemp *ts = tcg_global_alloc(s);
1182     int indirect_reg = 0, bigendian = 0;
1183 #ifdef HOST_WORDS_BIGENDIAN
1184     bigendian = 1;
1185 #endif
1186 
1187     if (!base_ts->fixed_reg) {
1188         /* We do not support double-indirect registers.  */
1189         tcg_debug_assert(!base_ts->indirect_reg);
1190         base_ts->indirect_base = 1;
1191         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1192                             ? 2 : 1);
1193         indirect_reg = 1;
1194     }
1195 
1196     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1197         TCGTemp *ts2 = tcg_global_alloc(s);
1198         char buf[64];
1199 
1200         ts->base_type = TCG_TYPE_I64;
1201         ts->type = TCG_TYPE_I32;
1202         ts->indirect_reg = indirect_reg;
1203         ts->mem_allocated = 1;
1204         ts->mem_base = base_ts;
1205         ts->mem_offset = offset + bigendian * 4;
1206         pstrcpy(buf, sizeof(buf), name);
1207         pstrcat(buf, sizeof(buf), "_0");
1208         ts->name = strdup(buf);
1209 
1210         tcg_debug_assert(ts2 == ts + 1);
1211         ts2->base_type = TCG_TYPE_I64;
1212         ts2->type = TCG_TYPE_I32;
1213         ts2->indirect_reg = indirect_reg;
1214         ts2->mem_allocated = 1;
1215         ts2->mem_base = base_ts;
1216         ts2->mem_offset = offset + (1 - bigendian) * 4;
1217         pstrcpy(buf, sizeof(buf), name);
1218         pstrcat(buf, sizeof(buf), "_1");
1219         ts2->name = strdup(buf);
1220     } else {
1221         ts->base_type = type;
1222         ts->type = type;
1223         ts->indirect_reg = indirect_reg;
1224         ts->mem_allocated = 1;
1225         ts->mem_base = base_ts;
1226         ts->mem_offset = offset;
1227         ts->name = name;
1228     }
1229     return ts;
1230 }
1231 
1232 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1233 {
1234     TCGContext *s = tcg_ctx;
1235     TCGTemp *ts;
1236     int idx, k;
1237 
1238     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1239     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1240     if (idx < TCG_MAX_TEMPS) {
1241         /* There is already an available temp with the right type.  */
1242         clear_bit(idx, s->free_temps[k].l);
1243 
1244         ts = &s->temps[idx];
1245         ts->temp_allocated = 1;
1246         tcg_debug_assert(ts->base_type == type);
1247         tcg_debug_assert(ts->temp_local == temp_local);
1248     } else {
1249         ts = tcg_temp_alloc(s);
1250         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1251             TCGTemp *ts2 = tcg_temp_alloc(s);
1252 
1253             ts->base_type = type;
1254             ts->type = TCG_TYPE_I32;
1255             ts->temp_allocated = 1;
1256             ts->temp_local = temp_local;
1257 
1258             tcg_debug_assert(ts2 == ts + 1);
1259             ts2->base_type = TCG_TYPE_I64;
1260             ts2->type = TCG_TYPE_I32;
1261             ts2->temp_allocated = 1;
1262             ts2->temp_local = temp_local;
1263         } else {
1264             ts->base_type = type;
1265             ts->type = type;
1266             ts->temp_allocated = 1;
1267             ts->temp_local = temp_local;
1268         }
1269     }
1270 
1271 #if defined(CONFIG_DEBUG_TCG)
1272     s->temps_in_use++;
1273 #endif
1274     return ts;
1275 }
1276 
1277 TCGv_vec tcg_temp_new_vec(TCGType type)
1278 {
1279     TCGTemp *t;
1280 
1281 #ifdef CONFIG_DEBUG_TCG
1282     switch (type) {
1283     case TCG_TYPE_V64:
1284         assert(TCG_TARGET_HAS_v64);
1285         break;
1286     case TCG_TYPE_V128:
1287         assert(TCG_TARGET_HAS_v128);
1288         break;
1289     case TCG_TYPE_V256:
1290         assert(TCG_TARGET_HAS_v256);
1291         break;
1292     default:
1293         g_assert_not_reached();
1294     }
1295 #endif
1296 
1297     t = tcg_temp_new_internal(type, 0);
1298     return temp_tcgv_vec(t);
1299 }
1300 
1301 /* Create a new temp of the same type as an existing temp.  */
1302 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1303 {
1304     TCGTemp *t = tcgv_vec_temp(match);
1305 
1306     tcg_debug_assert(t->temp_allocated != 0);
1307 
1308     t = tcg_temp_new_internal(t->base_type, 0);
1309     return temp_tcgv_vec(t);
1310 }
1311 
1312 void tcg_temp_free_internal(TCGTemp *ts)
1313 {
1314     TCGContext *s = tcg_ctx;
1315     int k, idx;
1316 
1317 #if defined(CONFIG_DEBUG_TCG)
1318     s->temps_in_use--;
1319     if (s->temps_in_use < 0) {
1320         fprintf(stderr, "More temporaries freed than allocated!\n");
1321     }
1322 #endif
1323 
1324     tcg_debug_assert(ts->temp_global == 0);
1325     tcg_debug_assert(ts->temp_allocated != 0);
1326     ts->temp_allocated = 0;
1327 
1328     idx = temp_idx(ts);
1329     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1330     set_bit(idx, s->free_temps[k].l);
1331 }
1332 
1333 TCGv_i32 tcg_const_i32(int32_t val)
1334 {
1335     TCGv_i32 t0;
1336     t0 = tcg_temp_new_i32();
1337     tcg_gen_movi_i32(t0, val);
1338     return t0;
1339 }
1340 
1341 TCGv_i64 tcg_const_i64(int64_t val)
1342 {
1343     TCGv_i64 t0;
1344     t0 = tcg_temp_new_i64();
1345     tcg_gen_movi_i64(t0, val);
1346     return t0;
1347 }
1348 
1349 TCGv_i32 tcg_const_local_i32(int32_t val)
1350 {
1351     TCGv_i32 t0;
1352     t0 = tcg_temp_local_new_i32();
1353     tcg_gen_movi_i32(t0, val);
1354     return t0;
1355 }
1356 
1357 TCGv_i64 tcg_const_local_i64(int64_t val)
1358 {
1359     TCGv_i64 t0;
1360     t0 = tcg_temp_local_new_i64();
1361     tcg_gen_movi_i64(t0, val);
1362     return t0;
1363 }
1364 
1365 #if defined(CONFIG_DEBUG_TCG)
1366 void tcg_clear_temp_count(void)
1367 {
1368     TCGContext *s = tcg_ctx;
1369     s->temps_in_use = 0;
1370 }
1371 
1372 int tcg_check_temp_count(void)
1373 {
1374     TCGContext *s = tcg_ctx;
1375     if (s->temps_in_use) {
1376         /* Clear the count so that we don't give another
1377          * warning immediately next time around.
1378          */
1379         s->temps_in_use = 0;
1380         return 1;
1381     }
1382     return 0;
1383 }
1384 #endif
1385 
1386 /* Return true if OP may appear in the opcode stream.
1387    Test the runtime variable that controls each opcode.  */
1388 bool tcg_op_supported(TCGOpcode op)
1389 {
1390     const bool have_vec
1391         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1392 
1393     switch (op) {
1394     case INDEX_op_discard:
1395     case INDEX_op_set_label:
1396     case INDEX_op_call:
1397     case INDEX_op_br:
1398     case INDEX_op_mb:
1399     case INDEX_op_insn_start:
1400     case INDEX_op_exit_tb:
1401     case INDEX_op_goto_tb:
1402     case INDEX_op_qemu_ld_i32:
1403     case INDEX_op_qemu_st_i32:
1404     case INDEX_op_qemu_ld_i64:
1405     case INDEX_op_qemu_st_i64:
1406         return true;
1407 
1408     case INDEX_op_goto_ptr:
1409         return TCG_TARGET_HAS_goto_ptr;
1410 
1411     case INDEX_op_mov_i32:
1412     case INDEX_op_movi_i32:
1413     case INDEX_op_setcond_i32:
1414     case INDEX_op_brcond_i32:
1415     case INDEX_op_ld8u_i32:
1416     case INDEX_op_ld8s_i32:
1417     case INDEX_op_ld16u_i32:
1418     case INDEX_op_ld16s_i32:
1419     case INDEX_op_ld_i32:
1420     case INDEX_op_st8_i32:
1421     case INDEX_op_st16_i32:
1422     case INDEX_op_st_i32:
1423     case INDEX_op_add_i32:
1424     case INDEX_op_sub_i32:
1425     case INDEX_op_mul_i32:
1426     case INDEX_op_and_i32:
1427     case INDEX_op_or_i32:
1428     case INDEX_op_xor_i32:
1429     case INDEX_op_shl_i32:
1430     case INDEX_op_shr_i32:
1431     case INDEX_op_sar_i32:
1432         return true;
1433 
1434     case INDEX_op_movcond_i32:
1435         return TCG_TARGET_HAS_movcond_i32;
1436     case INDEX_op_div_i32:
1437     case INDEX_op_divu_i32:
1438         return TCG_TARGET_HAS_div_i32;
1439     case INDEX_op_rem_i32:
1440     case INDEX_op_remu_i32:
1441         return TCG_TARGET_HAS_rem_i32;
1442     case INDEX_op_div2_i32:
1443     case INDEX_op_divu2_i32:
1444         return TCG_TARGET_HAS_div2_i32;
1445     case INDEX_op_rotl_i32:
1446     case INDEX_op_rotr_i32:
1447         return TCG_TARGET_HAS_rot_i32;
1448     case INDEX_op_deposit_i32:
1449         return TCG_TARGET_HAS_deposit_i32;
1450     case INDEX_op_extract_i32:
1451         return TCG_TARGET_HAS_extract_i32;
1452     case INDEX_op_sextract_i32:
1453         return TCG_TARGET_HAS_sextract_i32;
1454     case INDEX_op_extract2_i32:
1455         return TCG_TARGET_HAS_extract2_i32;
1456     case INDEX_op_add2_i32:
1457         return TCG_TARGET_HAS_add2_i32;
1458     case INDEX_op_sub2_i32:
1459         return TCG_TARGET_HAS_sub2_i32;
1460     case INDEX_op_mulu2_i32:
1461         return TCG_TARGET_HAS_mulu2_i32;
1462     case INDEX_op_muls2_i32:
1463         return TCG_TARGET_HAS_muls2_i32;
1464     case INDEX_op_muluh_i32:
1465         return TCG_TARGET_HAS_muluh_i32;
1466     case INDEX_op_mulsh_i32:
1467         return TCG_TARGET_HAS_mulsh_i32;
1468     case INDEX_op_ext8s_i32:
1469         return TCG_TARGET_HAS_ext8s_i32;
1470     case INDEX_op_ext16s_i32:
1471         return TCG_TARGET_HAS_ext16s_i32;
1472     case INDEX_op_ext8u_i32:
1473         return TCG_TARGET_HAS_ext8u_i32;
1474     case INDEX_op_ext16u_i32:
1475         return TCG_TARGET_HAS_ext16u_i32;
1476     case INDEX_op_bswap16_i32:
1477         return TCG_TARGET_HAS_bswap16_i32;
1478     case INDEX_op_bswap32_i32:
1479         return TCG_TARGET_HAS_bswap32_i32;
1480     case INDEX_op_not_i32:
1481         return TCG_TARGET_HAS_not_i32;
1482     case INDEX_op_neg_i32:
1483         return TCG_TARGET_HAS_neg_i32;
1484     case INDEX_op_andc_i32:
1485         return TCG_TARGET_HAS_andc_i32;
1486     case INDEX_op_orc_i32:
1487         return TCG_TARGET_HAS_orc_i32;
1488     case INDEX_op_eqv_i32:
1489         return TCG_TARGET_HAS_eqv_i32;
1490     case INDEX_op_nand_i32:
1491         return TCG_TARGET_HAS_nand_i32;
1492     case INDEX_op_nor_i32:
1493         return TCG_TARGET_HAS_nor_i32;
1494     case INDEX_op_clz_i32:
1495         return TCG_TARGET_HAS_clz_i32;
1496     case INDEX_op_ctz_i32:
1497         return TCG_TARGET_HAS_ctz_i32;
1498     case INDEX_op_ctpop_i32:
1499         return TCG_TARGET_HAS_ctpop_i32;
1500 
1501     case INDEX_op_brcond2_i32:
1502     case INDEX_op_setcond2_i32:
1503         return TCG_TARGET_REG_BITS == 32;
1504 
1505     case INDEX_op_mov_i64:
1506     case INDEX_op_movi_i64:
1507     case INDEX_op_setcond_i64:
1508     case INDEX_op_brcond_i64:
1509     case INDEX_op_ld8u_i64:
1510     case INDEX_op_ld8s_i64:
1511     case INDEX_op_ld16u_i64:
1512     case INDEX_op_ld16s_i64:
1513     case INDEX_op_ld32u_i64:
1514     case INDEX_op_ld32s_i64:
1515     case INDEX_op_ld_i64:
1516     case INDEX_op_st8_i64:
1517     case INDEX_op_st16_i64:
1518     case INDEX_op_st32_i64:
1519     case INDEX_op_st_i64:
1520     case INDEX_op_add_i64:
1521     case INDEX_op_sub_i64:
1522     case INDEX_op_mul_i64:
1523     case INDEX_op_and_i64:
1524     case INDEX_op_or_i64:
1525     case INDEX_op_xor_i64:
1526     case INDEX_op_shl_i64:
1527     case INDEX_op_shr_i64:
1528     case INDEX_op_sar_i64:
1529     case INDEX_op_ext_i32_i64:
1530     case INDEX_op_extu_i32_i64:
1531         return TCG_TARGET_REG_BITS == 64;
1532 
1533     case INDEX_op_movcond_i64:
1534         return TCG_TARGET_HAS_movcond_i64;
1535     case INDEX_op_div_i64:
1536     case INDEX_op_divu_i64:
1537         return TCG_TARGET_HAS_div_i64;
1538     case INDEX_op_rem_i64:
1539     case INDEX_op_remu_i64:
1540         return TCG_TARGET_HAS_rem_i64;
1541     case INDEX_op_div2_i64:
1542     case INDEX_op_divu2_i64:
1543         return TCG_TARGET_HAS_div2_i64;
1544     case INDEX_op_rotl_i64:
1545     case INDEX_op_rotr_i64:
1546         return TCG_TARGET_HAS_rot_i64;
1547     case INDEX_op_deposit_i64:
1548         return TCG_TARGET_HAS_deposit_i64;
1549     case INDEX_op_extract_i64:
1550         return TCG_TARGET_HAS_extract_i64;
1551     case INDEX_op_sextract_i64:
1552         return TCG_TARGET_HAS_sextract_i64;
1553     case INDEX_op_extract2_i64:
1554         return TCG_TARGET_HAS_extract2_i64;
1555     case INDEX_op_extrl_i64_i32:
1556         return TCG_TARGET_HAS_extrl_i64_i32;
1557     case INDEX_op_extrh_i64_i32:
1558         return TCG_TARGET_HAS_extrh_i64_i32;
1559     case INDEX_op_ext8s_i64:
1560         return TCG_TARGET_HAS_ext8s_i64;
1561     case INDEX_op_ext16s_i64:
1562         return TCG_TARGET_HAS_ext16s_i64;
1563     case INDEX_op_ext32s_i64:
1564         return TCG_TARGET_HAS_ext32s_i64;
1565     case INDEX_op_ext8u_i64:
1566         return TCG_TARGET_HAS_ext8u_i64;
1567     case INDEX_op_ext16u_i64:
1568         return TCG_TARGET_HAS_ext16u_i64;
1569     case INDEX_op_ext32u_i64:
1570         return TCG_TARGET_HAS_ext32u_i64;
1571     case INDEX_op_bswap16_i64:
1572         return TCG_TARGET_HAS_bswap16_i64;
1573     case INDEX_op_bswap32_i64:
1574         return TCG_TARGET_HAS_bswap32_i64;
1575     case INDEX_op_bswap64_i64:
1576         return TCG_TARGET_HAS_bswap64_i64;
1577     case INDEX_op_not_i64:
1578         return TCG_TARGET_HAS_not_i64;
1579     case INDEX_op_neg_i64:
1580         return TCG_TARGET_HAS_neg_i64;
1581     case INDEX_op_andc_i64:
1582         return TCG_TARGET_HAS_andc_i64;
1583     case INDEX_op_orc_i64:
1584         return TCG_TARGET_HAS_orc_i64;
1585     case INDEX_op_eqv_i64:
1586         return TCG_TARGET_HAS_eqv_i64;
1587     case INDEX_op_nand_i64:
1588         return TCG_TARGET_HAS_nand_i64;
1589     case INDEX_op_nor_i64:
1590         return TCG_TARGET_HAS_nor_i64;
1591     case INDEX_op_clz_i64:
1592         return TCG_TARGET_HAS_clz_i64;
1593     case INDEX_op_ctz_i64:
1594         return TCG_TARGET_HAS_ctz_i64;
1595     case INDEX_op_ctpop_i64:
1596         return TCG_TARGET_HAS_ctpop_i64;
1597     case INDEX_op_add2_i64:
1598         return TCG_TARGET_HAS_add2_i64;
1599     case INDEX_op_sub2_i64:
1600         return TCG_TARGET_HAS_sub2_i64;
1601     case INDEX_op_mulu2_i64:
1602         return TCG_TARGET_HAS_mulu2_i64;
1603     case INDEX_op_muls2_i64:
1604         return TCG_TARGET_HAS_muls2_i64;
1605     case INDEX_op_muluh_i64:
1606         return TCG_TARGET_HAS_muluh_i64;
1607     case INDEX_op_mulsh_i64:
1608         return TCG_TARGET_HAS_mulsh_i64;
1609 
1610     case INDEX_op_mov_vec:
1611     case INDEX_op_dup_vec:
1612     case INDEX_op_dupi_vec:
1613     case INDEX_op_dupm_vec:
1614     case INDEX_op_ld_vec:
1615     case INDEX_op_st_vec:
1616     case INDEX_op_add_vec:
1617     case INDEX_op_sub_vec:
1618     case INDEX_op_and_vec:
1619     case INDEX_op_or_vec:
1620     case INDEX_op_xor_vec:
1621     case INDEX_op_cmp_vec:
1622         return have_vec;
1623     case INDEX_op_dup2_vec:
1624         return have_vec && TCG_TARGET_REG_BITS == 32;
1625     case INDEX_op_not_vec:
1626         return have_vec && TCG_TARGET_HAS_not_vec;
1627     case INDEX_op_neg_vec:
1628         return have_vec && TCG_TARGET_HAS_neg_vec;
1629     case INDEX_op_abs_vec:
1630         return have_vec && TCG_TARGET_HAS_abs_vec;
1631     case INDEX_op_andc_vec:
1632         return have_vec && TCG_TARGET_HAS_andc_vec;
1633     case INDEX_op_orc_vec:
1634         return have_vec && TCG_TARGET_HAS_orc_vec;
1635     case INDEX_op_mul_vec:
1636         return have_vec && TCG_TARGET_HAS_mul_vec;
1637     case INDEX_op_shli_vec:
1638     case INDEX_op_shri_vec:
1639     case INDEX_op_sari_vec:
1640         return have_vec && TCG_TARGET_HAS_shi_vec;
1641     case INDEX_op_shls_vec:
1642     case INDEX_op_shrs_vec:
1643     case INDEX_op_sars_vec:
1644         return have_vec && TCG_TARGET_HAS_shs_vec;
1645     case INDEX_op_shlv_vec:
1646     case INDEX_op_shrv_vec:
1647     case INDEX_op_sarv_vec:
1648         return have_vec && TCG_TARGET_HAS_shv_vec;
1649     case INDEX_op_ssadd_vec:
1650     case INDEX_op_usadd_vec:
1651     case INDEX_op_sssub_vec:
1652     case INDEX_op_ussub_vec:
1653         return have_vec && TCG_TARGET_HAS_sat_vec;
1654     case INDEX_op_smin_vec:
1655     case INDEX_op_umin_vec:
1656     case INDEX_op_smax_vec:
1657     case INDEX_op_umax_vec:
1658         return have_vec && TCG_TARGET_HAS_minmax_vec;
1659     case INDEX_op_bitsel_vec:
1660         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1661     case INDEX_op_cmpsel_vec:
1662         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1663 
1664     default:
1665         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1666         return true;
1667     }
1668 }
1669 
/*
 * Emit an INDEX_op_call op invoking helper 'func'.
 *
 * func:  helper entry point; it is used as the key into helper_table.
 *        NOTE(review): the lookup result is dereferenced without a NULL
 *        check, so func must have been registered in helper_table.
 * ret:   temp receiving the return value, or NULL for none.
 * nargs: number of entries in args.
 * args:  argument temps.  In the TCG_TARGET_EXTEND_ARGS path entries of
 *        this array are overwritten with extended temporaries.
 *
 * As read by this function, bit 0 of the helper's sizemask marks a
 * 64-bit return value; for argument i, bit (i+1)*2 marks a 64-bit
 * argument and the next higher bit marks a signed one.
 *
 * The op arguments are laid out as: return temp(s), input temps
 * (including any alignment dummies), the function pointer, the flags.
 *
 * Note: we convert the 64 bit args to 32 bit and do some alignment
 * and endian swap. Maybe it would be better to do the alignment
 * and endian swap in tcg_reg_alloc_call().
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                /* High half is passed first, then the low half.  */
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on work on the split list, with no 64-bit args left. */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace every 32-bit argument by a sign- or zero-extended copy.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free slot in op->args.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: ret and ret + 1 are used
               as the two halves, ordered by host endianness.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts emitted input slots, including alignment dummies. */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and the flags follow the inputs.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the extended copies created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1841 
1842 static void tcg_reg_alloc_start(TCGContext *s)
1843 {
1844     int i, n;
1845     TCGTemp *ts;
1846 
1847     for (i = 0, n = s->nb_globals; i < n; i++) {
1848         ts = &s->temps[i];
1849         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1850     }
1851     for (n = s->nb_temps; i < n; i++) {
1852         ts = &s->temps[i];
1853         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1854         ts->mem_allocated = 0;
1855         ts->fixed_reg = 0;
1856     }
1857 
1858     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1859 }
1860 
1861 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1862                                  TCGTemp *ts)
1863 {
1864     int idx = temp_idx(ts);
1865 
1866     if (ts->temp_global) {
1867         pstrcpy(buf, buf_size, ts->name);
1868     } else if (ts->temp_local) {
1869         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1870     } else {
1871         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1872     }
1873     return buf;
1874 }
1875 
1876 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1877                              int buf_size, TCGArg arg)
1878 {
1879     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1880 }
1881 
1882 /* Find helper name.  */
1883 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1884 {
1885     const char *ret = NULL;
1886     if (helper_table) {
1887         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1888         if (info) {
1889             ret = info->name;
1890         }
1891     }
1892     return ret;
1893 }
1894 
1895 static const char * const cond_name[] =
1896 {
1897     [TCG_COND_NEVER] = "never",
1898     [TCG_COND_ALWAYS] = "always",
1899     [TCG_COND_EQ] = "eq",
1900     [TCG_COND_NE] = "ne",
1901     [TCG_COND_LT] = "lt",
1902     [TCG_COND_GE] = "ge",
1903     [TCG_COND_LE] = "le",
1904     [TCG_COND_GT] = "gt",
1905     [TCG_COND_LTU] = "ltu",
1906     [TCG_COND_GEU] = "geu",
1907     [TCG_COND_LEU] = "leu",
1908     [TCG_COND_GTU] = "gtu"
1909 };
1910 
1911 static const char * const ldst_name[] =
1912 {
1913     [MO_UB]   = "ub",
1914     [MO_SB]   = "sb",
1915     [MO_LEUW] = "leuw",
1916     [MO_LESW] = "lesw",
1917     [MO_LEUL] = "leul",
1918     [MO_LESL] = "lesl",
1919     [MO_LEQ]  = "leq",
1920     [MO_BEUW] = "beuw",
1921     [MO_BESW] = "besw",
1922     [MO_BEUL] = "beul",
1923     [MO_BESL] = "besl",
1924     [MO_BEQ]  = "beq",
1925 };
1926 
1927 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1928 #ifdef ALIGNED_ONLY
1929     [MO_UNALN >> MO_ASHIFT]    = "un+",
1930     [MO_ALIGN >> MO_ASHIFT]    = "",
1931 #else
1932     [MO_UNALN >> MO_ASHIFT]    = "",
1933     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1934 #endif
1935     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1936     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1937     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1938     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1939     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1940     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1941 };
1942 
1943 static inline bool tcg_regset_single(TCGRegSet d)
1944 {
1945     return (d & (d - 1)) == 0;
1946 }
1947 
1948 static inline TCGReg tcg_regset_first(TCGRegSet d)
1949 {
1950     if (TCG_TARGET_NB_REGS <= 32) {
1951         return ctz32(d);
1952     } else {
1953         return ctz64(d);
1954     }
1955 }
1956 
1957 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1958 {
1959     char buf[128];
1960     TCGOp *op;
1961 
1962     QTAILQ_FOREACH(op, &s->ops, link) {
1963         int i, k, nb_oargs, nb_iargs, nb_cargs;
1964         const TCGOpDef *def;
1965         TCGOpcode c;
1966         int col = 0;
1967 
1968         c = op->opc;
1969         def = &tcg_op_defs[c];
1970 
1971         if (c == INDEX_op_insn_start) {
1972             nb_oargs = 0;
1973             col += qemu_log("\n ----");
1974 
1975             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1976                 target_ulong a;
1977 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1978                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1979 #else
1980                 a = op->args[i];
1981 #endif
1982                 col += qemu_log(" " TARGET_FMT_lx, a);
1983             }
1984         } else if (c == INDEX_op_call) {
1985             /* variable number of arguments */
1986             nb_oargs = TCGOP_CALLO(op);
1987             nb_iargs = TCGOP_CALLI(op);
1988             nb_cargs = def->nb_cargs;
1989 
1990             /* function name, flags, out args */
1991             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1992                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1993                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1994             for (i = 0; i < nb_oargs; i++) {
1995                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1996                                                        op->args[i]));
1997             }
1998             for (i = 0; i < nb_iargs; i++) {
1999                 TCGArg arg = op->args[nb_oargs + i];
2000                 const char *t = "<dummy>";
2001                 if (arg != TCG_CALL_DUMMY_ARG) {
2002                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2003                 }
2004                 col += qemu_log(",%s", t);
2005             }
2006         } else {
2007             col += qemu_log(" %s ", def->name);
2008 
2009             nb_oargs = def->nb_oargs;
2010             nb_iargs = def->nb_iargs;
2011             nb_cargs = def->nb_cargs;
2012 
2013             if (def->flags & TCG_OPF_VECTOR) {
2014                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2015                                 8 << TCGOP_VECE(op));
2016             }
2017 
2018             k = 0;
2019             for (i = 0; i < nb_oargs; i++) {
2020                 if (k != 0) {
2021                     col += qemu_log(",");
2022                 }
2023                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2024                                                       op->args[k++]));
2025             }
2026             for (i = 0; i < nb_iargs; i++) {
2027                 if (k != 0) {
2028                     col += qemu_log(",");
2029                 }
2030                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2031                                                       op->args[k++]));
2032             }
2033             switch (c) {
2034             case INDEX_op_brcond_i32:
2035             case INDEX_op_setcond_i32:
2036             case INDEX_op_movcond_i32:
2037             case INDEX_op_brcond2_i32:
2038             case INDEX_op_setcond2_i32:
2039             case INDEX_op_brcond_i64:
2040             case INDEX_op_setcond_i64:
2041             case INDEX_op_movcond_i64:
2042             case INDEX_op_cmp_vec:
2043             case INDEX_op_cmpsel_vec:
2044                 if (op->args[k] < ARRAY_SIZE(cond_name)
2045                     && cond_name[op->args[k]]) {
2046                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2047                 } else {
2048                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2049                 }
2050                 i = 1;
2051                 break;
2052             case INDEX_op_qemu_ld_i32:
2053             case INDEX_op_qemu_st_i32:
2054             case INDEX_op_qemu_ld_i64:
2055             case INDEX_op_qemu_st_i64:
2056                 {
2057                     TCGMemOpIdx oi = op->args[k++];
2058                     TCGMemOp op = get_memop(oi);
2059                     unsigned ix = get_mmuidx(oi);
2060 
2061                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2062                         col += qemu_log(",$0x%x,%u", op, ix);
2063                     } else {
2064                         const char *s_al, *s_op;
2065                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2066                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2067                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2068                     }
2069                     i = 1;
2070                 }
2071                 break;
2072             default:
2073                 i = 0;
2074                 break;
2075             }
2076             switch (c) {
2077             case INDEX_op_set_label:
2078             case INDEX_op_br:
2079             case INDEX_op_brcond_i32:
2080             case INDEX_op_brcond_i64:
2081             case INDEX_op_brcond2_i32:
2082                 col += qemu_log("%s$L%d", k ? "," : "",
2083                                 arg_label(op->args[k])->id);
2084                 i++, k++;
2085                 break;
2086             default:
2087                 break;
2088             }
2089             for (; i < nb_cargs; i++, k++) {
2090                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2091             }
2092         }
2093 
2094         if (have_prefs || op->life) {
2095             for (; col < 40; ++col) {
2096                 putc(' ', qemu_logfile);
2097             }
2098         }
2099 
2100         if (op->life) {
2101             unsigned life = op->life;
2102 
2103             if (life & (SYNC_ARG * 3)) {
2104                 qemu_log("  sync:");
2105                 for (i = 0; i < 2; ++i) {
2106                     if (life & (SYNC_ARG << i)) {
2107                         qemu_log(" %d", i);
2108                     }
2109                 }
2110             }
2111             life /= DEAD_ARG;
2112             if (life) {
2113                 qemu_log("  dead:");
2114                 for (i = 0; life; ++i, life >>= 1) {
2115                     if (life & 1) {
2116                         qemu_log(" %d", i);
2117                     }
2118                 }
2119             }
2120         }
2121 
2122         if (have_prefs) {
2123             for (i = 0; i < nb_oargs; ++i) {
2124                 TCGRegSet set = op->output_pref[i];
2125 
2126                 if (i == 0) {
2127                     qemu_log("  pref=");
2128                 } else {
2129                     qemu_log(",");
2130                 }
2131                 if (set == 0) {
2132                     qemu_log("none");
2133                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2134                     qemu_log("all");
2135 #ifdef CONFIG_DEBUG_TCG
2136                 } else if (tcg_regset_single(set)) {
2137                     TCGReg reg = tcg_regset_first(set);
2138                     qemu_log("%s", tcg_target_reg_names[reg]);
2139 #endif
2140                 } else if (TCG_TARGET_NB_REGS <= 32) {
2141                     qemu_log("%#x", (uint32_t)set);
2142                 } else {
2143                     qemu_log("%#" PRIx64, (uint64_t)set);
2144                 }
2145             }
2146         }
2147 
2148         qemu_log("\n");
2149     }
2150 }
2151 
2152 /* we give more priority to constraints with less registers */
2153 static int get_constraint_priority(const TCGOpDef *def, int k)
2154 {
2155     const TCGArgConstraint *arg_ct;
2156 
2157     int i, n;
2158     arg_ct = &def->args_ct[k];
2159     if (arg_ct->ct & TCG_CT_ALIAS) {
2160         /* an alias is equivalent to a single register */
2161         n = 1;
2162     } else {
2163         if (!(arg_ct->ct & TCG_CT_REG))
2164             return 0;
2165         n = 0;
2166         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2167             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2168                 n++;
2169         }
2170     }
2171     return TCG_TARGET_NB_REGS - n + 1;
2172 }
2173 
2174 /* sort from highest priority to lowest */
2175 static void sort_constraints(TCGOpDef *def, int start, int n)
2176 {
2177     int i, j, p1, p2, tmp;
2178 
2179     for(i = 0; i < n; i++)
2180         def->sorted_args[start + i] = start + i;
2181     if (n <= 1)
2182         return;
2183     for(i = 0; i < n - 1; i++) {
2184         for(j = i + 1; j < n; j++) {
2185             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2186             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2187             if (p1 < p2) {
2188                 tmp = def->sorted_args[start + i];
2189                 def->sorted_args[start + i] = def->sorted_args[start + j];
2190                 def->sorted_args[start + j] = tmp;
2191             }
2192         }
2193     }
2194 }
2195 
2196 static void process_op_defs(TCGContext *s)
2197 {
2198     TCGOpcode op;
2199 
2200     for (op = 0; op < NB_OPS; op++) {
2201         TCGOpDef *def = &tcg_op_defs[op];
2202         const TCGTargetOpDef *tdefs;
2203         TCGType type;
2204         int i, nb_args;
2205 
2206         if (def->flags & TCG_OPF_NOT_PRESENT) {
2207             continue;
2208         }
2209 
2210         nb_args = def->nb_iargs + def->nb_oargs;
2211         if (nb_args == 0) {
2212             continue;
2213         }
2214 
2215         tdefs = tcg_target_op_def(op);
2216         /* Missing TCGTargetOpDef entry. */
2217         tcg_debug_assert(tdefs != NULL);
2218 
2219         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2220         for (i = 0; i < nb_args; i++) {
2221             const char *ct_str = tdefs->args_ct_str[i];
2222             /* Incomplete TCGTargetOpDef entry. */
2223             tcg_debug_assert(ct_str != NULL);
2224 
2225             def->args_ct[i].u.regs = 0;
2226             def->args_ct[i].ct = 0;
2227             while (*ct_str != '\0') {
2228                 switch(*ct_str) {
2229                 case '0' ... '9':
2230                     {
2231                         int oarg = *ct_str - '0';
2232                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2233                         tcg_debug_assert(oarg < def->nb_oargs);
2234                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2235                         /* TCG_CT_ALIAS is for the output arguments.
2236                            The input is tagged with TCG_CT_IALIAS. */
2237                         def->args_ct[i] = def->args_ct[oarg];
2238                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2239                         def->args_ct[oarg].alias_index = i;
2240                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2241                         def->args_ct[i].alias_index = oarg;
2242                     }
2243                     ct_str++;
2244                     break;
2245                 case '&':
2246                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2247                     ct_str++;
2248                     break;
2249                 case 'i':
2250                     def->args_ct[i].ct |= TCG_CT_CONST;
2251                     ct_str++;
2252                     break;
2253                 default:
2254                     ct_str = target_parse_constraint(&def->args_ct[i],
2255                                                      ct_str, type);
2256                     /* Typo in TCGTargetOpDef constraint. */
2257                     tcg_debug_assert(ct_str != NULL);
2258                 }
2259             }
2260         }
2261 
2262         /* TCGTargetOpDef entry with too much information? */
2263         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2264 
2265         /* sort the constraints (XXX: this is just an heuristic) */
2266         sort_constraints(def, 0, def->nb_oargs);
2267         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2268     }
2269 }
2270 
2271 void tcg_op_remove(TCGContext *s, TCGOp *op)
2272 {
2273     TCGLabel *label;
2274 
2275     switch (op->opc) {
2276     case INDEX_op_br:
2277         label = arg_label(op->args[0]);
2278         label->refs--;
2279         break;
2280     case INDEX_op_brcond_i32:
2281     case INDEX_op_brcond_i64:
2282         label = arg_label(op->args[3]);
2283         label->refs--;
2284         break;
2285     case INDEX_op_brcond2_i32:
2286         label = arg_label(op->args[5]);
2287         label->refs--;
2288         break;
2289     default:
2290         break;
2291     }
2292 
2293     QTAILQ_REMOVE(&s->ops, op, link);
2294     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2295     s->nb_ops--;
2296 
2297 #ifdef CONFIG_PROFILER
2298     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2299 #endif
2300 }
2301 
2302 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2303 {
2304     TCGContext *s = tcg_ctx;
2305     TCGOp *op;
2306 
2307     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2308         op = tcg_malloc(sizeof(TCGOp));
2309     } else {
2310         op = QTAILQ_FIRST(&s->free_ops);
2311         QTAILQ_REMOVE(&s->free_ops, op, link);
2312     }
2313     memset(op, 0, offsetof(TCGOp, link));
2314     op->opc = opc;
2315     s->nb_ops++;
2316 
2317     return op;
2318 }
2319 
2320 TCGOp *tcg_emit_op(TCGOpcode opc)
2321 {
2322     TCGOp *op = tcg_op_alloc(opc);
2323     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2324     return op;
2325 }
2326 
2327 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2328 {
2329     TCGOp *new_op = tcg_op_alloc(opc);
2330     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2331     return new_op;
2332 }
2333 
2334 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2335 {
2336     TCGOp *new_op = tcg_op_alloc(opc);
2337     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2338     return new_op;
2339 }
2340 
2341 /* Reachable analysis : remove unreachable code.  */
2342 static void reachable_code_pass(TCGContext *s)
2343 {
2344     TCGOp *op, *op_next;
2345     bool dead = false;
2346 
2347     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2348         bool remove = dead;
2349         TCGLabel *label;
2350         int call_flags;
2351 
2352         switch (op->opc) {
2353         case INDEX_op_set_label:
2354             label = arg_label(op->args[0]);
2355             if (label->refs == 0) {
2356                 /*
2357                  * While there is an occasional backward branch, virtually
2358                  * all branches generated by the translators are forward.
2359                  * Which means that generally we will have already removed
2360                  * all references to the label that will be, and there is
2361                  * little to be gained by iterating.
2362                  */
2363                 remove = true;
2364             } else {
2365                 /* Once we see a label, insns become live again.  */
2366                 dead = false;
2367                 remove = false;
2368 
2369                 /*
2370                  * Optimization can fold conditional branches to unconditional.
2371                  * If we find a label with one reference which is preceded by
2372                  * an unconditional branch to it, remove both.  This needed to
2373                  * wait until the dead code in between them was removed.
2374                  */
2375                 if (label->refs == 1) {
2376                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2377                     if (op_prev->opc == INDEX_op_br &&
2378                         label == arg_label(op_prev->args[0])) {
2379                         tcg_op_remove(s, op_prev);
2380                         remove = true;
2381                     }
2382                 }
2383             }
2384             break;
2385 
2386         case INDEX_op_br:
2387         case INDEX_op_exit_tb:
2388         case INDEX_op_goto_ptr:
2389             /* Unconditional branches; everything following is dead.  */
2390             dead = true;
2391             break;
2392 
2393         case INDEX_op_call:
2394             /* Notice noreturn helper calls, raising exceptions.  */
2395             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2396             if (call_flags & TCG_CALL_NO_RETURN) {
2397                 dead = true;
2398             }
2399             break;
2400 
2401         case INDEX_op_insn_start:
2402             /* Never remove -- we need to keep these for unwind.  */
2403             remove = false;
2404             break;
2405 
2406         default:
2407             break;
2408         }
2409 
2410         if (remove) {
2411             tcg_op_remove(s, op);
2412         }
2413     }
2414 }
2415 
/* Bits stored in a temp's liveness state.  */
#define TS_DEAD             1
#define TS_MEM              2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument number idx in the local
 * variable arg_life, which must be in scope at the point of use.
 * Each expands to a non-zero value when the bit is set.
 */
#define IS_DEAD_ARG(idx)    ((DEAD_ARG << (idx)) & arg_life)
#define NEED_SYNC_ARG(idx)  ((SYNC_ARG << (idx)) & arg_life)
2421 
2422 /* For liveness_pass_1, the register preferences for a given temp.  */
2423 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2424 {
2425     return ts->state_ptr;
2426 }
2427 
2428 /* For liveness_pass_1, reset the preferences for a given temp to the
2429  * maximal regset for its type.
2430  */
2431 static inline void la_reset_pref(TCGTemp *ts)
2432 {
2433     *la_temp_pref(ts)
2434         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2435 }
2436 
2437 /* liveness analysis: end of function: all temps are dead, and globals
2438    should be in memory. */
2439 static void la_func_end(TCGContext *s, int ng, int nt)
2440 {
2441     int i;
2442 
2443     for (i = 0; i < ng; ++i) {
2444         s->temps[i].state = TS_DEAD | TS_MEM;
2445         la_reset_pref(&s->temps[i]);
2446     }
2447     for (i = ng; i < nt; ++i) {
2448         s->temps[i].state = TS_DEAD;
2449         la_reset_pref(&s->temps[i]);
2450     }
2451 }
2452 
2453 /* liveness analysis: end of basic block: all temps are dead, globals
2454    and local temps should be in memory. */
2455 static void la_bb_end(TCGContext *s, int ng, int nt)
2456 {
2457     int i;
2458 
2459     for (i = 0; i < ng; ++i) {
2460         s->temps[i].state = TS_DEAD | TS_MEM;
2461         la_reset_pref(&s->temps[i]);
2462     }
2463     for (i = ng; i < nt; ++i) {
2464         s->temps[i].state = (s->temps[i].temp_local
2465                              ? TS_DEAD | TS_MEM
2466                              : TS_DEAD);
2467         la_reset_pref(&s->temps[i]);
2468     }
2469 }
2470 
2471 /* liveness analysis: sync globals back to memory.  */
2472 static void la_global_sync(TCGContext *s, int ng)
2473 {
2474     int i;
2475 
2476     for (i = 0; i < ng; ++i) {
2477         int state = s->temps[i].state;
2478         s->temps[i].state = state | TS_MEM;
2479         if (state == TS_DEAD) {
2480             /* If the global was previously dead, reset prefs.  */
2481             la_reset_pref(&s->temps[i]);
2482         }
2483     }
2484 }
2485 
2486 /* liveness analysis: sync globals back to memory and kill.  */
2487 static void la_global_kill(TCGContext *s, int ng)
2488 {
2489     int i;
2490 
2491     for (i = 0; i < ng; i++) {
2492         s->temps[i].state = TS_DEAD | TS_MEM;
2493         la_reset_pref(&s->temps[i]);
2494     }
2495 }
2496 
2497 /* liveness analysis: note live globals crossing calls.  */
2498 static void la_cross_call(TCGContext *s, int nt)
2499 {
2500     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2501     int i;
2502 
2503     for (i = 0; i < nt; i++) {
2504         TCGTemp *ts = &s->temps[i];
2505         if (!(ts->state & TS_DEAD)) {
2506             TCGRegSet *pset = la_temp_pref(ts);
2507             TCGRegSet set = *pset;
2508 
2509             set &= mask;
2510             /* If the combination is not possible, restart.  */
2511             if (set == 0) {
2512                 set = tcg_target_available_regs[ts->type] & mask;
2513             }
2514             *pset = set;
2515         }
2516     }
2517 }
2518 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
2522 static void liveness_pass_1(TCGContext *s)
2523 {
2524     int nb_globals = s->nb_globals;
2525     int nb_temps = s->nb_temps;
2526     TCGOp *op, *op_prev;
2527     TCGRegSet *prefs;
2528     int i;
2529 
2530     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2531     for (i = 0; i < nb_temps; ++i) {
2532         s->temps[i].state_ptr = prefs + i;
2533     }
2534 
2535     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2536     la_func_end(s, nb_globals, nb_temps);
2537 
2538     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2539         int nb_iargs, nb_oargs;
2540         TCGOpcode opc_new, opc_new2;
2541         bool have_opc_new2;
2542         TCGLifeData arg_life = 0;
2543         TCGTemp *ts;
2544         TCGOpcode opc = op->opc;
2545         const TCGOpDef *def = &tcg_op_defs[opc];
2546 
2547         switch (opc) {
2548         case INDEX_op_call:
2549             {
2550                 int call_flags;
2551                 int nb_call_regs;
2552 
2553                 nb_oargs = TCGOP_CALLO(op);
2554                 nb_iargs = TCGOP_CALLI(op);
2555                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2556 
2557                 /* pure functions can be removed if their result is unused */
2558                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2559                     for (i = 0; i < nb_oargs; i++) {
2560                         ts = arg_temp(op->args[i]);
2561                         if (ts->state != TS_DEAD) {
2562                             goto do_not_remove_call;
2563                         }
2564                     }
2565                     goto do_remove;
2566                 }
2567             do_not_remove_call:
2568 
2569                 /* Output args are dead.  */
2570                 for (i = 0; i < nb_oargs; i++) {
2571                     ts = arg_temp(op->args[i]);
2572                     if (ts->state & TS_DEAD) {
2573                         arg_life |= DEAD_ARG << i;
2574                     }
2575                     if (ts->state & TS_MEM) {
2576                         arg_life |= SYNC_ARG << i;
2577                     }
2578                     ts->state = TS_DEAD;
2579                     la_reset_pref(ts);
2580 
2581                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2582                     op->output_pref[i] = 0;
2583                 }
2584 
2585                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2586                                     TCG_CALL_NO_READ_GLOBALS))) {
2587                     la_global_kill(s, nb_globals);
2588                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2589                     la_global_sync(s, nb_globals);
2590                 }
2591 
2592                 /* Record arguments that die in this helper.  */
2593                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2594                     ts = arg_temp(op->args[i]);
2595                     if (ts && ts->state & TS_DEAD) {
2596                         arg_life |= DEAD_ARG << i;
2597                     }
2598                 }
2599 
2600                 /* For all live registers, remove call-clobbered prefs.  */
2601                 la_cross_call(s, nb_temps);
2602 
2603                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2604 
2605                 /* Input arguments are live for preceding opcodes.  */
2606                 for (i = 0; i < nb_iargs; i++) {
2607                     ts = arg_temp(op->args[i + nb_oargs]);
2608                     if (ts && ts->state & TS_DEAD) {
2609                         /* For those arguments that die, and will be allocated
2610                          * in registers, clear the register set for that arg,
2611                          * to be filled in below.  For args that will be on
2612                          * the stack, reset to any available reg.
2613                          */
2614                         *la_temp_pref(ts)
2615                             = (i < nb_call_regs ? 0 :
2616                                tcg_target_available_regs[ts->type]);
2617                         ts->state &= ~TS_DEAD;
2618                     }
2619                 }
2620 
2621                 /* For each input argument, add its input register to prefs.
2622                    If a temp is used once, this produces a single set bit.  */
2623                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2624                     ts = arg_temp(op->args[i + nb_oargs]);
2625                     if (ts) {
2626                         tcg_regset_set_reg(*la_temp_pref(ts),
2627                                            tcg_target_call_iarg_regs[i]);
2628                     }
2629                 }
2630             }
2631             break;
2632         case INDEX_op_insn_start:
2633             break;
2634         case INDEX_op_discard:
2635             /* mark the temporary as dead */
2636             ts = arg_temp(op->args[0]);
2637             ts->state = TS_DEAD;
2638             la_reset_pref(ts);
2639             break;
2640 
2641         case INDEX_op_add2_i32:
2642             opc_new = INDEX_op_add_i32;
2643             goto do_addsub2;
2644         case INDEX_op_sub2_i32:
2645             opc_new = INDEX_op_sub_i32;
2646             goto do_addsub2;
2647         case INDEX_op_add2_i64:
2648             opc_new = INDEX_op_add_i64;
2649             goto do_addsub2;
2650         case INDEX_op_sub2_i64:
2651             opc_new = INDEX_op_sub_i64;
2652         do_addsub2:
2653             nb_iargs = 4;
2654             nb_oargs = 2;
2655             /* Test if the high part of the operation is dead, but not
2656                the low part.  The result can be optimized to a simple
2657                add or sub.  This happens often for x86_64 guest when the
2658                cpu mode is set to 32 bit.  */
2659             if (arg_temp(op->args[1])->state == TS_DEAD) {
2660                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2661                     goto do_remove;
2662                 }
2663                 /* Replace the opcode and adjust the args in place,
2664                    leaving 3 unused args at the end.  */
2665                 op->opc = opc = opc_new;
2666                 op->args[1] = op->args[2];
2667                 op->args[2] = op->args[4];
2668                 /* Fall through and mark the single-word operation live.  */
2669                 nb_iargs = 2;
2670                 nb_oargs = 1;
2671             }
2672             goto do_not_remove;
2673 
2674         case INDEX_op_mulu2_i32:
2675             opc_new = INDEX_op_mul_i32;
2676             opc_new2 = INDEX_op_muluh_i32;
2677             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2678             goto do_mul2;
2679         case INDEX_op_muls2_i32:
2680             opc_new = INDEX_op_mul_i32;
2681             opc_new2 = INDEX_op_mulsh_i32;
2682             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2683             goto do_mul2;
2684         case INDEX_op_mulu2_i64:
2685             opc_new = INDEX_op_mul_i64;
2686             opc_new2 = INDEX_op_muluh_i64;
2687             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2688             goto do_mul2;
2689         case INDEX_op_muls2_i64:
2690             opc_new = INDEX_op_mul_i64;
2691             opc_new2 = INDEX_op_mulsh_i64;
2692             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2693             goto do_mul2;
2694         do_mul2:
2695             nb_iargs = 2;
2696             nb_oargs = 2;
2697             if (arg_temp(op->args[1])->state == TS_DEAD) {
2698                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2699                     /* Both parts of the operation are dead.  */
2700                     goto do_remove;
2701                 }
2702                 /* The high part of the operation is dead; generate the low. */
2703                 op->opc = opc = opc_new;
2704                 op->args[1] = op->args[2];
2705                 op->args[2] = op->args[3];
2706             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2707                 /* The low part of the operation is dead; generate the high. */
2708                 op->opc = opc = opc_new2;
2709                 op->args[0] = op->args[1];
2710                 op->args[1] = op->args[2];
2711                 op->args[2] = op->args[3];
2712             } else {
2713                 goto do_not_remove;
2714             }
2715             /* Mark the single-word operation live.  */
2716             nb_oargs = 1;
2717             goto do_not_remove;
2718 
2719         default:
2720             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2721             nb_iargs = def->nb_iargs;
2722             nb_oargs = def->nb_oargs;
2723 
2724             /* Test if the operation can be removed because all
2725                its outputs are dead. We assume that nb_oargs == 0
2726                implies side effects */
2727             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2728                 for (i = 0; i < nb_oargs; i++) {
2729                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2730                         goto do_not_remove;
2731                     }
2732                 }
2733                 goto do_remove;
2734             }
2735             goto do_not_remove;
2736 
2737         do_remove:
2738             tcg_op_remove(s, op);
2739             break;
2740 
2741         do_not_remove:
2742             for (i = 0; i < nb_oargs; i++) {
2743                 ts = arg_temp(op->args[i]);
2744 
2745                 /* Remember the preference of the uses that followed.  */
2746                 op->output_pref[i] = *la_temp_pref(ts);
2747 
2748                 /* Output args are dead.  */
2749                 if (ts->state & TS_DEAD) {
2750                     arg_life |= DEAD_ARG << i;
2751                 }
2752                 if (ts->state & TS_MEM) {
2753                     arg_life |= SYNC_ARG << i;
2754                 }
2755                 ts->state = TS_DEAD;
2756                 la_reset_pref(ts);
2757             }
2758 
2759             /* If end of basic block, update.  */
2760             if (def->flags & TCG_OPF_BB_EXIT) {
2761                 la_func_end(s, nb_globals, nb_temps);
2762             } else if (def->flags & TCG_OPF_BB_END) {
2763                 la_bb_end(s, nb_globals, nb_temps);
2764             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2765                 la_global_sync(s, nb_globals);
2766                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2767                     la_cross_call(s, nb_temps);
2768                 }
2769             }
2770 
2771             /* Record arguments that die in this opcode.  */
2772             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2773                 ts = arg_temp(op->args[i]);
2774                 if (ts->state & TS_DEAD) {
2775                     arg_life |= DEAD_ARG << i;
2776                 }
2777             }
2778 
2779             /* Input arguments are live for preceding opcodes.  */
2780             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2781                 ts = arg_temp(op->args[i]);
2782                 if (ts->state & TS_DEAD) {
2783                     /* For operands that were dead, initially allow
2784                        all regs for the type.  */
2785                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2786                     ts->state &= ~TS_DEAD;
2787                 }
2788             }
2789 
2790             /* Incorporate constraints for this operand.  */
2791             switch (opc) {
2792             case INDEX_op_mov_i32:
2793             case INDEX_op_mov_i64:
2794                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2795                    have proper constraints.  That said, special case
2796                    moves to propagate preferences backward.  */
2797                 if (IS_DEAD_ARG(1)) {
2798                     *la_temp_pref(arg_temp(op->args[0]))
2799                         = *la_temp_pref(arg_temp(op->args[1]));
2800                 }
2801                 break;
2802 
2803             default:
2804                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2805                     const TCGArgConstraint *ct = &def->args_ct[i];
2806                     TCGRegSet set, *pset;
2807 
2808                     ts = arg_temp(op->args[i]);
2809                     pset = la_temp_pref(ts);
2810                     set = *pset;
2811 
2812                     set &= ct->u.regs;
2813                     if (ct->ct & TCG_CT_IALIAS) {
2814                         set &= op->output_pref[ct->alias_index];
2815                     }
2816                     /* If the combination is not possible, restart.  */
2817                     if (set == 0) {
2818                         set = ct->u.regs;
2819                     }
2820                     *pset = set;
2821                 }
2822                 break;
2823             }
2824             break;
2825         }
2826         op->life = arg_life;
2827     }
2828 }
2829 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For every global flagged indirect_reg a fresh temporary (the "direct"
 * temp) is allocated and remembered in the global's state_ptr.  The op
 * list is then walked once, in order:
 *   - before an op that reads such a global while it is TS_DEAD, a
 *     ld_i32/ld_i64 from the global's memory slot into the direct temp
 *     is inserted;
 *   - op arguments naming the global are rewritten to name the direct temp;
 *   - after an op whose output carries the sync flag, a st_i32/st_i64 of
 *     the direct temp back to the global's memory slot is inserted.
 *
 * Returns true if at least one op argument was rewritten.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Remaining temps (i continues from nb_globals) have no direct
       counterpart; this also covers the temps allocated just above.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* The _SAFE iterator is used because ops are inserted around 'op'
       while walking the list.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        /* Named arg_life because IS_DEAD_ARG/NEED_SYNC_ARG are applied to
           this op's life data below.  */
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            /* Calls carry their argument counts and flags in the op.  */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            /* An input argument may map to no temp at all; skip those.  */
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    /* ld dir_ts, mem_base, mem_offset */
                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        for (i = 0; i < nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            /* Only temps with a direct counterpart need rewriting.  */
            if (!dir_ts) {
                continue;
            }
            op->args[i] = temp_arg(dir_ts);
            changes = true;

            /* The output is now live and modified.  */
            arg_ts->state = 0;

            /* Sync outputs upon their last write.  */
            if (NEED_SYNC_ARG(i)) {
                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_st_i32
                                  : INDEX_op_st_i64);
                TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                /* st dir_ts, mem_base, mem_offset */
                sop->args[0] = temp_arg(dir_ts);
                sop->args[1] = temp_arg(arg_ts->mem_base);
                sop->args[2] = arg_ts->mem_offset;

                arg_ts->state = TS_MEM;
            }
            /* Drop outputs that are dead.  */
            if (IS_DEAD_ARG(i)) {
                arg_ts->state = TS_DEAD;
            }
        }
    }

    return changes;
}
2982 
2983 #ifdef CONFIG_DEBUG_TCG
2984 static void dump_regs(TCGContext *s)
2985 {
2986     TCGTemp *ts;
2987     int i;
2988     char buf[64];
2989 
2990     for(i = 0; i < s->nb_temps; i++) {
2991         ts = &s->temps[i];
2992         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2993         switch(ts->val_type) {
2994         case TEMP_VAL_REG:
2995             printf("%s", tcg_target_reg_names[ts->reg]);
2996             break;
2997         case TEMP_VAL_MEM:
2998             printf("%d(%s)", (int)ts->mem_offset,
2999                    tcg_target_reg_names[ts->mem_base->reg]);
3000             break;
3001         case TEMP_VAL_CONST:
3002             printf("$0x%" TCG_PRIlx, ts->val);
3003             break;
3004         case TEMP_VAL_DEAD:
3005             printf("D");
3006             break;
3007         default:
3008             printf("???");
3009             break;
3010         }
3011         printf("\n");
3012     }
3013 
3014     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3015         if (s->reg_to_temp[i] != NULL) {
3016             printf("%s: %s\n",
3017                    tcg_target_reg_names[i],
3018                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3019         }
3020     }
3021 }
3022 
3023 static void check_regs(TCGContext *s)
3024 {
3025     int reg;
3026     int k;
3027     TCGTemp *ts;
3028     char buf[64];
3029 
3030     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3031         ts = s->reg_to_temp[reg];
3032         if (ts != NULL) {
3033             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3034                 printf("Inconsistency for register %s:\n",
3035                        tcg_target_reg_names[reg]);
3036                 goto fail;
3037             }
3038         }
3039     }
3040     for (k = 0; k < s->nb_temps; k++) {
3041         ts = &s->temps[k];
3042         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3043             && s->reg_to_temp[ts->reg] != ts) {
3044             printf("Inconsistency for temp %s:\n",
3045                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3046         fail:
3047             printf("reg state:\n");
3048             dump_regs(s);
3049             tcg_abort();
3050         }
3051     }
3052 }
3053 #endif
3054 
3055 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3056 {
3057 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3058     /* Sparc64 stack is accessed with offset of 2047 */
3059     s->current_frame_offset = (s->current_frame_offset +
3060                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3061         ~(sizeof(tcg_target_long) - 1);
3062 #endif
3063     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3064         s->frame_end) {
3065         tcg_abort();
3066     }
3067     ts->mem_offset = s->current_frame_offset;
3068     ts->mem_base = s->frame_temp;
3069     ts->mem_allocated = 1;
3070     s->current_frame_offset += sizeof(tcg_target_long);
3071 }
3072 
3073 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3074 
3075 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3076    mark it free; otherwise mark it dead.  */
3077 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3078 {
3079     if (ts->fixed_reg) {
3080         return;
3081     }
3082     if (ts->val_type == TEMP_VAL_REG) {
3083         s->reg_to_temp[ts->reg] = NULL;
3084     }
3085     ts->val_type = (free_or_dead < 0
3086                     || ts->temp_local
3087                     || ts->temp_global
3088                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3089 }
3090 
/* Mark a temporary as dead.  Thin wrapper around temp_free_or_dead()
   with a positive 'free_or_dead', i.e. the "dead" (not "free") case.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3096 
3097 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3098    registers needs to be allocated to store a constant.  If 'free_or_dead'
3099    is non-zero, subsequently release the temporary; if it is positive, the
3100    temp is dead; if it is negative, the temp is free.  */
3101 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3102                       TCGRegSet preferred_regs, int free_or_dead)
3103 {
3104     if (ts->fixed_reg) {
3105         return;
3106     }
3107     if (!ts->mem_coherent) {
3108         if (!ts->mem_allocated) {
3109             temp_allocate_frame(s, ts);
3110         }
3111         switch (ts->val_type) {
3112         case TEMP_VAL_CONST:
3113             /* If we're going to free the temp immediately, then we won't
3114                require it later in a register, so attempt to store the
3115                constant to memory directly.  */
3116             if (free_or_dead
3117                 && tcg_out_sti(s, ts->type, ts->val,
3118                                ts->mem_base->reg, ts->mem_offset)) {
3119                 break;
3120             }
3121             temp_load(s, ts, tcg_target_available_regs[ts->type],
3122                       allocated_regs, preferred_regs);
3123             /* fallthrough */
3124 
3125         case TEMP_VAL_REG:
3126             tcg_out_st(s, ts->type, ts->reg,
3127                        ts->mem_base->reg, ts->mem_offset);
3128             break;
3129 
3130         case TEMP_VAL_MEM:
3131             break;
3132 
3133         case TEMP_VAL_DEAD:
3134         default:
3135             tcg_abort();
3136         }
3137         ts->mem_coherent = 1;
3138     }
3139     if (free_or_dead) {
3140         temp_free_or_dead(s, ts, free_or_dead);
3141     }
3142 }
3143 
3144 /* free register 'reg' by spilling the corresponding temporary if necessary */
3145 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3146 {
3147     TCGTemp *ts = s->reg_to_temp[reg];
3148     if (ts != NULL) {
3149         temp_sync(s, ts, allocated_regs, 0, -1);
3150     }
3151 }
3152 
3153 /**
3154  * tcg_reg_alloc:
3155  * @required_regs: Set of registers in which we must allocate.
3156  * @allocated_regs: Set of registers which must be avoided.
3157  * @preferred_regs: Set of registers we should prefer.
3158  * @rev: True if we search the registers in "indirect" order.
3159  *
3160  * The allocated register must be in @required_regs & ~@allocated_regs,
3161  * but if we can put it in @preferred_regs we may save a move later.
3162  */
3163 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3164                             TCGRegSet allocated_regs,
3165                             TCGRegSet preferred_regs, bool rev)
3166 {
3167     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3168     TCGRegSet reg_ct[2];
3169     const int *order;
3170 
3171     reg_ct[1] = required_regs & ~allocated_regs;
3172     tcg_debug_assert(reg_ct[1] != 0);
3173     reg_ct[0] = reg_ct[1] & preferred_regs;
3174 
3175     /* Skip the preferred_regs option if it cannot be satisfied,
3176        or if the preference made no difference.  */
3177     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3178 
3179     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3180 
3181     /* Try free registers, preferences first.  */
3182     for (j = f; j < 2; j++) {
3183         TCGRegSet set = reg_ct[j];
3184 
3185         if (tcg_regset_single(set)) {
3186             /* One register in the set.  */
3187             TCGReg reg = tcg_regset_first(set);
3188             if (s->reg_to_temp[reg] == NULL) {
3189                 return reg;
3190             }
3191         } else {
3192             for (i = 0; i < n; i++) {
3193                 TCGReg reg = order[i];
3194                 if (s->reg_to_temp[reg] == NULL &&
3195                     tcg_regset_test_reg(set, reg)) {
3196                     return reg;
3197                 }
3198             }
3199         }
3200     }
3201 
3202     /* We must spill something.  */
3203     for (j = f; j < 2; j++) {
3204         TCGRegSet set = reg_ct[j];
3205 
3206         if (tcg_regset_single(set)) {
3207             /* One register in the set.  */
3208             TCGReg reg = tcg_regset_first(set);
3209             tcg_reg_free(s, reg, allocated_regs);
3210             return reg;
3211         } else {
3212             for (i = 0; i < n; i++) {
3213                 TCGReg reg = order[i];
3214                 if (tcg_regset_test_reg(set, reg)) {
3215                     tcg_reg_free(s, reg, allocated_regs);
3216                     return reg;
3217                 }
3218             }
3219         }
3220     }
3221 
3222     tcg_abort();
3223 }
3224 
3225 /* Make sure the temporary is in a register.  If needed, allocate the register
3226    from DESIRED while avoiding ALLOCATED.  */
3227 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3228                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3229 {
3230     TCGReg reg;
3231 
3232     switch (ts->val_type) {
3233     case TEMP_VAL_REG:
3234         return;
3235     case TEMP_VAL_CONST:
3236         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3237                             preferred_regs, ts->indirect_base);
3238         tcg_out_movi(s, ts->type, reg, ts->val);
3239         ts->mem_coherent = 0;
3240         break;
3241     case TEMP_VAL_MEM:
3242         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3243                             preferred_regs, ts->indirect_base);
3244         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3245         ts->mem_coherent = 1;
3246         break;
3247     case TEMP_VAL_DEAD:
3248     default:
3249         tcg_abort();
3250     }
3251     ts->reg = reg;
3252     ts->val_type = TEMP_VAL_REG;
3253     s->reg_to_temp[reg] = ts;
3254 }
3255 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.

   In this implementation no code is emitted and neither 's' nor
   'allocated_regs' is used: the function only checks, in debug builds,
   that the temp is already in memory (or is a fixed register).  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
3264 
3265 /* save globals to their canonical location and assume they can be
3266    modified be the following code. 'allocated_regs' is used in case a
3267    temporary registers needs to be allocated to store a constant. */
3268 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3269 {
3270     int i, n;
3271 
3272     for (i = 0, n = s->nb_globals; i < n; i++) {
3273         temp_save(s, &s->temps[i], allocated_regs);
3274     }
3275 }
3276 
3277 /* sync globals to their canonical location and assume they can be
3278    read by the following code. 'allocated_regs' is used in case a
3279    temporary registers needs to be allocated to store a constant. */
3280 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3281 {
3282     int i, n;
3283 
3284     for (i = 0, n = s->nb_globals; i < n; i++) {
3285         TCGTemp *ts = &s->temps[i];
3286         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3287                          || ts->fixed_reg
3288                          || ts->mem_coherent);
3289     }
3290 }
3291 
3292 /* at the end of a basic block, we assume all temporaries are dead and
3293    all globals are stored at their canonical location. */
3294 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3295 {
3296     int i;
3297 
3298     for (i = s->nb_globals; i < s->nb_temps; i++) {
3299         TCGTemp *ts = &s->temps[i];
3300         if (ts->temp_local) {
3301             temp_save(s, ts, allocated_regs);
3302         } else {
3303             /* The liveness analysis already ensures that temps are dead.
3304                Keep an tcg_debug_assert for safety. */
3305             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3306         }
3307     }
3308 
3309     save_globals(s, allocated_regs);
3310 }
3311 
3312 /*
3313  * Specialized code generation for INDEX_op_movi_*.
3314  */
3315 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3316                                   tcg_target_ulong val, TCGLifeData arg_life,
3317                                   TCGRegSet preferred_regs)
3318 {
3319     /* ENV should not be modified.  */
3320     tcg_debug_assert(!ots->fixed_reg);
3321 
3322     /* The movi is not explicitly generated here.  */
3323     if (ots->val_type == TEMP_VAL_REG) {
3324         s->reg_to_temp[ots->reg] = NULL;
3325     }
3326     ots->val_type = TEMP_VAL_CONST;
3327     ots->val = val;
3328     ots->mem_coherent = 0;
3329     if (NEED_SYNC_ARG(0)) {
3330         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3331     } else if (IS_DEAD_ARG(0)) {
3332         temp_dead(s, ots);
3333     }
3334 }
3335 
3336 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3337 {
3338     TCGTemp *ots = arg_temp(op->args[0]);
3339     tcg_target_ulong val = op->args[1];
3340 
3341     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3342 }
3343 
3344 /*
3345  * Specialized code generation for INDEX_op_mov_*.
3346  */
3347 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3348 {
3349     const TCGLifeData arg_life = op->life;
3350     TCGRegSet allocated_regs, preferred_regs;
3351     TCGTemp *ts, *ots;
3352     TCGType otype, itype;
3353 
3354     allocated_regs = s->reserved_regs;
3355     preferred_regs = op->output_pref[0];
3356     ots = arg_temp(op->args[0]);
3357     ts = arg_temp(op->args[1]);
3358 
3359     /* ENV should not be modified.  */
3360     tcg_debug_assert(!ots->fixed_reg);
3361 
3362     /* Note that otype != itype for no-op truncation.  */
3363     otype = ots->type;
3364     itype = ts->type;
3365 
3366     if (ts->val_type == TEMP_VAL_CONST) {
3367         /* propagate constant or generate sti */
3368         tcg_target_ulong val = ts->val;
3369         if (IS_DEAD_ARG(1)) {
3370             temp_dead(s, ts);
3371         }
3372         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3373         return;
3374     }
3375 
3376     /* If the source value is in memory we're going to be forced
3377        to have it in a register in order to perform the copy.  Copy
3378        the SOURCE value into its own register first, that way we
3379        don't have to reload SOURCE the next time it is used. */
3380     if (ts->val_type == TEMP_VAL_MEM) {
3381         temp_load(s, ts, tcg_target_available_regs[itype],
3382                   allocated_regs, preferred_regs);
3383     }
3384 
3385     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3386     if (IS_DEAD_ARG(0)) {
3387         /* mov to a non-saved dead register makes no sense (even with
3388            liveness analysis disabled). */
3389         tcg_debug_assert(NEED_SYNC_ARG(0));
3390         if (!ots->mem_allocated) {
3391             temp_allocate_frame(s, ots);
3392         }
3393         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3394         if (IS_DEAD_ARG(1)) {
3395             temp_dead(s, ts);
3396         }
3397         temp_dead(s, ots);
3398     } else {
3399         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3400             /* the mov can be suppressed */
3401             if (ots->val_type == TEMP_VAL_REG) {
3402                 s->reg_to_temp[ots->reg] = NULL;
3403             }
3404             ots->reg = ts->reg;
3405             temp_dead(s, ts);
3406         } else {
3407             if (ots->val_type != TEMP_VAL_REG) {
3408                 /* When allocating a new register, make sure to not spill the
3409                    input one. */
3410                 tcg_regset_set_reg(allocated_regs, ts->reg);
3411                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3412                                          allocated_regs, preferred_regs,
3413                                          ots->indirect_base);
3414             }
3415             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3416                 /*
3417                  * Cross register class move not supported.
3418                  * Store the source register into the destination slot
3419                  * and leave the destination temp as TEMP_VAL_MEM.
3420                  */
3421                 assert(!ots->fixed_reg);
3422                 if (!ts->mem_allocated) {
3423                     temp_allocate_frame(s, ots);
3424                 }
3425                 tcg_out_st(s, ts->type, ts->reg,
3426                            ots->mem_base->reg, ots->mem_offset);
3427                 ots->mem_coherent = 1;
3428                 temp_free_or_dead(s, ots, -1);
3429                 return;
3430             }
3431         }
3432         ots->val_type = TEMP_VAL_REG;
3433         ots->mem_coherent = 0;
3434         s->reg_to_temp[ots->reg] = ots;
3435         if (NEED_SYNC_ARG(0)) {
3436             temp_sync(s, ots, allocated_regs, 0, 0);
3437         }
3438     }
3439 }
3440 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * args[0] is the vector destination temp (ots), args[1] the source
 * temp (its).  A constant source is forwarded to
 * tcg_reg_alloc_do_movi().  Otherwise an output register is allocated
 * and the value is duplicated into it, trying in turn: a direct
 * register dup, a move into the output register followed by an in-place
 * dup, a dup straight from memory, and finally a load into the output
 * register followed by an in-place dup.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    /* Must be named arg_life: IS_DEAD_ARG/NEED_SYNC_ARG are applied to
       this op's life data below.  */
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!ots->fixed_reg);

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    /* Register constraints of dup_vec: output (arg 0) and input (arg 1).  */
    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                /* Value is now in ots->reg; finish with the in-place dup
                   after the switch.  */
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts the element of size (1 << vece) sits at the
           high-address end of the 4- or 8-byte slot, so the address is
           adjusted; on little-endian hosts no adjustment is needed.  */
#ifdef HOST_WORDS_BIGENDIAN
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* Dup from memory failed: load the value into the output register
           and dup it in place after the switch.  */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* Apply the liveness bits: kill the input, then sync and/or kill the
       output as requested.  */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3552 
/*
 * Register-allocate and emit one generic TCG op.
 *
 * Steps, in order:
 *  1. Copy the op's constant arguments straight into new_args[].
 *  2. For each input (visited in def->sorted_args order) either pass a
 *     matching constant as an immediate, or get the temp into a register
 *     accepted by the argument constraint.
 *  3. Kill the inputs that the liveness data flags as dead.
 *  4. For TCG_OPF_BB_END ops run the end-of-basic-block handling; otherwise
 *     free call-clobbered registers (TCG_OPF_CALL_CLOBBER), sync globals
 *     (TCG_OPF_SIDE_EFFECTS) and pick a register for every output.
 *  5. Emit the host code through tcg_out_op() / tcg_out_vec_op().
 *  6. Sync and/or kill the outputs as requested by the liveness data.
 *
 * @s:  TCG context being compiled.
 * @op: op to process; its opc, args[], life and output_pref[] are read.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /*
     * NOTE(review): arg_life is never named below; presumably it is consumed
     * by the IS_DEAD_ARG() / NEED_SYNC_ARG() macros -- confirm at their
     * definition.
     */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Registers that may not be handed out for inputs / for outputs. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Per-argument value given to the backend: a register or a constant. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* Non-zero when new_args[i] holds an immediate rather than a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Both sets start out as the registers reserved by the context. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* k walks the sorted order; i is the real index into op->args. */
        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ct & TCG_CT_IALIAS) {
            /* This input shares its register with output alias_index. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    /* Only inputs already processed (k2 < k) can conflict. */
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->sorted_args[nb_oargs + k2];
                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                            reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                /* The input can be reused in place: steer it towards the
                   registers preferred for the aliased output. */
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        /* Also entered by goto from the alias checks above. */
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from being allocated on top of this one. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!ts->fixed_reg);

            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* Reuse the register of the aliased (non-constant) input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* Must not overlap any input nor any earlier output. */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                /* Only earlier outputs are excluded; inputs may be reused. */
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Drop the temp's previous register binding, if it had one. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        if (NEED_SYNC_ARG(i)) {
            /* The last argument tells temp_sync whether the temp is dead. */
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3740 
/*
 * STACK_DIR(x): x negated when the target defines TCG_TARGET_STACK_GROWSUP,
 * x unchanged otherwise.
 * NOTE(review): no use of this macro is visible in the code that follows --
 * confirm whether it is still needed.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3746 
/*
 * Register-allocate and emit a call op.
 *
 * The op's argument vector is laid out as: nb_oargs outputs, nb_iargs
 * inputs, the callee address, then the call flags.  The first inputs go
 * into tcg_target_call_iarg_regs[]; the remaining ones are stored on the
 * call stack.  After the inputs are placed, dead inputs are released,
 * call-clobbered registers are freed, globals are synced or saved according
 * to the call flags, the call is emitted, and the outputs are bound to
 * tcg_target_call_oarg_regs[].
 *
 * Aborts (tcg_abort) when the stack arguments need more room than
 * TCG_STATIC_CALL_ARGS_SIZE.
 *
 * @s:  TCG context being compiled.
 * @op: the call op.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /*
     * NOTE(review): arg_life is never named below; presumably it is consumed
     * by the IS_DEAD_ARG() / NEED_SYNC_ARG() macros -- confirm at their
     * definition.
     */
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The two trailing args hold the callee address and the call flags. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers: at most the size of the
       target's input-register table, at most nb_iargs. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
        /* Grows-up stacks step the offset before the store, others after. */
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy arguments still consume a slot but store nothing. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever occupies the argument register, then
                       copy the temp into it. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Not in a register yet: load it with the argument register
                   as the only allowed destination. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            /* Protect this argument register from later evictions. */
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        reg = tcg_target_call_oarg_regs[i];
        /* The return register is expected to be unowned at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        /* Drop the temp's previous register binding, if it had one. */
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        /* The register now holds a new value not yet written to memory. */
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3881 
3882 #ifdef CONFIG_PROFILER
3883 
/* avoid copy/paste errors */
/*
 * PROF_ADD(to, from, field): add an atomic read of (from)->field to
 * (to)->field.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += atomic_read(&((from)->field));   \
    } while (0)

/*
 * PROF_MAX(to, from, field): raise (to)->field to an atomic read of
 * (from)->field when that value is larger.  The read happens exactly once.
 */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
3897 
3898 /* Pass in a zero'ed @prof */
3899 static inline
3900 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3901 {
3902     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3903     unsigned int i;
3904 
3905     for (i = 0; i < n_ctxs; i++) {
3906         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3907         const TCGProfile *orig = &s->prof;
3908 
3909         if (counters) {
3910             PROF_ADD(prof, orig, cpu_exec_time);
3911             PROF_ADD(prof, orig, tb_count1);
3912             PROF_ADD(prof, orig, tb_count);
3913             PROF_ADD(prof, orig, op_count);
3914             PROF_MAX(prof, orig, op_count_max);
3915             PROF_ADD(prof, orig, temp_count);
3916             PROF_MAX(prof, orig, temp_count_max);
3917             PROF_ADD(prof, orig, del_op_count);
3918             PROF_ADD(prof, orig, code_in_len);
3919             PROF_ADD(prof, orig, code_out_len);
3920             PROF_ADD(prof, orig, search_out_len);
3921             PROF_ADD(prof, orig, interm_time);
3922             PROF_ADD(prof, orig, code_time);
3923             PROF_ADD(prof, orig, la_time);
3924             PROF_ADD(prof, orig, opt_time);
3925             PROF_ADD(prof, orig, restore_count);
3926             PROF_ADD(prof, orig, restore_time);
3927         }
3928         if (table) {
3929             int i;
3930 
3931             for (i = 0; i < NB_OPS; i++) {
3932                 PROF_ADD(prof, orig, table_op_count[i]);
3933             }
3934         }
3935     }
3936 }
3937 
3938 #undef PROF_ADD
3939 #undef PROF_MAX
3940 
3941 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3942 {
3943     tcg_profile_snapshot(prof, true, false);
3944 }
3945 
3946 static void tcg_profile_snapshot_table(TCGProfile *prof)
3947 {
3948     tcg_profile_snapshot(prof, false, true);
3949 }
3950 
3951 void tcg_dump_op_count(void)
3952 {
3953     TCGProfile prof = {};
3954     int i;
3955 
3956     tcg_profile_snapshot_table(&prof);
3957     for (i = 0; i < NB_OPS; i++) {
3958         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3959                     prof.table_op_count[i]);
3960     }
3961 }
3962 
3963 int64_t tcg_cpu_exec_time(void)
3964 {
3965     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3966     unsigned int i;
3967     int64_t ret = 0;
3968 
3969     for (i = 0; i < n_ctxs; i++) {
3970         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3971         const TCGProfile *prof = &s->prof;
3972 
3973         ret += atomic_read(&prof->cpu_exec_time);
3974     }
3975     return ret;
3976 }
3977 #else
/* Build without CONFIG_PROFILER: only report that no data is available. */
void tcg_dump_op_count(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
3982 
/*
 * Build without CONFIG_PROFILER: there is no execution time to report, so
 * log an error naming this function and terminate the process.
 */
int64_t tcg_cpu_exec_time(void)
{
    const char *const caller = __func__;

    error_report("%s: TCG profiler not compiled", caller);
    exit(EXIT_FAILURE);
}
3988 #endif
3989 
3990 
/*
 * Translate the op list held in @s into host code placed at tb->tc.ptr.
 *
 * The op list is first run through the optimizer (when
 * USE_TCG_OPTIMIZATIONS is defined), the reachable-code pass and liveness
 * analysis; then each op is register-allocated and emitted in order.
 *
 * @s:  TCG context holding the ops, labels and temps to compile.
 * @tb: translation block; tb->tc.ptr is the output buffer and tb->pc is
 *      used to filter debug logging.
 *
 * Returns the size of the generated code on success, or a negative value:
 *   -1  the code pointer went past s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs() failed;
 *   any negative value returned by tcg_out_ldst_finalize() or
 *   tcg_out_pool_finalize() is passed through unchanged.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops of this TB for the total / maximum statistics. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        /* Same statistics for the number of temps. */
        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        /* Every offending label is logged before the assertion fires. */
        assert(!error);
    }
#endif

    /* Timers are accumulated as "subtract start clock, add end clock". */
#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    /* Output starts at the beginning of the TB's code area. */
    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Index of the current guest instruction; -1 until the first
       insn_start op is seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        /* A few opcodes have dedicated allocators; the rest use the
           generic one in the default case. */
        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest instruction by recording where its
               host code ends. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Record the start words of the new guest instruction. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word spans two op args: low half, then high half. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* A label ends the basic block before it is placed at the
               current output position. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start op must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    /* Close the last guest instruction. */
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
4207 
4208 #ifdef CONFIG_PROFILER
4209 void tcg_dump_info(void)
4210 {
4211     TCGProfile prof = {};
4212     const TCGProfile *s;
4213     int64_t tb_count;
4214     int64_t tb_div_count;
4215     int64_t tot;
4216 
4217     tcg_profile_snapshot_counters(&prof);
4218     s = &prof;
4219     tb_count = s->tb_count;
4220     tb_div_count = tb_count ? tb_count : 1;
4221     tot = s->interm_time + s->code_time;
4222 
4223     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4224                 tot, tot / 2.4e9);
4225     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4226                 " %0.1f%%)\n",
4227                 tb_count, s->tb_count1 - tb_count,
4228                 (double)(s->tb_count1 - s->tb_count)
4229                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4230     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4231                 (double)s->op_count / tb_div_count, s->op_count_max);
4232     qemu_printf("deleted ops/TB      %0.2f\n",
4233                 (double)s->del_op_count / tb_div_count);
4234     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4235                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4236     qemu_printf("avg host code/TB    %0.1f\n",
4237                 (double)s->code_out_len / tb_div_count);
4238     qemu_printf("avg search data/TB  %0.1f\n",
4239                 (double)s->search_out_len / tb_div_count);
4240 
4241     qemu_printf("cycles/op           %0.1f\n",
4242                 s->op_count ? (double)tot / s->op_count : 0);
4243     qemu_printf("cycles/in byte      %0.1f\n",
4244                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4245     qemu_printf("cycles/out byte     %0.1f\n",
4246                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4247     qemu_printf("cycles/search byte     %0.1f\n",
4248                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4249     if (tot == 0) {
4250         tot = 1;
4251     }
4252     qemu_printf("  gen_interm time   %0.1f%%\n",
4253                 (double)s->interm_time / tot * 100.0);
4254     qemu_printf("  gen_code time     %0.1f%%\n",
4255                 (double)s->code_time / tot * 100.0);
4256     qemu_printf("optim./code time    %0.1f%%\n",
4257                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4258                 * 100.0);
4259     qemu_printf("liveness/code time  %0.1f%%\n",
4260                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4261     qemu_printf("cpu_restore count   %" PRId64 "\n",
4262                 s->restore_count);
4263     qemu_printf("  avg cycles        %0.1f\n",
4264                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4265 }
4266 #else
/* Build without CONFIG_PROFILER: only report that no data is available. */
void tcg_dump_info(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4271 #endif
4272 
4273 #ifdef ELF_HOST_MACHINE
4274 /* In order to use this feature, the backend needs to do three things:
4275 
4276    (1) Define ELF_HOST_MACHINE to indicate both what value to
4277        put into the ELF image and to indicate support for the feature.
4278 
4279    (2) Define tcg_register_jit.  This should create a buffer containing
4280        the contents of a .debug_frame section that describes the post-
4281        prologue unwind info for the tcg machine.
4282 
4283    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4284 */
4285 
4286 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes of the GDB JIT interface.  The numeric values are part of
 * that interface and must not be changed.
 * NOTE(review): presumably these are the values stored in
 * jit_descriptor.action_flag -- confirm against the GDB documentation.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
4292 
/*
 * One registered symbol file of the GDB JIT interface: a node of a doubly
 * linked list (next_entry / prev_entry) carrying the address and size of an
 * in-memory symbol file.  The layout is fixed by the GDB interface.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
4299 
/*
 * Root descriptor of the GDB JIT interface: interface version, a pending
 * action flag, the entry that action refers to, and the head of the entry
 * list.  The layout is fixed by the GDB interface.
 */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4306 
/*
 * Hook function of the GDB JIT interface.  It intentionally does nothing:
 * it is declared noinline and contains an empty asm statement so that a
 * real, callable function remains in the binary.
 * NOTE(review): per the GDB documentation the debugger puts a breakpoint on
 * this symbol to learn about registrations -- confirm there.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4312 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
/* Initializer order: version = 1, action_flag = 0, relevant_entry = 0,
   first_entry = 0. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4316 
4317 /* End GDB interface.  */
4318 
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings; the search starts at
 * offset 1, skipping the table's first byte.  There is no end-of-table
 * check: @str must be present in the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cursor;

    /* Hop from one string to the next until an exact match is found. */
    for (cursor = strtab + 1;
         strcmp(cursor, str) != 0;
         cursor += strlen(cursor) + 1) {
        continue;
    }

    return cursor - strtab;
}
4330 
4331 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4332                                  const void *debug_frame,
4333                                  size_t debug_frame_size)
4334 {
4335     struct __attribute__((packed)) DebugInfo {
4336         uint32_t  len;
4337         uint16_t  version;
4338         uint32_t  abbrev;
4339         uint8_t   ptr_size;
4340         uint8_t   cu_die;
4341         uint16_t  cu_lang;
4342         uintptr_t cu_low_pc;
4343         uintptr_t cu_high_pc;
4344         uint8_t   fn_die;
4345         char      fn_name[16];
4346         uintptr_t fn_low_pc;
4347         uintptr_t fn_high_pc;
4348         uint8_t   cu_eoc;
4349     };
4350 
4351     struct ElfImage {
4352         ElfW(Ehdr) ehdr;
4353         ElfW(Phdr) phdr;
4354         ElfW(Shdr) shdr[7];
4355         ElfW(Sym)  sym[2];
4356         struct DebugInfo di;
4357         uint8_t    da[24];
4358         char       str[80];
4359     };
4360 
4361     struct ElfImage *img;
4362 
4363     static const struct ElfImage img_template = {
4364         .ehdr = {
4365             .e_ident[EI_MAG0] = ELFMAG0,
4366             .e_ident[EI_MAG1] = ELFMAG1,
4367             .e_ident[EI_MAG2] = ELFMAG2,
4368             .e_ident[EI_MAG3] = ELFMAG3,
4369             .e_ident[EI_CLASS] = ELF_CLASS,
4370             .e_ident[EI_DATA] = ELF_DATA,
4371             .e_ident[EI_VERSION] = EV_CURRENT,
4372             .e_type = ET_EXEC,
4373             .e_machine = ELF_HOST_MACHINE,
4374             .e_version = EV_CURRENT,
4375             .e_phoff = offsetof(struct ElfImage, phdr),
4376             .e_shoff = offsetof(struct ElfImage, shdr),
4377             .e_ehsize = sizeof(ElfW(Shdr)),
4378             .e_phentsize = sizeof(ElfW(Phdr)),
4379             .e_phnum = 1,
4380             .e_shentsize = sizeof(ElfW(Shdr)),
4381             .e_shnum = ARRAY_SIZE(img->shdr),
4382             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4383 #ifdef ELF_HOST_FLAGS
4384             .e_flags = ELF_HOST_FLAGS,
4385 #endif
4386 #ifdef ELF_OSABI
4387             .e_ident[EI_OSABI] = ELF_OSABI,
4388 #endif
4389         },
4390         .phdr = {
4391             .p_type = PT_LOAD,
4392             .p_flags = PF_X,
4393         },
4394         .shdr = {
4395             [0] = { .sh_type = SHT_NULL },
4396             /* Trick: The contents of code_gen_buffer are not present in
4397                this fake ELF file; that got allocated elsewhere.  Therefore
4398                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4399                will not look for contents.  We can record any address.  */
4400             [1] = { /* .text */
4401                 .sh_type = SHT_NOBITS,
4402                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4403             },
4404             [2] = { /* .debug_info */
4405                 .sh_type = SHT_PROGBITS,
4406                 .sh_offset = offsetof(struct ElfImage, di),
4407                 .sh_size = sizeof(struct DebugInfo),
4408             },
4409             [3] = { /* .debug_abbrev */
4410                 .sh_type = SHT_PROGBITS,
4411                 .sh_offset = offsetof(struct ElfImage, da),
4412                 .sh_size = sizeof(img->da),
4413             },
4414             [4] = { /* .debug_frame */
4415                 .sh_type = SHT_PROGBITS,
4416                 .sh_offset = sizeof(struct ElfImage),
4417             },
4418             [5] = { /* .symtab */
4419                 .sh_type = SHT_SYMTAB,
4420                 .sh_offset = offsetof(struct ElfImage, sym),
4421                 .sh_size = sizeof(img->sym),
4422                 .sh_info = 1,
4423                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4424                 .sh_entsize = sizeof(ElfW(Sym)),
4425             },
4426             [6] = { /* .strtab */
4427                 .sh_type = SHT_STRTAB,
4428                 .sh_offset = offsetof(struct ElfImage, str),
4429                 .sh_size = sizeof(img->str),
4430             }
4431         },
4432         .sym = {
4433             [1] = { /* code_gen_buffer */
4434                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4435                 .st_shndx = 1,
4436             }
4437         },
4438         .di = {
4439             .len = sizeof(struct DebugInfo) - 4,
4440             .version = 2,
4441             .ptr_size = sizeof(void *),
4442             .cu_die = 1,
4443             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4444             .fn_die = 2,
4445             .fn_name = "code_gen_buffer"
4446         },
4447         .da = {
4448             1,          /* abbrev number (the cu) */
4449             0x11, 1,    /* DW_TAG_compile_unit, has children */
4450             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4451             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4452             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4453             0, 0,       /* end of abbrev */
4454             2,          /* abbrev number (the fn) */
4455             0x2e, 0,    /* DW_TAG_subprogram, no children */
4456             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4457             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4458             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4459             0, 0,       /* end of abbrev */
4460             0           /* no more abbrev */
4461         },
4462         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4463                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4464     };
4465 
4466     /* We only need a single jit entry; statically allocate it.  */
4467     static struct jit_code_entry one_entry;
4468 
4469     uintptr_t buf = (uintptr_t)buf_ptr;
4470     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4471     DebugFrameHeader *dfh;
4472 
4473     img = g_malloc(img_size);
4474     *img = img_template;
4475 
4476     img->phdr.p_vaddr = buf;
4477     img->phdr.p_paddr = buf;
4478     img->phdr.p_memsz = buf_size;
4479 
4480     img->shdr[1].sh_name = find_string(img->str, ".text");
4481     img->shdr[1].sh_addr = buf;
4482     img->shdr[1].sh_size = buf_size;
4483 
4484     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4485     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4486 
4487     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4488     img->shdr[4].sh_size = debug_frame_size;
4489 
4490     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4491     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4492 
4493     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4494     img->sym[1].st_value = buf;
4495     img->sym[1].st_size = buf_size;
4496 
4497     img->di.cu_low_pc = buf;
4498     img->di.cu_high_pc = buf + buf_size;
4499     img->di.fn_low_pc = buf;
4500     img->di.fn_high_pc = buf + buf_size;
4501 
4502     dfh = (DebugFrameHeader *)(img + 1);
4503     memcpy(dfh, debug_frame, debug_frame_size);
4504     dfh->fde.func_start = buf;
4505     dfh->fde.func_len = buf_size;
4506 
4507 #ifdef DEBUG_JIT
4508     /* Enable this block to be able to debug the ELF image file creation.
4509        One can use readelf, objdump, or other inspection utilities.  */
4510     {
4511         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4512         if (f) {
4513             if (fwrite(img, img_size, 1, f) != img_size) {
4514                 /* Avoid stupid unused return value warning for fwrite.  */
4515             }
4516             fclose(f);
4517         }
4518     }
4519 #endif
4520 
4521     one_entry.symfile_addr = img;
4522     one_entry.symfile_size = img_size;
4523 
4524     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4525     __jit_debug_descriptor.relevant_entry = &one_entry;
4526     __jit_debug_descriptor.first_entry = &one_entry;
4527     __jit_debug_register_code();
4528 }
4529 #else
4530 /* No support for the feature.  Provide the entry point expected by exec.c,
4531    and implement the internal function we declared earlier.  */
4532 
/*
 * Fallback for hosts on which the fake ELF image for the debugger is not
 * built (the branch that is compiled when the ELF_HOST_MACHINE variant
 * above is not).  It satisfies the internal declaration made earlier in
 * this file and does nothing.
 *
 * @buf:              start of the buffer that would have been described
 * @size:             size of that buffer in bytes
 * @debug_frame:      debug frame data that would have been copied
 * @debug_frame_size: size of the debug frame data in bytes
 *
 * Every argument is ignored.
 */
static void tcg_register_jit_int(void *buf,
                                 size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty; the casts only mark the arguments as unused.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4538 
/*
 * Public entry point for registering the generated-code buffer with the
 * debugger, in its "feature not supported" form: the symbol exists so
 * that callers link, but no registration takes place.
 *
 * @buf:      start of the buffer (ignored)
 * @buf_size: size of the buffer in bytes (ignored)
 */
void tcg_register_jit(void *buf,
                      size_t buf_size)
{
    /* Intentionally empty; the casts only mark the arguments as unused.  */
    (void)buf;
    (void)buf_size;
}
4542 #endif /* ELF_HOST_MACHINE */
4543 
4544 #if !TCG_TARGET_MAYBE_vec
4545 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4546 {
4547     g_assert_not_reached();
4548 }
4549 #endif
4550