xref: /openbmc/qemu/tcg/tcg.c (revision fe4d7e33)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE portion of the debug-frame header; embedded as the first member
 * of DebugFrameHeader.
 */
typedef struct {
    /* The attribute forces this first field to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * FDE portion of the debug-frame header; embedded as the second member
 * of DebugFrameHeader.  The struct is packed, but its first field is
 * still forced to pointer-size alignment.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    /* presumably the start address and byte length of the described code */
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* Packed pair of one CIE followed by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
114 #if TCG_TARGET_MAYBE_vec
115 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
116                             TCGReg dst, TCGReg src);
117 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, TCGReg base, intptr_t offset);
119 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
120                              TCGReg dst, tcg_target_long arg);
121 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
122                            unsigned vece, const TCGArg *args,
123                            const int *const_args);
124 #else
/*
 * Placeholder compiled when TCG_TARGET_MAYBE_vec is zero: it only exists
 * so that callers link, and it aborts if it is ever reached.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/*
 * Placeholder compiled when TCG_TARGET_MAYBE_vec is zero; aborts if it is
 * ever reached.
 */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/*
 * Placeholder compiled when TCG_TARGET_MAYBE_vec is zero; aborts if it is
 * ever reached.
 */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
/*
 * Placeholder compiled when TCG_TARGET_MAYBE_vec is zero; aborts if it is
 * ever reached.
 */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
/*
 * Number of bytes kept in reserve at the end of each region:
 * tcg_region_assign() places code_gen_highwater this far below the
 * region's end.
 */
#define TCG_HIGHWATER 1024

/* Array of registered TCG contexts and the number of entries in use. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Assigned in tcg_context_init() from the "env" global bound to TCG_AREG0. */
TCGv_env cpu_env = 0;
163 
/*
 * Per-region index of translation blocks.  The tree is keyed by the
 * TB's tb_tc descriptor (see tcg_tb_insert()) and ordered by tb_tc_cmp().
 */
struct tcg_region_tree {
    /* taken around every access to @tree in this file */
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
169 
170 /*
171  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
172  * dynamically allocate from as demand dictates. Given appropriate region
173  * sizing, this minimizes flushes even when some TCG threads generate a lot
174  * more code than others.
175  */
/* Global bookkeeping for the partitioning of code_gen_buffer into regions. */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of the buffer (not necessarily aligned) */
    void *start_aligned; /* @start rounded up to a host page boundary */
    void *end;           /* page-aligned end, minus the final guard page */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
191 
/* The single instance of the region bookkeeping; set up by tcg_region_init(). */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* sizeof(struct tcg_region_tree) rounded up to the data cache line size */
static size_t tree_size;
/* NOTE(review): presumably filled in by tcg_target_init() -- confirm. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Consulted by tcg_context_init() to find where call-clobbered regs start. */
static TCGRegSet tcg_target_call_clobber_regs;
202 
203 #if TCG_TARGET_INSN_UNIT_SIZE == 1
204 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
205 {
206     *s->code_ptr++ = v;
207 }
208 
209 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
210                                                       uint8_t v)
211 {
212     *p = v;
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
/*
 * Record the current size of the generated code as the reset offset for
 * jump slot @which of the TB being translated.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
344 
345 #include "tcg-target.c.inc"
346 
347 /* compare a pointer @ptr and a tb_tc @s */
348 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
349 {
350     if (ptr >= s->ptr + s->size) {
351         return 1;
352     } else if (ptr < s->ptr) {
353         return -1;
354     }
355     return 0;
356 }
357 
358 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
359 {
360     const struct tb_tc *a = ap;
361     const struct tb_tc *b = bp;
362 
363     /*
364      * When both sizes are set, we know this isn't a lookup.
365      * This is the most likely case: every TB must be inserted; lookups
366      * are a lot less frequent.
367      */
368     if (likely(a->size && b->size)) {
369         if (a->ptr > b->ptr) {
370             return 1;
371         } else if (a->ptr < b->ptr) {
372             return -1;
373         }
374         /* a->ptr == b->ptr should happen only on deletions */
375         g_assert(a->size == b->size);
376         return 0;
377     }
378     /*
379      * All lookups have either .size field set to 0.
380      * From the glib sources we see that @ap is always the lookup key. However
381      * the docs provide no guarantee, so we just mark this case as likely.
382      */
383     if (likely(a->size == 0)) {
384         return ptr_cmp_tb_tc(a->ptr, b);
385     }
386     return ptr_cmp_tb_tc(b->ptr, a);
387 }
388 
389 static void tcg_region_trees_init(void)
390 {
391     size_t i;
392 
393     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
394     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
395     for (i = 0; i < region.n; i++) {
396         struct tcg_region_tree *rt = region_trees + i * tree_size;
397 
398         qemu_mutex_init(&rt->lock);
399         rt->tree = g_tree_new(tb_tc_cmp);
400     }
401 }
402 
403 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
404 {
405     size_t region_idx;
406 
407     if (p < region.start_aligned) {
408         region_idx = 0;
409     } else {
410         ptrdiff_t offset = p - region.start_aligned;
411 
412         if (offset > region.stride * (region.n - 1)) {
413             region_idx = region.n - 1;
414         } else {
415             region_idx = offset / region.stride;
416         }
417     }
418     return region_trees + region_idx * tree_size;
419 }
420 
421 void tcg_tb_insert(TranslationBlock *tb)
422 {
423     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
424 
425     qemu_mutex_lock(&rt->lock);
426     g_tree_insert(rt->tree, &tb->tc, tb);
427     qemu_mutex_unlock(&rt->lock);
428 }
429 
430 void tcg_tb_remove(TranslationBlock *tb)
431 {
432     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
433 
434     qemu_mutex_lock(&rt->lock);
435     g_tree_remove(rt->tree, &tb->tc);
436     qemu_mutex_unlock(&rt->lock);
437 }
438 
439 /*
440  * Find the TB 'tb' such that
441  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
442  * Return NULL if not found.
443  */
444 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
445 {
446     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
447     TranslationBlock *tb;
448     struct tb_tc s = { .ptr = (void *)tc_ptr };
449 
450     qemu_mutex_lock(&rt->lock);
451     tb = g_tree_lookup(rt->tree, &s);
452     qemu_mutex_unlock(&rt->lock);
453     return tb;
454 }
455 
456 static void tcg_region_tree_lock_all(void)
457 {
458     size_t i;
459 
460     for (i = 0; i < region.n; i++) {
461         struct tcg_region_tree *rt = region_trees + i * tree_size;
462 
463         qemu_mutex_lock(&rt->lock);
464     }
465 }
466 
467 static void tcg_region_tree_unlock_all(void)
468 {
469     size_t i;
470 
471     for (i = 0; i < region.n; i++) {
472         struct tcg_region_tree *rt = region_trees + i * tree_size;
473 
474         qemu_mutex_unlock(&rt->lock);
475     }
476 }
477 
478 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
479 {
480     size_t i;
481 
482     tcg_region_tree_lock_all();
483     for (i = 0; i < region.n; i++) {
484         struct tcg_region_tree *rt = region_trees + i * tree_size;
485 
486         g_tree_foreach(rt->tree, func, user_data);
487     }
488     tcg_region_tree_unlock_all();
489 }
490 
491 size_t tcg_nb_tbs(void)
492 {
493     size_t nb_tbs = 0;
494     size_t i;
495 
496     tcg_region_tree_lock_all();
497     for (i = 0; i < region.n; i++) {
498         struct tcg_region_tree *rt = region_trees + i * tree_size;
499 
500         nb_tbs += g_tree_nnodes(rt->tree);
501     }
502     tcg_region_tree_unlock_all();
503     return nb_tbs;
504 }
505 
506 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
507 {
508     TranslationBlock *tb = v;
509 
510     tb_destroy(tb);
511     return FALSE;
512 }
513 
514 static void tcg_region_tree_reset_all(void)
515 {
516     size_t i;
517 
518     tcg_region_tree_lock_all();
519     for (i = 0; i < region.n; i++) {
520         struct tcg_region_tree *rt = region_trees + i * tree_size;
521 
522         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
523         /* Increment the refcount first so that destroy acts as a reset */
524         g_tree_ref(rt->tree);
525         g_tree_destroy(rt->tree);
526     }
527     tcg_region_tree_unlock_all();
528 }
529 
530 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
531 {
532     void *start, *end;
533 
534     start = region.start_aligned + curr_region * region.stride;
535     end = start + region.size;
536 
537     if (curr_region == 0) {
538         start = region.start;
539     }
540     if (curr_region == region.n - 1) {
541         end = region.end;
542     }
543 
544     *pstart = start;
545     *pend = end;
546 }
547 
548 static void tcg_region_assign(TCGContext *s, size_t curr_region)
549 {
550     void *start, *end;
551 
552     tcg_region_bounds(curr_region, &start, &end);
553 
554     s->code_gen_buffer = start;
555     s->code_gen_ptr = start;
556     s->code_gen_buffer_size = end - start;
557     s->code_gen_highwater = end - TCG_HIGHWATER;
558 }
559 
560 static bool tcg_region_alloc__locked(TCGContext *s)
561 {
562     if (region.current == region.n) {
563         return true;
564     }
565     tcg_region_assign(s, region.current);
566     region.current++;
567     return false;
568 }
569 
570 /*
571  * Request a new region once the one in use has filled up.
572  * Returns true on error.
573  */
574 static bool tcg_region_alloc(TCGContext *s)
575 {
576     bool err;
577     /* read the region size now; alloc__locked will overwrite it on success */
578     size_t size_full = s->code_gen_buffer_size;
579 
580     qemu_mutex_lock(&region.lock);
581     err = tcg_region_alloc__locked(s);
582     if (!err) {
583         region.agg_size_full += size_full - TCG_HIGHWATER;
584     }
585     qemu_mutex_unlock(&region.lock);
586     return err;
587 }
588 
589 /*
590  * Perform a context's first region allocation.
591  * This function does _not_ increment region.agg_size_full.
592  */
593 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
594 {
595     return tcg_region_alloc__locked(s);
596 }
597 
598 /* Call from a safe-work context */
599 void tcg_region_reset_all(void)
600 {
601     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
602     unsigned int i;
603 
604     qemu_mutex_lock(&region.lock);
605     region.current = 0;
606     region.agg_size_full = 0;
607 
608     for (i = 0; i < n_ctxs; i++) {
609         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
610         bool err = tcg_region_initial_alloc__locked(s);
611 
612         g_assert(!err);
613     }
614     qemu_mutex_unlock(&region.lock);
615 
616     tcg_region_tree_reset_all();
617 }
618 
619 #ifdef CONFIG_USER_ONLY
/* User-mode build: the code buffer is always used as a single region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
624 #else
625 /*
626  * It is likely that some vCPUs will translate more code than others, so we
627  * first try to set more regions than max_cpus, with those regions being of
628  * reasonable size. If that's not possible we make do by evenly dividing
629  * the code_gen_buffer among the vCPUs.
630  */
631 static size_t tcg_n_regions(void)
632 {
633     size_t i;
634 
635     /* Use a single region if all we have is one vCPU thread */
636 #if !defined(CONFIG_USER_ONLY)
637     MachineState *ms = MACHINE(qdev_get_machine());
638     unsigned int max_cpus = ms->smp.max_cpus;
639 #endif
640     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
641         return 1;
642     }
643 
644     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
645     for (i = 8; i > 0; i--) {
646         size_t regions_per_thread = i;
647         size_t region_size;
648 
649         region_size = tcg_init_ctx.code_gen_buffer_size;
650         region_size /= max_cpus * regions_per_thread;
651 
652         if (region_size >= 2 * 1024u * 1024) {
653             return max_cpus * regions_per_thread;
654         }
655     }
656     /* If we can't, then just allocate one region per vCPU thread */
657     return max_cpus;
658 }
659 #endif
660 
661 /*
662  * Initializes region partitioning.
663  *
664  * Called at init time from the parent thread (i.e. the one calling
665  * tcg_context_init), after the target's TCG globals have been set.
666  *
667  * Region partitioning works by splitting code_gen_buffer into separate regions,
668  * and then assigning regions to TCG threads so that the threads can translate
669  * code in parallel without synchronization.
670  *
671  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
672  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
673  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
674  * must have been parsed before calling this function, since it calls
675  * qemu_tcg_mttcg_enabled().
676  *
677  * In user-mode we use a single region.  Having multiple regions in user-mode
678  * is not supported, because the number of vCPU threads (recall that each thread
679  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
680  * OS, and usually this number is huge (tens of thousands is not uncommon).
681  * Thus, given this large bound on the number of vCPU threads and the fact
682  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
684  *
685  * However, this user-mode limitation is unlikely to be a significant problem
686  * in practice. Multi-threaded guests share most if not all of their translated
687  * code, which makes parallel code generation less appealing than in softmmu.
688  */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* .size is the usable part; .stride also spans the trailing guard page */
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        /* the guard page is the page that starts at the region's end */
        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        /*
         * NOTE(review): region.lock is not taken around this __locked call;
         * presumably fine because this runs at init time -- confirm.
         */
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
748 
749 static void alloc_tcg_plugin_context(TCGContext *s)
750 {
751 #ifdef CONFIG_PLUGIN
752     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
753     s->plugin_tb->insns =
754         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
755 #endif
756 }
757 
758 /*
759  * All TCG threads except the parent (i.e. the one that called tcg_context_init
760  * and registered the target's TCG globals) must register with this function
761  * before initiating translation.
762  *
763  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
764  * of tcg_region_init() for the reasoning behind this.
765  *
766  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
768  * is not used anymore for translation once this function is called.
769  *
770  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
772  */
773 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* user-mode: the calling thread uses the shared initial context */
    tcg_ctx = &tcg_init_ctx;
}
778 #else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    /* Start from a copy of the initial context. */
    *s = tcg_init_ctx;

    /*
     * Relink mem_base: the copied pointers still refer to
     * tcg_init_ctx.temps[], so redirect each one to the same index in
     * the new context's temps[].
     */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    /*
     * The first registered context keeps the plugin context inherited by
     * the struct copy above; later ones get their own.
     */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    /* Give the new context its first region. */
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
812 #endif /* !CONFIG_USER_ONLY */
813 
814 /*
815  * Returns the size (in bytes) of all translated code (i.e. from all regions)
816  * currently in the cache.
817  * See also: tcg_code_capacity()
818  * Do not confuse with tcg_current_code_size(); that one applies to a single
819  * TCG context.
820  */
821 size_t tcg_code_size(void)
822 {
823     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
824     unsigned int i;
825     size_t total;
826 
827     qemu_mutex_lock(&region.lock);
828     total = region.agg_size_full;
829     for (i = 0; i < n_ctxs; i++) {
830         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
831         size_t size;
832 
833         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
834         g_assert(size <= s->code_gen_buffer_size);
835         total += size;
836     }
837     qemu_mutex_unlock(&region.lock);
838     return total;
839 }
840 
841 /*
842  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
843  * regions.
844  * See also: tcg_code_size()
845  */
846 size_t tcg_code_capacity(void)
847 {
848     size_t guard_size, capacity;
849 
850     /* no need for synchronization; these variables are set at init time */
851     guard_size = region.stride - region.size;
852     capacity = region.end + guard_size - region.start;
853     capacity -= region.n * (guard_size + TCG_HIGHWATER);
854     return capacity;
855 }
856 
857 size_t tcg_tb_phys_invalidate_count(void)
858 {
859     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
860     unsigned int i;
861     size_t total = 0;
862 
863     for (i = 0; i < n_ctxs; i++) {
864         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
865 
866         total += qatomic_read(&s->tb_phys_invalidate_count);
867     }
868     return total;
869 }
870 
871 /* pool based memory allocation */
872 void *tcg_malloc_internal(TCGContext *s, int size)
873 {
874     TCGPool *p;
875     int pool_size;
876 
877     if (size > TCG_POOL_CHUNK_SIZE) {
878         /* big malloc: insert a new pool (XXX: could optimize) */
879         p = g_malloc(sizeof(TCGPool) + size);
880         p->size = size;
881         p->next = s->pool_first_large;
882         s->pool_first_large = p;
883         return p->data;
884     } else {
885         p = s->pool_current;
886         if (!p) {
887             p = s->pool_first;
888             if (!p)
889                 goto new_pool;
890         } else {
891             if (!p->next) {
892             new_pool:
893                 pool_size = TCG_POOL_CHUNK_SIZE;
894                 p = g_malloc(sizeof(TCGPool) + pool_size);
895                 p->size = pool_size;
896                 p->next = NULL;
897                 if (s->pool_current)
898                     s->pool_current->next = p;
899                 else
900                     s->pool_first = p;
901             } else {
902                 p = p->next;
903             }
904         }
905     }
906     s->pool_current = p;
907     s->pool_cur = p->data + size;
908     s->pool_end = p->data + p->size;
909     return p->data;
910 }
911 
912 void tcg_pool_reset(TCGContext *s)
913 {
914     TCGPool *p, *t;
915     for (p = s->pool_first_large; p; p = t) {
916         t = p->next;
917         g_free(p);
918     }
919     s->pool_first_large = NULL;
920     s->pool_cur = s->pool_end = NULL;
921     s->pool_current = NULL;
922 }
923 
/*
 * Description of one helper function.  Entries are stored in
 * all_helpers[] and indexed by @func in helper_table.
 */
typedef struct TCGHelperInfo {
    void *func;        /* helper entry point; used as the hash key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
930 
931 #include "exec/helper-proto.h"
932 
/* Table of all helpers; its entries are generated by the included header. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;
937 
/*
 * Variant of tcg_target_reg_alloc_order computed in tcg_context_init():
 * the leading run of registers that are not call-clobbered is reversed,
 * the remainder is copied unchanged.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
939 static void process_op_defs(TCGContext *s);
940 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
941                                             TCGReg reg, const char *name);
942 
/*
 * One-time initialisation of the initial TCG context @s: zeroes it, sets
 * up the per-opcode argument constraint storage, registers the helpers,
 * initialises the target backend, computes indirect_reg_alloc_order,
 * sets up tcg_ctxs[] and creates the "env" global.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into one slice per opcode. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* Find n, the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* Entries [0, n) are copied in reverse order... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ...and the rest unchanged. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not be reserved, as it is bound to the "env" global. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1020 
1021 /*
1022  * Allocate TBs right before their corresponding translated code, making
1023  * sure that TBs and code are on different cache lines.
1024  */
1025 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1026 {
1027     uintptr_t align = qemu_icache_linesize;
1028     TranslationBlock *tb;
1029     void *next;
1030 
1031  retry:
1032     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1033     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1034 
1035     if (unlikely(next > s->code_gen_highwater)) {
1036         if (tcg_region_alloc(s)) {
1037             return NULL;
1038         }
1039         goto retry;
1040     }
1041     qatomic_set(&s->code_gen_ptr, next);
1042     s->data_gen_ptr = NULL;
1043     return tb;
1044 }
1045 
1046 void tcg_prologue_init(TCGContext *s)
1047 {
1048     size_t prologue_size, total_size;
1049     void *buf0, *buf1;
1050 
1051     /* Put the prologue at the beginning of code_gen_buffer.  */
1052     buf0 = s->code_gen_buffer;
1053     total_size = s->code_gen_buffer_size;
1054     s->code_ptr = buf0;
1055     s->code_buf = buf0;
1056     s->data_gen_ptr = NULL;
1057     s->code_gen_prologue = buf0;
1058 
1059     /* Compute a high-water mark, at which we voluntarily flush the buffer
1060        and start over.  The size here is arbitrary, significantly larger
1061        than we expect the code generation for any one opcode to require.  */
1062     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1063 
1064 #ifdef TCG_TARGET_NEED_POOL_LABELS
1065     s->pool_labels = NULL;
1066 #endif
1067 
1068     /* Generate the prologue.  */
1069     tcg_target_qemu_prologue(s);
1070 
1071 #ifdef TCG_TARGET_NEED_POOL_LABELS
1072     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1073     {
1074         int result = tcg_out_pool_finalize(s);
1075         tcg_debug_assert(result == 0);
1076     }
1077 #endif
1078 
1079     buf1 = s->code_ptr;
1080     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1081 
1082     /* Deduct the prologue from the buffer.  */
1083     prologue_size = tcg_current_code_size(s);
1084     s->code_gen_ptr = buf1;
1085     s->code_gen_buffer = buf1;
1086     s->code_buf = buf1;
1087     total_size -= prologue_size;
1088     s->code_gen_buffer_size = total_size;
1089 
1090     tcg_register_jit(s->code_gen_buffer, total_size);
1091 
1092 #ifdef DEBUG_DISAS
1093     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1094         FILE *logfile = qemu_log_lock();
1095         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1096         if (s->data_gen_ptr) {
1097             size_t code_size = s->data_gen_ptr - buf0;
1098             size_t data_size = prologue_size - code_size;
1099             size_t i;
1100 
1101             log_disas(buf0, code_size);
1102 
1103             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1104                 if (sizeof(tcg_target_ulong) == 8) {
1105                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1106                              (uintptr_t)s->data_gen_ptr + i,
1107                              *(uint64_t *)(s->data_gen_ptr + i));
1108                 } else {
1109                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1110                              (uintptr_t)s->data_gen_ptr + i,
1111                              *(uint32_t *)(s->data_gen_ptr + i));
1112                 }
1113             }
1114         } else {
1115             log_disas(buf0, prologue_size);
1116         }
1117         qemu_log("\n");
1118         qemu_log_flush();
1119         qemu_log_unlock(logfile);
1120     }
1121 #endif
1122 
1123     /* Assert that goto_ptr is implemented completely.  */
1124     if (TCG_TARGET_HAS_goto_ptr) {
1125         tcg_debug_assert(s->code_gen_epilogue != NULL);
1126     }
1127 }
1128 
1129 void tcg_func_start(TCGContext *s)
1130 {
1131     tcg_pool_reset(s);
1132     s->nb_temps = s->nb_globals;
1133 
1134     /* No temps have been previously allocated for size or locality.  */
1135     memset(s->free_temps, 0, sizeof(s->free_temps));
1136 
1137     s->nb_ops = 0;
1138     s->nb_labels = 0;
1139     s->current_frame_offset = s->frame_start;
1140 
1141 #ifdef CONFIG_DEBUG_TCG
1142     s->goto_tb_issue_mask = 0;
1143 #endif
1144 
1145     QTAILQ_INIT(&s->ops);
1146     QTAILQ_INIT(&s->free_ops);
1147     QSIMPLEQ_INIT(&s->labels);
1148 }
1149 
1150 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1151 {
1152     int n = s->nb_temps++;
1153     tcg_debug_assert(n < TCG_MAX_TEMPS);
1154     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1155 }
1156 
1157 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1158 {
1159     TCGTemp *ts;
1160 
1161     tcg_debug_assert(s->nb_globals == s->nb_temps);
1162     s->nb_globals++;
1163     ts = tcg_temp_alloc(s);
1164     ts->temp_global = 1;
1165 
1166     return ts;
1167 }
1168 
1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1170                                             TCGReg reg, const char *name)
1171 {
1172     TCGTemp *ts;
1173 
1174     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1175         tcg_abort();
1176     }
1177 
1178     ts = tcg_global_alloc(s);
1179     ts->base_type = type;
1180     ts->type = type;
1181     ts->fixed_reg = 1;
1182     ts->reg = reg;
1183     ts->name = name;
1184     tcg_regset_set_reg(s->reserved_regs, reg);
1185 
1186     return ts;
1187 }
1188 
1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1190 {
1191     s->frame_start = start;
1192     s->frame_end = start + size;
1193     s->frame_temp
1194         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1195 }
1196 
1197 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1198                                      intptr_t offset, const char *name)
1199 {
1200     TCGContext *s = tcg_ctx;
1201     TCGTemp *base_ts = tcgv_ptr_temp(base);
1202     TCGTemp *ts = tcg_global_alloc(s);
1203     int indirect_reg = 0, bigendian = 0;
1204 #ifdef HOST_WORDS_BIGENDIAN
1205     bigendian = 1;
1206 #endif
1207 
1208     if (!base_ts->fixed_reg) {
1209         /* We do not support double-indirect registers.  */
1210         tcg_debug_assert(!base_ts->indirect_reg);
1211         base_ts->indirect_base = 1;
1212         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1213                             ? 2 : 1);
1214         indirect_reg = 1;
1215     }
1216 
1217     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1218         TCGTemp *ts2 = tcg_global_alloc(s);
1219         char buf[64];
1220 
1221         ts->base_type = TCG_TYPE_I64;
1222         ts->type = TCG_TYPE_I32;
1223         ts->indirect_reg = indirect_reg;
1224         ts->mem_allocated = 1;
1225         ts->mem_base = base_ts;
1226         ts->mem_offset = offset + bigendian * 4;
1227         pstrcpy(buf, sizeof(buf), name);
1228         pstrcat(buf, sizeof(buf), "_0");
1229         ts->name = strdup(buf);
1230 
1231         tcg_debug_assert(ts2 == ts + 1);
1232         ts2->base_type = TCG_TYPE_I64;
1233         ts2->type = TCG_TYPE_I32;
1234         ts2->indirect_reg = indirect_reg;
1235         ts2->mem_allocated = 1;
1236         ts2->mem_base = base_ts;
1237         ts2->mem_offset = offset + (1 - bigendian) * 4;
1238         pstrcpy(buf, sizeof(buf), name);
1239         pstrcat(buf, sizeof(buf), "_1");
1240         ts2->name = strdup(buf);
1241     } else {
1242         ts->base_type = type;
1243         ts->type = type;
1244         ts->indirect_reg = indirect_reg;
1245         ts->mem_allocated = 1;
1246         ts->mem_base = base_ts;
1247         ts->mem_offset = offset;
1248         ts->name = name;
1249     }
1250     return ts;
1251 }
1252 
1253 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1254 {
1255     TCGContext *s = tcg_ctx;
1256     TCGTemp *ts;
1257     int idx, k;
1258 
1259     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1260     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1261     if (idx < TCG_MAX_TEMPS) {
1262         /* There is already an available temp with the right type.  */
1263         clear_bit(idx, s->free_temps[k].l);
1264 
1265         ts = &s->temps[idx];
1266         ts->temp_allocated = 1;
1267         tcg_debug_assert(ts->base_type == type);
1268         tcg_debug_assert(ts->temp_local == temp_local);
1269     } else {
1270         ts = tcg_temp_alloc(s);
1271         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1272             TCGTemp *ts2 = tcg_temp_alloc(s);
1273 
1274             ts->base_type = type;
1275             ts->type = TCG_TYPE_I32;
1276             ts->temp_allocated = 1;
1277             ts->temp_local = temp_local;
1278 
1279             tcg_debug_assert(ts2 == ts + 1);
1280             ts2->base_type = TCG_TYPE_I64;
1281             ts2->type = TCG_TYPE_I32;
1282             ts2->temp_allocated = 1;
1283             ts2->temp_local = temp_local;
1284         } else {
1285             ts->base_type = type;
1286             ts->type = type;
1287             ts->temp_allocated = 1;
1288             ts->temp_local = temp_local;
1289         }
1290     }
1291 
1292 #if defined(CONFIG_DEBUG_TCG)
1293     s->temps_in_use++;
1294 #endif
1295     return ts;
1296 }
1297 
1298 TCGv_vec tcg_temp_new_vec(TCGType type)
1299 {
1300     TCGTemp *t;
1301 
1302 #ifdef CONFIG_DEBUG_TCG
1303     switch (type) {
1304     case TCG_TYPE_V64:
1305         assert(TCG_TARGET_HAS_v64);
1306         break;
1307     case TCG_TYPE_V128:
1308         assert(TCG_TARGET_HAS_v128);
1309         break;
1310     case TCG_TYPE_V256:
1311         assert(TCG_TARGET_HAS_v256);
1312         break;
1313     default:
1314         g_assert_not_reached();
1315     }
1316 #endif
1317 
1318     t = tcg_temp_new_internal(type, 0);
1319     return temp_tcgv_vec(t);
1320 }
1321 
1322 /* Create a new temp of the same type as an existing temp.  */
1323 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1324 {
1325     TCGTemp *t = tcgv_vec_temp(match);
1326 
1327     tcg_debug_assert(t->temp_allocated != 0);
1328 
1329     t = tcg_temp_new_internal(t->base_type, 0);
1330     return temp_tcgv_vec(t);
1331 }
1332 
1333 void tcg_temp_free_internal(TCGTemp *ts)
1334 {
1335     TCGContext *s = tcg_ctx;
1336     int k, idx;
1337 
1338 #if defined(CONFIG_DEBUG_TCG)
1339     s->temps_in_use--;
1340     if (s->temps_in_use < 0) {
1341         fprintf(stderr, "More temporaries freed than allocated!\n");
1342     }
1343 #endif
1344 
1345     tcg_debug_assert(ts->temp_global == 0);
1346     tcg_debug_assert(ts->temp_allocated != 0);
1347     ts->temp_allocated = 0;
1348 
1349     idx = temp_idx(ts);
1350     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1351     set_bit(idx, s->free_temps[k].l);
1352 }
1353 
1354 TCGv_i32 tcg_const_i32(int32_t val)
1355 {
1356     TCGv_i32 t0;
1357     t0 = tcg_temp_new_i32();
1358     tcg_gen_movi_i32(t0, val);
1359     return t0;
1360 }
1361 
1362 TCGv_i64 tcg_const_i64(int64_t val)
1363 {
1364     TCGv_i64 t0;
1365     t0 = tcg_temp_new_i64();
1366     tcg_gen_movi_i64(t0, val);
1367     return t0;
1368 }
1369 
1370 TCGv_i32 tcg_const_local_i32(int32_t val)
1371 {
1372     TCGv_i32 t0;
1373     t0 = tcg_temp_local_new_i32();
1374     tcg_gen_movi_i32(t0, val);
1375     return t0;
1376 }
1377 
1378 TCGv_i64 tcg_const_local_i64(int64_t val)
1379 {
1380     TCGv_i64 t0;
1381     t0 = tcg_temp_local_new_i64();
1382     tcg_gen_movi_i64(t0, val);
1383     return t0;
1384 }
1385 
1386 #if defined(CONFIG_DEBUG_TCG)
1387 void tcg_clear_temp_count(void)
1388 {
1389     TCGContext *s = tcg_ctx;
1390     s->temps_in_use = 0;
1391 }
1392 
1393 int tcg_check_temp_count(void)
1394 {
1395     TCGContext *s = tcg_ctx;
1396     if (s->temps_in_use) {
1397         /* Clear the count so that we don't give another
1398          * warning immediately next time around.
1399          */
1400         s->temps_in_use = 0;
1401         return 1;
1402     }
1403     return 0;
1404 }
1405 #endif
1406 
1407 /* Return true if OP may appear in the opcode stream.
1408    Test the runtime variable that controls each opcode.  */
1409 bool tcg_op_supported(TCGOpcode op)
1410 {
1411     const bool have_vec
1412         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1413 
1414     switch (op) {
1415     case INDEX_op_discard:
1416     case INDEX_op_set_label:
1417     case INDEX_op_call:
1418     case INDEX_op_br:
1419     case INDEX_op_mb:
1420     case INDEX_op_insn_start:
1421     case INDEX_op_exit_tb:
1422     case INDEX_op_goto_tb:
1423     case INDEX_op_qemu_ld_i32:
1424     case INDEX_op_qemu_st_i32:
1425     case INDEX_op_qemu_ld_i64:
1426     case INDEX_op_qemu_st_i64:
1427         return true;
1428 
1429     case INDEX_op_goto_ptr:
1430         return TCG_TARGET_HAS_goto_ptr;
1431 
1432     case INDEX_op_mov_i32:
1433     case INDEX_op_movi_i32:
1434     case INDEX_op_setcond_i32:
1435     case INDEX_op_brcond_i32:
1436     case INDEX_op_ld8u_i32:
1437     case INDEX_op_ld8s_i32:
1438     case INDEX_op_ld16u_i32:
1439     case INDEX_op_ld16s_i32:
1440     case INDEX_op_ld_i32:
1441     case INDEX_op_st8_i32:
1442     case INDEX_op_st16_i32:
1443     case INDEX_op_st_i32:
1444     case INDEX_op_add_i32:
1445     case INDEX_op_sub_i32:
1446     case INDEX_op_mul_i32:
1447     case INDEX_op_and_i32:
1448     case INDEX_op_or_i32:
1449     case INDEX_op_xor_i32:
1450     case INDEX_op_shl_i32:
1451     case INDEX_op_shr_i32:
1452     case INDEX_op_sar_i32:
1453         return true;
1454 
1455     case INDEX_op_movcond_i32:
1456         return TCG_TARGET_HAS_movcond_i32;
1457     case INDEX_op_div_i32:
1458     case INDEX_op_divu_i32:
1459         return TCG_TARGET_HAS_div_i32;
1460     case INDEX_op_rem_i32:
1461     case INDEX_op_remu_i32:
1462         return TCG_TARGET_HAS_rem_i32;
1463     case INDEX_op_div2_i32:
1464     case INDEX_op_divu2_i32:
1465         return TCG_TARGET_HAS_div2_i32;
1466     case INDEX_op_rotl_i32:
1467     case INDEX_op_rotr_i32:
1468         return TCG_TARGET_HAS_rot_i32;
1469     case INDEX_op_deposit_i32:
1470         return TCG_TARGET_HAS_deposit_i32;
1471     case INDEX_op_extract_i32:
1472         return TCG_TARGET_HAS_extract_i32;
1473     case INDEX_op_sextract_i32:
1474         return TCG_TARGET_HAS_sextract_i32;
1475     case INDEX_op_extract2_i32:
1476         return TCG_TARGET_HAS_extract2_i32;
1477     case INDEX_op_add2_i32:
1478         return TCG_TARGET_HAS_add2_i32;
1479     case INDEX_op_sub2_i32:
1480         return TCG_TARGET_HAS_sub2_i32;
1481     case INDEX_op_mulu2_i32:
1482         return TCG_TARGET_HAS_mulu2_i32;
1483     case INDEX_op_muls2_i32:
1484         return TCG_TARGET_HAS_muls2_i32;
1485     case INDEX_op_muluh_i32:
1486         return TCG_TARGET_HAS_muluh_i32;
1487     case INDEX_op_mulsh_i32:
1488         return TCG_TARGET_HAS_mulsh_i32;
1489     case INDEX_op_ext8s_i32:
1490         return TCG_TARGET_HAS_ext8s_i32;
1491     case INDEX_op_ext16s_i32:
1492         return TCG_TARGET_HAS_ext16s_i32;
1493     case INDEX_op_ext8u_i32:
1494         return TCG_TARGET_HAS_ext8u_i32;
1495     case INDEX_op_ext16u_i32:
1496         return TCG_TARGET_HAS_ext16u_i32;
1497     case INDEX_op_bswap16_i32:
1498         return TCG_TARGET_HAS_bswap16_i32;
1499     case INDEX_op_bswap32_i32:
1500         return TCG_TARGET_HAS_bswap32_i32;
1501     case INDEX_op_not_i32:
1502         return TCG_TARGET_HAS_not_i32;
1503     case INDEX_op_neg_i32:
1504         return TCG_TARGET_HAS_neg_i32;
1505     case INDEX_op_andc_i32:
1506         return TCG_TARGET_HAS_andc_i32;
1507     case INDEX_op_orc_i32:
1508         return TCG_TARGET_HAS_orc_i32;
1509     case INDEX_op_eqv_i32:
1510         return TCG_TARGET_HAS_eqv_i32;
1511     case INDEX_op_nand_i32:
1512         return TCG_TARGET_HAS_nand_i32;
1513     case INDEX_op_nor_i32:
1514         return TCG_TARGET_HAS_nor_i32;
1515     case INDEX_op_clz_i32:
1516         return TCG_TARGET_HAS_clz_i32;
1517     case INDEX_op_ctz_i32:
1518         return TCG_TARGET_HAS_ctz_i32;
1519     case INDEX_op_ctpop_i32:
1520         return TCG_TARGET_HAS_ctpop_i32;
1521 
1522     case INDEX_op_brcond2_i32:
1523     case INDEX_op_setcond2_i32:
1524         return TCG_TARGET_REG_BITS == 32;
1525 
1526     case INDEX_op_mov_i64:
1527     case INDEX_op_movi_i64:
1528     case INDEX_op_setcond_i64:
1529     case INDEX_op_brcond_i64:
1530     case INDEX_op_ld8u_i64:
1531     case INDEX_op_ld8s_i64:
1532     case INDEX_op_ld16u_i64:
1533     case INDEX_op_ld16s_i64:
1534     case INDEX_op_ld32u_i64:
1535     case INDEX_op_ld32s_i64:
1536     case INDEX_op_ld_i64:
1537     case INDEX_op_st8_i64:
1538     case INDEX_op_st16_i64:
1539     case INDEX_op_st32_i64:
1540     case INDEX_op_st_i64:
1541     case INDEX_op_add_i64:
1542     case INDEX_op_sub_i64:
1543     case INDEX_op_mul_i64:
1544     case INDEX_op_and_i64:
1545     case INDEX_op_or_i64:
1546     case INDEX_op_xor_i64:
1547     case INDEX_op_shl_i64:
1548     case INDEX_op_shr_i64:
1549     case INDEX_op_sar_i64:
1550     case INDEX_op_ext_i32_i64:
1551     case INDEX_op_extu_i32_i64:
1552         return TCG_TARGET_REG_BITS == 64;
1553 
1554     case INDEX_op_movcond_i64:
1555         return TCG_TARGET_HAS_movcond_i64;
1556     case INDEX_op_div_i64:
1557     case INDEX_op_divu_i64:
1558         return TCG_TARGET_HAS_div_i64;
1559     case INDEX_op_rem_i64:
1560     case INDEX_op_remu_i64:
1561         return TCG_TARGET_HAS_rem_i64;
1562     case INDEX_op_div2_i64:
1563     case INDEX_op_divu2_i64:
1564         return TCG_TARGET_HAS_div2_i64;
1565     case INDEX_op_rotl_i64:
1566     case INDEX_op_rotr_i64:
1567         return TCG_TARGET_HAS_rot_i64;
1568     case INDEX_op_deposit_i64:
1569         return TCG_TARGET_HAS_deposit_i64;
1570     case INDEX_op_extract_i64:
1571         return TCG_TARGET_HAS_extract_i64;
1572     case INDEX_op_sextract_i64:
1573         return TCG_TARGET_HAS_sextract_i64;
1574     case INDEX_op_extract2_i64:
1575         return TCG_TARGET_HAS_extract2_i64;
1576     case INDEX_op_extrl_i64_i32:
1577         return TCG_TARGET_HAS_extrl_i64_i32;
1578     case INDEX_op_extrh_i64_i32:
1579         return TCG_TARGET_HAS_extrh_i64_i32;
1580     case INDEX_op_ext8s_i64:
1581         return TCG_TARGET_HAS_ext8s_i64;
1582     case INDEX_op_ext16s_i64:
1583         return TCG_TARGET_HAS_ext16s_i64;
1584     case INDEX_op_ext32s_i64:
1585         return TCG_TARGET_HAS_ext32s_i64;
1586     case INDEX_op_ext8u_i64:
1587         return TCG_TARGET_HAS_ext8u_i64;
1588     case INDEX_op_ext16u_i64:
1589         return TCG_TARGET_HAS_ext16u_i64;
1590     case INDEX_op_ext32u_i64:
1591         return TCG_TARGET_HAS_ext32u_i64;
1592     case INDEX_op_bswap16_i64:
1593         return TCG_TARGET_HAS_bswap16_i64;
1594     case INDEX_op_bswap32_i64:
1595         return TCG_TARGET_HAS_bswap32_i64;
1596     case INDEX_op_bswap64_i64:
1597         return TCG_TARGET_HAS_bswap64_i64;
1598     case INDEX_op_not_i64:
1599         return TCG_TARGET_HAS_not_i64;
1600     case INDEX_op_neg_i64:
1601         return TCG_TARGET_HAS_neg_i64;
1602     case INDEX_op_andc_i64:
1603         return TCG_TARGET_HAS_andc_i64;
1604     case INDEX_op_orc_i64:
1605         return TCG_TARGET_HAS_orc_i64;
1606     case INDEX_op_eqv_i64:
1607         return TCG_TARGET_HAS_eqv_i64;
1608     case INDEX_op_nand_i64:
1609         return TCG_TARGET_HAS_nand_i64;
1610     case INDEX_op_nor_i64:
1611         return TCG_TARGET_HAS_nor_i64;
1612     case INDEX_op_clz_i64:
1613         return TCG_TARGET_HAS_clz_i64;
1614     case INDEX_op_ctz_i64:
1615         return TCG_TARGET_HAS_ctz_i64;
1616     case INDEX_op_ctpop_i64:
1617         return TCG_TARGET_HAS_ctpop_i64;
1618     case INDEX_op_add2_i64:
1619         return TCG_TARGET_HAS_add2_i64;
1620     case INDEX_op_sub2_i64:
1621         return TCG_TARGET_HAS_sub2_i64;
1622     case INDEX_op_mulu2_i64:
1623         return TCG_TARGET_HAS_mulu2_i64;
1624     case INDEX_op_muls2_i64:
1625         return TCG_TARGET_HAS_muls2_i64;
1626     case INDEX_op_muluh_i64:
1627         return TCG_TARGET_HAS_muluh_i64;
1628     case INDEX_op_mulsh_i64:
1629         return TCG_TARGET_HAS_mulsh_i64;
1630 
1631     case INDEX_op_mov_vec:
1632     case INDEX_op_dup_vec:
1633     case INDEX_op_dupi_vec:
1634     case INDEX_op_dupm_vec:
1635     case INDEX_op_ld_vec:
1636     case INDEX_op_st_vec:
1637     case INDEX_op_add_vec:
1638     case INDEX_op_sub_vec:
1639     case INDEX_op_and_vec:
1640     case INDEX_op_or_vec:
1641     case INDEX_op_xor_vec:
1642     case INDEX_op_cmp_vec:
1643         return have_vec;
1644     case INDEX_op_dup2_vec:
1645         return have_vec && TCG_TARGET_REG_BITS == 32;
1646     case INDEX_op_not_vec:
1647         return have_vec && TCG_TARGET_HAS_not_vec;
1648     case INDEX_op_neg_vec:
1649         return have_vec && TCG_TARGET_HAS_neg_vec;
1650     case INDEX_op_abs_vec:
1651         return have_vec && TCG_TARGET_HAS_abs_vec;
1652     case INDEX_op_andc_vec:
1653         return have_vec && TCG_TARGET_HAS_andc_vec;
1654     case INDEX_op_orc_vec:
1655         return have_vec && TCG_TARGET_HAS_orc_vec;
1656     case INDEX_op_mul_vec:
1657         return have_vec && TCG_TARGET_HAS_mul_vec;
1658     case INDEX_op_shli_vec:
1659     case INDEX_op_shri_vec:
1660     case INDEX_op_sari_vec:
1661         return have_vec && TCG_TARGET_HAS_shi_vec;
1662     case INDEX_op_shls_vec:
1663     case INDEX_op_shrs_vec:
1664     case INDEX_op_sars_vec:
1665         return have_vec && TCG_TARGET_HAS_shs_vec;
1666     case INDEX_op_shlv_vec:
1667     case INDEX_op_shrv_vec:
1668     case INDEX_op_sarv_vec:
1669         return have_vec && TCG_TARGET_HAS_shv_vec;
1670     case INDEX_op_rotli_vec:
1671         return have_vec && TCG_TARGET_HAS_roti_vec;
1672     case INDEX_op_rotls_vec:
1673         return have_vec && TCG_TARGET_HAS_rots_vec;
1674     case INDEX_op_rotlv_vec:
1675     case INDEX_op_rotrv_vec:
1676         return have_vec && TCG_TARGET_HAS_rotv_vec;
1677     case INDEX_op_ssadd_vec:
1678     case INDEX_op_usadd_vec:
1679     case INDEX_op_sssub_vec:
1680     case INDEX_op_ussub_vec:
1681         return have_vec && TCG_TARGET_HAS_sat_vec;
1682     case INDEX_op_smin_vec:
1683     case INDEX_op_umin_vec:
1684     case INDEX_op_smax_vec:
1685     case INDEX_op_umax_vec:
1686         return have_vec && TCG_TARGET_HAS_minmax_vec;
1687     case INDEX_op_bitsel_vec:
1688         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1689     case INDEX_op_cmpsel_vec:
1690         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1691 
1692     default:
1693         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1694         return true;
1695     }
1696 }
1697 
1698 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1699    and endian swap. Maybe it would be better to do the alignment
1700    and endian swap in tcg_reg_alloc_call(). */
1701 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1702 {
1703     int i, real_args, nb_rets, pi;
1704     unsigned sizemask, flags;
1705     TCGHelperInfo *info;
1706     TCGOp *op;
1707 
1708     info = g_hash_table_lookup(helper_table, (gpointer)func);
1709     flags = info->flags;
1710     sizemask = info->sizemask;
1711 
1712 #ifdef CONFIG_PLUGIN
1713     /* detect non-plugin helpers */
1714     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1715         tcg_ctx->plugin_insn->calls_helpers = true;
1716     }
1717 #endif
1718 
1719 #if defined(__sparc__) && !defined(__arch64__) \
1720     && !defined(CONFIG_TCG_INTERPRETER)
1721     /* We have 64-bit values in one register, but need to pass as two
1722        separate parameters.  Split them.  */
1723     int orig_sizemask = sizemask;
1724     int orig_nargs = nargs;
1725     TCGv_i64 retl, reth;
1726     TCGTemp *split_args[MAX_OPC_PARAM];
1727 
1728     retl = NULL;
1729     reth = NULL;
1730     if (sizemask != 0) {
1731         for (i = real_args = 0; i < nargs; ++i) {
1732             int is_64bit = sizemask & (1 << (i+1)*2);
1733             if (is_64bit) {
1734                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1735                 TCGv_i32 h = tcg_temp_new_i32();
1736                 TCGv_i32 l = tcg_temp_new_i32();
1737                 tcg_gen_extr_i64_i32(l, h, orig);
1738                 split_args[real_args++] = tcgv_i32_temp(h);
1739                 split_args[real_args++] = tcgv_i32_temp(l);
1740             } else {
1741                 split_args[real_args++] = args[i];
1742             }
1743         }
1744         nargs = real_args;
1745         args = split_args;
1746         sizemask = 0;
1747     }
1748 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1749     for (i = 0; i < nargs; ++i) {
1750         int is_64bit = sizemask & (1 << (i+1)*2);
1751         int is_signed = sizemask & (2 << (i+1)*2);
1752         if (!is_64bit) {
1753             TCGv_i64 temp = tcg_temp_new_i64();
1754             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1755             if (is_signed) {
1756                 tcg_gen_ext32s_i64(temp, orig);
1757             } else {
1758                 tcg_gen_ext32u_i64(temp, orig);
1759             }
1760             args[i] = tcgv_i64_temp(temp);
1761         }
1762     }
1763 #endif /* TCG_TARGET_EXTEND_ARGS */
1764 
1765     op = tcg_emit_op(INDEX_op_call);
1766 
1767     pi = 0;
1768     if (ret != NULL) {
1769 #if defined(__sparc__) && !defined(__arch64__) \
1770     && !defined(CONFIG_TCG_INTERPRETER)
1771         if (orig_sizemask & 1) {
1772             /* The 32-bit ABI is going to return the 64-bit value in
1773                the %o0/%o1 register pair.  Prepare for this by using
1774                two return temporaries, and reassemble below.  */
1775             retl = tcg_temp_new_i64();
1776             reth = tcg_temp_new_i64();
1777             op->args[pi++] = tcgv_i64_arg(reth);
1778             op->args[pi++] = tcgv_i64_arg(retl);
1779             nb_rets = 2;
1780         } else {
1781             op->args[pi++] = temp_arg(ret);
1782             nb_rets = 1;
1783         }
1784 #else
1785         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1786 #ifdef HOST_WORDS_BIGENDIAN
1787             op->args[pi++] = temp_arg(ret + 1);
1788             op->args[pi++] = temp_arg(ret);
1789 #else
1790             op->args[pi++] = temp_arg(ret);
1791             op->args[pi++] = temp_arg(ret + 1);
1792 #endif
1793             nb_rets = 2;
1794         } else {
1795             op->args[pi++] = temp_arg(ret);
1796             nb_rets = 1;
1797         }
1798 #endif
1799     } else {
1800         nb_rets = 0;
1801     }
1802     TCGOP_CALLO(op) = nb_rets;
1803 
1804     real_args = 0;
1805     for (i = 0; i < nargs; i++) {
1806         int is_64bit = sizemask & (1 << (i+1)*2);
1807         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1808 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1809             /* some targets want aligned 64 bit args */
1810             if (real_args & 1) {
1811                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1812                 real_args++;
1813             }
1814 #endif
1815            /* If stack grows up, then we will be placing successive
1816               arguments at lower addresses, which means we need to
1817               reverse the order compared to how we would normally
1818               treat either big or little-endian.  For those arguments
1819               that will wind up in registers, this still works for
1820               HPPA (the only current STACK_GROWSUP target) since the
1821               argument registers are *also* allocated in decreasing
1822               order.  If another such target is added, this logic may
1823               have to get more complicated to differentiate between
1824               stack arguments and register arguments.  */
1825 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1826             op->args[pi++] = temp_arg(args[i] + 1);
1827             op->args[pi++] = temp_arg(args[i]);
1828 #else
1829             op->args[pi++] = temp_arg(args[i]);
1830             op->args[pi++] = temp_arg(args[i] + 1);
1831 #endif
1832             real_args += 2;
1833             continue;
1834         }
1835 
1836         op->args[pi++] = temp_arg(args[i]);
1837         real_args++;
1838     }
1839     op->args[pi++] = (uintptr_t)func;
1840     op->args[pi++] = flags;
1841     TCGOP_CALLI(op) = real_args;
1842 
1843     /* Make sure the fields didn't overflow.  */
1844     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1845     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1846 
1847 #if defined(__sparc__) && !defined(__arch64__) \
1848     && !defined(CONFIG_TCG_INTERPRETER)
1849     /* Free all of the parts we allocated above.  */
1850     for (i = real_args = 0; i < orig_nargs; ++i) {
1851         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1852         if (is_64bit) {
1853             tcg_temp_free_internal(args[real_args++]);
1854             tcg_temp_free_internal(args[real_args++]);
1855         } else {
1856             real_args++;
1857         }
1858     }
1859     if (orig_sizemask & 1) {
1860         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1861            Note that describing these as TCGv_i64 eliminates an unnecessary
1862            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1863         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1864         tcg_temp_free_i64(retl);
1865         tcg_temp_free_i64(reth);
1866     }
1867 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1868     for (i = 0; i < nargs; ++i) {
1869         int is_64bit = sizemask & (1 << (i+1)*2);
1870         if (!is_64bit) {
1871             tcg_temp_free_internal(args[i]);
1872         }
1873     }
1874 #endif /* TCG_TARGET_EXTEND_ARGS */
1875 }
1876 
1877 static void tcg_reg_alloc_start(TCGContext *s)
1878 {
1879     int i, n;
1880     TCGTemp *ts;
1881 
1882     for (i = 0, n = s->nb_globals; i < n; i++) {
1883         ts = &s->temps[i];
1884         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1885     }
1886     for (n = s->nb_temps; i < n; i++) {
1887         ts = &s->temps[i];
1888         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1889         ts->mem_allocated = 0;
1890         ts->fixed_reg = 0;
1891     }
1892 
1893     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1894 }
1895 
1896 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1897                                  TCGTemp *ts)
1898 {
1899     int idx = temp_idx(ts);
1900 
1901     if (ts->temp_global) {
1902         pstrcpy(buf, buf_size, ts->name);
1903     } else if (ts->temp_local) {
1904         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1905     } else {
1906         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1907     }
1908     return buf;
1909 }
1910 
1911 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1912                              int buf_size, TCGArg arg)
1913 {
1914     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1915 }
1916 
1917 /* Find helper name.  */
1918 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1919 {
1920     const char *ret = NULL;
1921     if (helper_table) {
1922         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1923         if (info) {
1924             ret = info->name;
1925         }
1926     }
1927     return ret;
1928 }
1929 
1930 static const char * const cond_name[] =
1931 {
1932     [TCG_COND_NEVER] = "never",
1933     [TCG_COND_ALWAYS] = "always",
1934     [TCG_COND_EQ] = "eq",
1935     [TCG_COND_NE] = "ne",
1936     [TCG_COND_LT] = "lt",
1937     [TCG_COND_GE] = "ge",
1938     [TCG_COND_LE] = "le",
1939     [TCG_COND_GT] = "gt",
1940     [TCG_COND_LTU] = "ltu",
1941     [TCG_COND_GEU] = "geu",
1942     [TCG_COND_LEU] = "leu",
1943     [TCG_COND_GTU] = "gtu"
1944 };
1945 
1946 static const char * const ldst_name[] =
1947 {
1948     [MO_UB]   = "ub",
1949     [MO_SB]   = "sb",
1950     [MO_LEUW] = "leuw",
1951     [MO_LESW] = "lesw",
1952     [MO_LEUL] = "leul",
1953     [MO_LESL] = "lesl",
1954     [MO_LEQ]  = "leq",
1955     [MO_BEUW] = "beuw",
1956     [MO_BESW] = "besw",
1957     [MO_BEUL] = "beul",
1958     [MO_BESL] = "besl",
1959     [MO_BEQ]  = "beq",
1960 };
1961 
1962 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1963 #ifdef TARGET_ALIGNED_ONLY
1964     [MO_UNALN >> MO_ASHIFT]    = "un+",
1965     [MO_ALIGN >> MO_ASHIFT]    = "",
1966 #else
1967     [MO_UNALN >> MO_ASHIFT]    = "",
1968     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1969 #endif
1970     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1971     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1972     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1973     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1974     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1975     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1976 };
1977 
1978 static inline bool tcg_regset_single(TCGRegSet d)
1979 {
1980     return (d & (d - 1)) == 0;
1981 }
1982 
1983 static inline TCGReg tcg_regset_first(TCGRegSet d)
1984 {
1985     if (TCG_TARGET_NB_REGS <= 32) {
1986         return ctz32(d);
1987     } else {
1988         return ctz64(d);
1989     }
1990 }
1991 
1992 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1993 {
1994     char buf[128];
1995     TCGOp *op;
1996 
1997     QTAILQ_FOREACH(op, &s->ops, link) {
1998         int i, k, nb_oargs, nb_iargs, nb_cargs;
1999         const TCGOpDef *def;
2000         TCGOpcode c;
2001         int col = 0;
2002 
2003         c = op->opc;
2004         def = &tcg_op_defs[c];
2005 
2006         if (c == INDEX_op_insn_start) {
2007             nb_oargs = 0;
2008             col += qemu_log("\n ----");
2009 
2010             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2011                 target_ulong a;
2012 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2013                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2014 #else
2015                 a = op->args[i];
2016 #endif
2017                 col += qemu_log(" " TARGET_FMT_lx, a);
2018             }
2019         } else if (c == INDEX_op_call) {
2020             /* variable number of arguments */
2021             nb_oargs = TCGOP_CALLO(op);
2022             nb_iargs = TCGOP_CALLI(op);
2023             nb_cargs = def->nb_cargs;
2024 
2025             /* function name, flags, out args */
2026             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2027                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2028                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2029             for (i = 0; i < nb_oargs; i++) {
2030                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2031                                                        op->args[i]));
2032             }
2033             for (i = 0; i < nb_iargs; i++) {
2034                 TCGArg arg = op->args[nb_oargs + i];
2035                 const char *t = "<dummy>";
2036                 if (arg != TCG_CALL_DUMMY_ARG) {
2037                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2038                 }
2039                 col += qemu_log(",%s", t);
2040             }
2041         } else {
2042             col += qemu_log(" %s ", def->name);
2043 
2044             nb_oargs = def->nb_oargs;
2045             nb_iargs = def->nb_iargs;
2046             nb_cargs = def->nb_cargs;
2047 
2048             if (def->flags & TCG_OPF_VECTOR) {
2049                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2050                                 8 << TCGOP_VECE(op));
2051             }
2052 
2053             k = 0;
2054             for (i = 0; i < nb_oargs; i++) {
2055                 if (k != 0) {
2056                     col += qemu_log(",");
2057                 }
2058                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2059                                                       op->args[k++]));
2060             }
2061             for (i = 0; i < nb_iargs; i++) {
2062                 if (k != 0) {
2063                     col += qemu_log(",");
2064                 }
2065                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2066                                                       op->args[k++]));
2067             }
2068             switch (c) {
2069             case INDEX_op_brcond_i32:
2070             case INDEX_op_setcond_i32:
2071             case INDEX_op_movcond_i32:
2072             case INDEX_op_brcond2_i32:
2073             case INDEX_op_setcond2_i32:
2074             case INDEX_op_brcond_i64:
2075             case INDEX_op_setcond_i64:
2076             case INDEX_op_movcond_i64:
2077             case INDEX_op_cmp_vec:
2078             case INDEX_op_cmpsel_vec:
2079                 if (op->args[k] < ARRAY_SIZE(cond_name)
2080                     && cond_name[op->args[k]]) {
2081                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2082                 } else {
2083                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2084                 }
2085                 i = 1;
2086                 break;
2087             case INDEX_op_qemu_ld_i32:
2088             case INDEX_op_qemu_st_i32:
2089             case INDEX_op_qemu_ld_i64:
2090             case INDEX_op_qemu_st_i64:
2091                 {
2092                     TCGMemOpIdx oi = op->args[k++];
2093                     MemOp op = get_memop(oi);
2094                     unsigned ix = get_mmuidx(oi);
2095 
2096                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2097                         col += qemu_log(",$0x%x,%u", op, ix);
2098                     } else {
2099                         const char *s_al, *s_op;
2100                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2101                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2102                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2103                     }
2104                     i = 1;
2105                 }
2106                 break;
2107             default:
2108                 i = 0;
2109                 break;
2110             }
2111             switch (c) {
2112             case INDEX_op_set_label:
2113             case INDEX_op_br:
2114             case INDEX_op_brcond_i32:
2115             case INDEX_op_brcond_i64:
2116             case INDEX_op_brcond2_i32:
2117                 col += qemu_log("%s$L%d", k ? "," : "",
2118                                 arg_label(op->args[k])->id);
2119                 i++, k++;
2120                 break;
2121             default:
2122                 break;
2123             }
2124             for (; i < nb_cargs; i++, k++) {
2125                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2126             }
2127         }
2128 
2129         if (have_prefs || op->life) {
2130 
2131             QemuLogFile *logfile;
2132 
2133             rcu_read_lock();
2134             logfile = qatomic_rcu_read(&qemu_logfile);
2135             if (logfile) {
2136                 for (; col < 40; ++col) {
2137                     putc(' ', logfile->fd);
2138                 }
2139             }
2140             rcu_read_unlock();
2141         }
2142 
2143         if (op->life) {
2144             unsigned life = op->life;
2145 
2146             if (life & (SYNC_ARG * 3)) {
2147                 qemu_log("  sync:");
2148                 for (i = 0; i < 2; ++i) {
2149                     if (life & (SYNC_ARG << i)) {
2150                         qemu_log(" %d", i);
2151                     }
2152                 }
2153             }
2154             life /= DEAD_ARG;
2155             if (life) {
2156                 qemu_log("  dead:");
2157                 for (i = 0; life; ++i, life >>= 1) {
2158                     if (life & 1) {
2159                         qemu_log(" %d", i);
2160                     }
2161                 }
2162             }
2163         }
2164 
2165         if (have_prefs) {
2166             for (i = 0; i < nb_oargs; ++i) {
2167                 TCGRegSet set = op->output_pref[i];
2168 
2169                 if (i == 0) {
2170                     qemu_log("  pref=");
2171                 } else {
2172                     qemu_log(",");
2173                 }
2174                 if (set == 0) {
2175                     qemu_log("none");
2176                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2177                     qemu_log("all");
2178 #ifdef CONFIG_DEBUG_TCG
2179                 } else if (tcg_regset_single(set)) {
2180                     TCGReg reg = tcg_regset_first(set);
2181                     qemu_log("%s", tcg_target_reg_names[reg]);
2182 #endif
2183                 } else if (TCG_TARGET_NB_REGS <= 32) {
2184                     qemu_log("%#x", (uint32_t)set);
2185                 } else {
2186                     qemu_log("%#" PRIx64, (uint64_t)set);
2187                 }
2188             }
2189         }
2190 
2191         qemu_log("\n");
2192     }
2193 }
2194 
2195 /* we give more priority to constraints with less registers */
2196 static int get_constraint_priority(const TCGOpDef *def, int k)
2197 {
2198     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2199     int n;
2200 
2201     if (arg_ct->oalias) {
2202         /* an alias is equivalent to a single register */
2203         n = 1;
2204     } else {
2205         n = ctpop64(arg_ct->regs);
2206     }
2207     return TCG_TARGET_NB_REGS - n + 1;
2208 }
2209 
2210 /* sort from highest priority to lowest */
2211 static void sort_constraints(TCGOpDef *def, int start, int n)
2212 {
2213     int i, j;
2214     TCGArgConstraint *a = def->args_ct;
2215 
2216     for (i = 0; i < n; i++) {
2217         a[start + i].sort_index = start + i;
2218     }
2219     if (n <= 1) {
2220         return;
2221     }
2222     for (i = 0; i < n - 1; i++) {
2223         for (j = i + 1; j < n; j++) {
2224             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2225             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2226             if (p1 < p2) {
2227                 int tmp = a[start + i].sort_index;
2228                 a[start + i].sort_index = a[start + j].sort_index;
2229                 a[start + j].sort_index = tmp;
2230             }
2231         }
2232     }
2233 }
2234 
2235 static void process_op_defs(TCGContext *s)
2236 {
2237     TCGOpcode op;
2238 
2239     for (op = 0; op < NB_OPS; op++) {
2240         TCGOpDef *def = &tcg_op_defs[op];
2241         const TCGTargetOpDef *tdefs;
2242         TCGType type;
2243         int i, nb_args;
2244 
2245         if (def->flags & TCG_OPF_NOT_PRESENT) {
2246             continue;
2247         }
2248 
2249         nb_args = def->nb_iargs + def->nb_oargs;
2250         if (nb_args == 0) {
2251             continue;
2252         }
2253 
2254         tdefs = tcg_target_op_def(op);
2255         /* Missing TCGTargetOpDef entry. */
2256         tcg_debug_assert(tdefs != NULL);
2257 
2258         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2259         for (i = 0; i < nb_args; i++) {
2260             const char *ct_str = tdefs->args_ct_str[i];
2261             /* Incomplete TCGTargetOpDef entry. */
2262             tcg_debug_assert(ct_str != NULL);
2263 
2264             while (*ct_str != '\0') {
2265                 switch(*ct_str) {
2266                 case '0' ... '9':
2267                     {
2268                         int oarg = *ct_str - '0';
2269                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2270                         tcg_debug_assert(oarg < def->nb_oargs);
2271                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2272                         def->args_ct[i] = def->args_ct[oarg];
2273                         /* The output sets oalias.  */
2274                         def->args_ct[oarg].oalias = true;
2275                         def->args_ct[oarg].alias_index = i;
2276                         /* The input sets ialias. */
2277                         def->args_ct[i].ialias = true;
2278                         def->args_ct[i].alias_index = oarg;
2279                     }
2280                     ct_str++;
2281                     break;
2282                 case '&':
2283                     def->args_ct[i].newreg = true;
2284                     ct_str++;
2285                     break;
2286                 case 'i':
2287                     def->args_ct[i].ct |= TCG_CT_CONST;
2288                     ct_str++;
2289                     break;
2290                 default:
2291                     ct_str = target_parse_constraint(&def->args_ct[i],
2292                                                      ct_str, type);
2293                     /* Typo in TCGTargetOpDef constraint. */
2294                     tcg_debug_assert(ct_str != NULL);
2295                 }
2296             }
2297         }
2298 
2299         /* TCGTargetOpDef entry with too much information? */
2300         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2301 
2302         /* sort the constraints (XXX: this is just an heuristic) */
2303         sort_constraints(def, 0, def->nb_oargs);
2304         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2305     }
2306 }
2307 
2308 void tcg_op_remove(TCGContext *s, TCGOp *op)
2309 {
2310     TCGLabel *label;
2311 
2312     switch (op->opc) {
2313     case INDEX_op_br:
2314         label = arg_label(op->args[0]);
2315         label->refs--;
2316         break;
2317     case INDEX_op_brcond_i32:
2318     case INDEX_op_brcond_i64:
2319         label = arg_label(op->args[3]);
2320         label->refs--;
2321         break;
2322     case INDEX_op_brcond2_i32:
2323         label = arg_label(op->args[5]);
2324         label->refs--;
2325         break;
2326     default:
2327         break;
2328     }
2329 
2330     QTAILQ_REMOVE(&s->ops, op, link);
2331     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2332     s->nb_ops--;
2333 
2334 #ifdef CONFIG_PROFILER
2335     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2336 #endif
2337 }
2338 
2339 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2340 {
2341     TCGContext *s = tcg_ctx;
2342     TCGOp *op;
2343 
2344     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2345         op = tcg_malloc(sizeof(TCGOp));
2346     } else {
2347         op = QTAILQ_FIRST(&s->free_ops);
2348         QTAILQ_REMOVE(&s->free_ops, op, link);
2349     }
2350     memset(op, 0, offsetof(TCGOp, link));
2351     op->opc = opc;
2352     s->nb_ops++;
2353 
2354     return op;
2355 }
2356 
2357 TCGOp *tcg_emit_op(TCGOpcode opc)
2358 {
2359     TCGOp *op = tcg_op_alloc(opc);
2360     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2361     return op;
2362 }
2363 
2364 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2365 {
2366     TCGOp *new_op = tcg_op_alloc(opc);
2367     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2368     return new_op;
2369 }
2370 
2371 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2372 {
2373     TCGOp *new_op = tcg_op_alloc(opc);
2374     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2375     return new_op;
2376 }
2377 
2378 /* Reachable analysis : remove unreachable code.  */
2379 static void reachable_code_pass(TCGContext *s)
2380 {
2381     TCGOp *op, *op_next;
2382     bool dead = false;
2383 
2384     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2385         bool remove = dead;
2386         TCGLabel *label;
2387         int call_flags;
2388 
2389         switch (op->opc) {
2390         case INDEX_op_set_label:
2391             label = arg_label(op->args[0]);
2392             if (label->refs == 0) {
2393                 /*
2394                  * While there is an occasional backward branch, virtually
2395                  * all branches generated by the translators are forward.
2396                  * Which means that generally we will have already removed
2397                  * all references to the label that will be, and there is
2398                  * little to be gained by iterating.
2399                  */
2400                 remove = true;
2401             } else {
2402                 /* Once we see a label, insns become live again.  */
2403                 dead = false;
2404                 remove = false;
2405 
2406                 /*
2407                  * Optimization can fold conditional branches to unconditional.
2408                  * If we find a label with one reference which is preceded by
2409                  * an unconditional branch to it, remove both.  This needed to
2410                  * wait until the dead code in between them was removed.
2411                  */
2412                 if (label->refs == 1) {
2413                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2414                     if (op_prev->opc == INDEX_op_br &&
2415                         label == arg_label(op_prev->args[0])) {
2416                         tcg_op_remove(s, op_prev);
2417                         remove = true;
2418                     }
2419                 }
2420             }
2421             break;
2422 
2423         case INDEX_op_br:
2424         case INDEX_op_exit_tb:
2425         case INDEX_op_goto_ptr:
2426             /* Unconditional branches; everything following is dead.  */
2427             dead = true;
2428             break;
2429 
2430         case INDEX_op_call:
2431             /* Notice noreturn helper calls, raising exceptions.  */
2432             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2433             if (call_flags & TCG_CALL_NO_RETURN) {
2434                 dead = true;
2435             }
2436             break;
2437 
2438         case INDEX_op_insn_start:
2439             /* Never remove -- we need to keep these for unwind.  */
2440             remove = false;
2441             break;
2442 
2443         default:
2444             break;
2445         }
2446 
2447         if (remove) {
2448             tcg_op_remove(s, op);
2449         }
2450     }
2451 }
2452 
2453 #define TS_DEAD  1   /* liveness state bit: the temp is dead */
2454 #define TS_MEM   2   /* liveness state bit: the temp should be in memory */
2455 
2456 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))   /* nonzero if the DEAD_ARG bit for argument n is set in the arg_life variable in scope */
2457 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))   /* nonzero if the SYNC_ARG bit for argument n is set in the arg_life variable in scope */
2458 
2459 /* For liveness_pass_1, the register preferences for a given temp.  */
2460 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2461 {
2462     return ts->state_ptr;
2463 }
2464 
2465 /* For liveness_pass_1, reset the preferences for a given temp to the
2466  * maximal regset for its type.
2467  */
2468 static inline void la_reset_pref(TCGTemp *ts)
2469 {
2470     *la_temp_pref(ts)
2471         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2472 }
2473 
2474 /* liveness analysis: end of function: all temps are dead, and globals
2475    should be in memory. */
2476 static void la_func_end(TCGContext *s, int ng, int nt)
2477 {
2478     int i;
2479 
2480     for (i = 0; i < ng; ++i) {
2481         s->temps[i].state = TS_DEAD | TS_MEM;
2482         la_reset_pref(&s->temps[i]);
2483     }
2484     for (i = ng; i < nt; ++i) {
2485         s->temps[i].state = TS_DEAD;
2486         la_reset_pref(&s->temps[i]);
2487     }
2488 }
2489 
2490 /* liveness analysis: end of basic block: all temps are dead, globals
2491    and local temps should be in memory. */
2492 static void la_bb_end(TCGContext *s, int ng, int nt)
2493 {
2494     int i;
2495 
2496     for (i = 0; i < ng; ++i) {
2497         s->temps[i].state = TS_DEAD | TS_MEM;
2498         la_reset_pref(&s->temps[i]);
2499     }
2500     for (i = ng; i < nt; ++i) {
2501         s->temps[i].state = (s->temps[i].temp_local
2502                              ? TS_DEAD | TS_MEM
2503                              : TS_DEAD);
2504         la_reset_pref(&s->temps[i]);
2505     }
2506 }
2507 
2508 /* liveness analysis: sync globals back to memory.  */
2509 static void la_global_sync(TCGContext *s, int ng)
2510 {
2511     int i;
2512 
2513     for (i = 0; i < ng; ++i) {
2514         int state = s->temps[i].state;
2515         s->temps[i].state = state | TS_MEM;
2516         if (state == TS_DEAD) {
2517             /* If the global was previously dead, reset prefs.  */
2518             la_reset_pref(&s->temps[i]);
2519         }
2520     }
2521 }
2522 
2523 /*
2524  * liveness analysis: conditional branch: all temps are dead,
2525  * globals and local temps should be synced.
2526  */
2527 static void la_bb_sync(TCGContext *s, int ng, int nt)
2528 {
2529     la_global_sync(s, ng);
2530 
2531     for (int i = ng; i < nt; ++i) {
2532         if (s->temps[i].temp_local) {
2533             int state = s->temps[i].state;
2534             s->temps[i].state = state | TS_MEM;
2535             if (state != TS_DEAD) {
2536                 continue;
2537             }
2538         } else {
2539             s->temps[i].state = TS_DEAD;
2540         }
2541         la_reset_pref(&s->temps[i]);
2542     }
2543 }
2544 
2545 /* liveness analysis: sync globals back to memory and kill.  */
2546 static void la_global_kill(TCGContext *s, int ng)
2547 {
2548     int i;
2549 
2550     for (i = 0; i < ng; i++) {
2551         s->temps[i].state = TS_DEAD | TS_MEM;
2552         la_reset_pref(&s->temps[i]);
2553     }
2554 }
2555 
2556 /* liveness analysis: note live globals crossing calls.  */
2557 static void la_cross_call(TCGContext *s, int nt)
2558 {
2559     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2560     int i;
2561 
2562     for (i = 0; i < nt; i++) {
2563         TCGTemp *ts = &s->temps[i];
2564         if (!(ts->state & TS_DEAD)) {
2565             TCGRegSet *pset = la_temp_pref(ts);
2566             TCGRegSet set = *pset;
2567 
2568             set &= mask;
2569             /* If the combination is not possible, restart.  */
2570             if (set == 0) {
2571                 set = tcg_target_available_regs[ts->type] & mask;
2572             }
2573             *pset = set;
2574         }
2575     }
2576 }
2577 
2578 /* Liveness analysis : update the opc_arg_life array to tell if a
2579    given input arguments is dead. Instructions updating dead
2580    temporaries are removed. */
2581 static void liveness_pass_1(TCGContext *s)
2582 {
2583     int nb_globals = s->nb_globals;
2584     int nb_temps = s->nb_temps;
2585     TCGOp *op, *op_prev;
2586     TCGRegSet *prefs;
2587     int i;
2588 
2589     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2590     for (i = 0; i < nb_temps; ++i) {
2591         s->temps[i].state_ptr = prefs + i;
2592     }
2593 
2594     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2595     la_func_end(s, nb_globals, nb_temps);
2596 
2597     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2598         int nb_iargs, nb_oargs;
2599         TCGOpcode opc_new, opc_new2;
2600         bool have_opc_new2;
2601         TCGLifeData arg_life = 0;
2602         TCGTemp *ts;
2603         TCGOpcode opc = op->opc;
2604         const TCGOpDef *def = &tcg_op_defs[opc];
2605 
2606         switch (opc) {
2607         case INDEX_op_call:
2608             {
2609                 int call_flags;
2610                 int nb_call_regs;
2611 
2612                 nb_oargs = TCGOP_CALLO(op);
2613                 nb_iargs = TCGOP_CALLI(op);
2614                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2615 
2616                 /* pure functions can be removed if their result is unused */
2617                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2618                     for (i = 0; i < nb_oargs; i++) {
2619                         ts = arg_temp(op->args[i]);
2620                         if (ts->state != TS_DEAD) {
2621                             goto do_not_remove_call;
2622                         }
2623                     }
2624                     goto do_remove;
2625                 }
2626             do_not_remove_call:
2627 
2628                 /* Output args are dead.  */
2629                 for (i = 0; i < nb_oargs; i++) {
2630                     ts = arg_temp(op->args[i]);
2631                     if (ts->state & TS_DEAD) {
2632                         arg_life |= DEAD_ARG << i;
2633                     }
2634                     if (ts->state & TS_MEM) {
2635                         arg_life |= SYNC_ARG << i;
2636                     }
2637                     ts->state = TS_DEAD;
2638                     la_reset_pref(ts);
2639 
2640                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2641                     op->output_pref[i] = 0;
2642                 }
2643 
2644                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2645                                     TCG_CALL_NO_READ_GLOBALS))) {
2646                     la_global_kill(s, nb_globals);
2647                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2648                     la_global_sync(s, nb_globals);
2649                 }
2650 
2651                 /* Record arguments that die in this helper.  */
2652                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2653                     ts = arg_temp(op->args[i]);
2654                     if (ts && ts->state & TS_DEAD) {
2655                         arg_life |= DEAD_ARG << i;
2656                     }
2657                 }
2658 
2659                 /* For all live registers, remove call-clobbered prefs.  */
2660                 la_cross_call(s, nb_temps);
2661 
2662                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2663 
2664                 /* Input arguments are live for preceding opcodes.  */
2665                 for (i = 0; i < nb_iargs; i++) {
2666                     ts = arg_temp(op->args[i + nb_oargs]);
2667                     if (ts && ts->state & TS_DEAD) {
2668                         /* For those arguments that die, and will be allocated
2669                          * in registers, clear the register set for that arg,
2670                          * to be filled in below.  For args that will be on
2671                          * the stack, reset to any available reg.
2672                          */
2673                         *la_temp_pref(ts)
2674                             = (i < nb_call_regs ? 0 :
2675                                tcg_target_available_regs[ts->type]);
2676                         ts->state &= ~TS_DEAD;
2677                     }
2678                 }
2679 
2680                 /* For each input argument, add its input register to prefs.
2681                    If a temp is used once, this produces a single set bit.  */
2682                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2683                     ts = arg_temp(op->args[i + nb_oargs]);
2684                     if (ts) {
2685                         tcg_regset_set_reg(*la_temp_pref(ts),
2686                                            tcg_target_call_iarg_regs[i]);
2687                     }
2688                 }
2689             }
2690             break;
2691         case INDEX_op_insn_start:
2692             break;
2693         case INDEX_op_discard:
2694             /* mark the temporary as dead */
2695             ts = arg_temp(op->args[0]);
2696             ts->state = TS_DEAD;
2697             la_reset_pref(ts);
2698             break;
2699 
2700         case INDEX_op_add2_i32:
2701             opc_new = INDEX_op_add_i32;
2702             goto do_addsub2;
2703         case INDEX_op_sub2_i32:
2704             opc_new = INDEX_op_sub_i32;
2705             goto do_addsub2;
2706         case INDEX_op_add2_i64:
2707             opc_new = INDEX_op_add_i64;
2708             goto do_addsub2;
2709         case INDEX_op_sub2_i64:
2710             opc_new = INDEX_op_sub_i64;
2711         do_addsub2:
2712             nb_iargs = 4;
2713             nb_oargs = 2;
2714             /* Test if the high part of the operation is dead, but not
2715                the low part.  The result can be optimized to a simple
2716                add or sub.  This happens often for x86_64 guest when the
2717                cpu mode is set to 32 bit.  */
2718             if (arg_temp(op->args[1])->state == TS_DEAD) {
2719                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2720                     goto do_remove;
2721                 }
2722                 /* Replace the opcode and adjust the args in place,
2723                    leaving 3 unused args at the end.  */
2724                 op->opc = opc = opc_new;
2725                 op->args[1] = op->args[2];
2726                 op->args[2] = op->args[4];
2727                 /* Fall through and mark the single-word operation live.  */
2728                 nb_iargs = 2;
2729                 nb_oargs = 1;
2730             }
2731             goto do_not_remove;
2732 
2733         case INDEX_op_mulu2_i32:
2734             opc_new = INDEX_op_mul_i32;
2735             opc_new2 = INDEX_op_muluh_i32;
2736             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2737             goto do_mul2;
2738         case INDEX_op_muls2_i32:
2739             opc_new = INDEX_op_mul_i32;
2740             opc_new2 = INDEX_op_mulsh_i32;
2741             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2742             goto do_mul2;
2743         case INDEX_op_mulu2_i64:
2744             opc_new = INDEX_op_mul_i64;
2745             opc_new2 = INDEX_op_muluh_i64;
2746             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2747             goto do_mul2;
2748         case INDEX_op_muls2_i64:
2749             opc_new = INDEX_op_mul_i64;
2750             opc_new2 = INDEX_op_mulsh_i64;
2751             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2752             goto do_mul2;
2753         do_mul2:
2754             nb_iargs = 2;
2755             nb_oargs = 2;
2756             if (arg_temp(op->args[1])->state == TS_DEAD) {
2757                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2758                     /* Both parts of the operation are dead.  */
2759                     goto do_remove;
2760                 }
2761                 /* The high part of the operation is dead; generate the low. */
2762                 op->opc = opc = opc_new;
2763                 op->args[1] = op->args[2];
2764                 op->args[2] = op->args[3];
2765             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2766                 /* The low part of the operation is dead; generate the high. */
2767                 op->opc = opc = opc_new2;
2768                 op->args[0] = op->args[1];
2769                 op->args[1] = op->args[2];
2770                 op->args[2] = op->args[3];
2771             } else {
2772                 goto do_not_remove;
2773             }
2774             /* Mark the single-word operation live.  */
2775             nb_oargs = 1;
2776             goto do_not_remove;
2777 
2778         default:
2779             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2780             nb_iargs = def->nb_iargs;
2781             nb_oargs = def->nb_oargs;
2782 
2783             /* Test if the operation can be removed because all
2784                its outputs are dead. We assume that nb_oargs == 0
2785                implies side effects */
2786             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2787                 for (i = 0; i < nb_oargs; i++) {
2788                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2789                         goto do_not_remove;
2790                     }
2791                 }
2792                 goto do_remove;
2793             }
2794             goto do_not_remove;
2795 
2796         do_remove:
2797             tcg_op_remove(s, op);
2798             break;
2799 
2800         do_not_remove:
2801             for (i = 0; i < nb_oargs; i++) {
2802                 ts = arg_temp(op->args[i]);
2803 
2804                 /* Remember the preference of the uses that followed.  */
2805                 op->output_pref[i] = *la_temp_pref(ts);
2806 
2807                 /* Output args are dead.  */
2808                 if (ts->state & TS_DEAD) {
2809                     arg_life |= DEAD_ARG << i;
2810                 }
2811                 if (ts->state & TS_MEM) {
2812                     arg_life |= SYNC_ARG << i;
2813                 }
2814                 ts->state = TS_DEAD;
2815                 la_reset_pref(ts);
2816             }
2817 
2818             /* If end of basic block, update.  */
2819             if (def->flags & TCG_OPF_BB_EXIT) {
2820                 la_func_end(s, nb_globals, nb_temps);
2821             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2822                 la_bb_sync(s, nb_globals, nb_temps);
2823             } else if (def->flags & TCG_OPF_BB_END) {
2824                 la_bb_end(s, nb_globals, nb_temps);
2825             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2826                 la_global_sync(s, nb_globals);
2827                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2828                     la_cross_call(s, nb_temps);
2829                 }
2830             }
2831 
2832             /* Record arguments that die in this opcode.  */
2833             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2834                 ts = arg_temp(op->args[i]);
2835                 if (ts->state & TS_DEAD) {
2836                     arg_life |= DEAD_ARG << i;
2837                 }
2838             }
2839 
2840             /* Input arguments are live for preceding opcodes.  */
2841             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2842                 ts = arg_temp(op->args[i]);
2843                 if (ts->state & TS_DEAD) {
2844                     /* For operands that were dead, initially allow
2845                        all regs for the type.  */
2846                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2847                     ts->state &= ~TS_DEAD;
2848                 }
2849             }
2850 
2851             /* Incorporate constraints for this operand.  */
2852             switch (opc) {
2853             case INDEX_op_mov_i32:
2854             case INDEX_op_mov_i64:
2855                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2856                    have proper constraints.  That said, special case
2857                    moves to propagate preferences backward.  */
2858                 if (IS_DEAD_ARG(1)) {
2859                     *la_temp_pref(arg_temp(op->args[0]))
2860                         = *la_temp_pref(arg_temp(op->args[1]));
2861                 }
2862                 break;
2863 
2864             default:
2865                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2866                     const TCGArgConstraint *ct = &def->args_ct[i];
2867                     TCGRegSet set, *pset;
2868 
2869                     ts = arg_temp(op->args[i]);
2870                     pset = la_temp_pref(ts);
2871                     set = *pset;
2872 
2873                     set &= ct->regs;
2874                     if (ct->ialias) {
2875                         set &= op->output_pref[ct->alias_index];
2876                     }
2877                     /* If the combination is not possible, restart.  */
2878                     if (set == 0) {
2879                         set = ct->regs;
2880                     }
2881                     *pset = set;
2882                 }
2883                 break;
2884             }
2885             break;
2886         }
2887         op->life = arg_life;
2888     }
2889 }
2890 
2891 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2892 static bool liveness_pass_2(TCGContext *s)
2893 {
2894     int nb_globals = s->nb_globals;
2895     int nb_temps, i;
2896     bool changes = false;
2897     TCGOp *op, *op_next;
2898 
2899     /* Create a temporary for each indirect global.  */
2900     for (i = 0; i < nb_globals; ++i) {
2901         TCGTemp *its = &s->temps[i];
2902         if (its->indirect_reg) {
2903             TCGTemp *dts = tcg_temp_alloc(s);
2904             dts->type = its->type;
2905             dts->base_type = its->base_type;
2906             its->state_ptr = dts;
2907         } else {
2908             its->state_ptr = NULL;
2909         }
2910         /* All globals begin dead.  */
2911         its->state = TS_DEAD;
2912     }
2913     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2914         TCGTemp *its = &s->temps[i];
2915         its->state_ptr = NULL;
2916         its->state = TS_DEAD;
2917     }
2918 
2919     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2920         TCGOpcode opc = op->opc;
2921         const TCGOpDef *def = &tcg_op_defs[opc];
2922         TCGLifeData arg_life = op->life;
2923         int nb_iargs, nb_oargs, call_flags;
2924         TCGTemp *arg_ts, *dir_ts;
2925 
2926         if (opc == INDEX_op_call) {
2927             nb_oargs = TCGOP_CALLO(op);
2928             nb_iargs = TCGOP_CALLI(op);
2929             call_flags = op->args[nb_oargs + nb_iargs + 1];
2930         } else {
2931             nb_iargs = def->nb_iargs;
2932             nb_oargs = def->nb_oargs;
2933 
2934             /* Set flags similar to how calls require.  */
2935             if (def->flags & TCG_OPF_COND_BRANCH) {
2936                 /* Like reading globals: sync_globals */
2937                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2938             } else if (def->flags & TCG_OPF_BB_END) {
2939                 /* Like writing globals: save_globals */
2940                 call_flags = 0;
2941             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2942                 /* Like reading globals: sync_globals */
2943                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2944             } else {
2945                 /* No effect on globals.  */
2946                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2947                               TCG_CALL_NO_WRITE_GLOBALS);
2948             }
2949         }
2950 
2951         /* Make sure that input arguments are available.  */
2952         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2953             arg_ts = arg_temp(op->args[i]);
2954             if (arg_ts) {
2955                 dir_ts = arg_ts->state_ptr;
2956                 if (dir_ts && arg_ts->state == TS_DEAD) {
2957                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2958                                       ? INDEX_op_ld_i32
2959                                       : INDEX_op_ld_i64);
2960                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2961 
2962                     lop->args[0] = temp_arg(dir_ts);
2963                     lop->args[1] = temp_arg(arg_ts->mem_base);
2964                     lop->args[2] = arg_ts->mem_offset;
2965 
2966                     /* Loaded, but synced with memory.  */
2967                     arg_ts->state = TS_MEM;
2968                 }
2969             }
2970         }
2971 
2972         /* Perform input replacement, and mark inputs that became dead.
2973            No action is required except keeping temp_state up to date
2974            so that we reload when needed.  */
2975         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2976             arg_ts = arg_temp(op->args[i]);
2977             if (arg_ts) {
2978                 dir_ts = arg_ts->state_ptr;
2979                 if (dir_ts) {
2980                     op->args[i] = temp_arg(dir_ts);
2981                     changes = true;
2982                     if (IS_DEAD_ARG(i)) {
2983                         arg_ts->state = TS_DEAD;
2984                     }
2985                 }
2986             }
2987         }
2988 
2989         /* Liveness analysis should ensure that the following are
2990            all correct, for call sites and basic block end points.  */
2991         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2992             /* Nothing to do */
2993         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2994             for (i = 0; i < nb_globals; ++i) {
2995                 /* Liveness should see that globals are synced back,
2996                    that is, either TS_DEAD or TS_MEM.  */
2997                 arg_ts = &s->temps[i];
2998                 tcg_debug_assert(arg_ts->state_ptr == 0
2999                                  || arg_ts->state != 0);
3000             }
3001         } else {
3002             for (i = 0; i < nb_globals; ++i) {
3003                 /* Liveness should see that globals are saved back,
3004                    that is, TS_DEAD, waiting to be reloaded.  */
3005                 arg_ts = &s->temps[i];
3006                 tcg_debug_assert(arg_ts->state_ptr == 0
3007                                  || arg_ts->state == TS_DEAD);
3008             }
3009         }
3010 
3011         /* Outputs become available.  */
3012         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3013             arg_ts = arg_temp(op->args[0]);
3014             dir_ts = arg_ts->state_ptr;
3015             if (dir_ts) {
3016                 op->args[0] = temp_arg(dir_ts);
3017                 changes = true;
3018 
3019                 /* The output is now live and modified.  */
3020                 arg_ts->state = 0;
3021 
3022                 if (NEED_SYNC_ARG(0)) {
3023                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3024                                       ? INDEX_op_st_i32
3025                                       : INDEX_op_st_i64);
3026                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3027                     TCGTemp *out_ts = dir_ts;
3028 
3029                     if (IS_DEAD_ARG(0)) {
3030                         out_ts = arg_temp(op->args[1]);
3031                         arg_ts->state = TS_DEAD;
3032                         tcg_op_remove(s, op);
3033                     } else {
3034                         arg_ts->state = TS_MEM;
3035                     }
3036 
3037                     sop->args[0] = temp_arg(out_ts);
3038                     sop->args[1] = temp_arg(arg_ts->mem_base);
3039                     sop->args[2] = arg_ts->mem_offset;
3040                 } else {
3041                     tcg_debug_assert(!IS_DEAD_ARG(0));
3042                 }
3043             }
3044         } else {
3045             for (i = 0; i < nb_oargs; i++) {
3046                 arg_ts = arg_temp(op->args[i]);
3047                 dir_ts = arg_ts->state_ptr;
3048                 if (!dir_ts) {
3049                     continue;
3050                 }
3051                 op->args[i] = temp_arg(dir_ts);
3052                 changes = true;
3053 
3054                 /* The output is now live and modified.  */
3055                 arg_ts->state = 0;
3056 
3057                 /* Sync outputs upon their last write.  */
3058                 if (NEED_SYNC_ARG(i)) {
3059                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3060                                       ? INDEX_op_st_i32
3061                                       : INDEX_op_st_i64);
3062                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3063 
3064                     sop->args[0] = temp_arg(dir_ts);
3065                     sop->args[1] = temp_arg(arg_ts->mem_base);
3066                     sop->args[2] = arg_ts->mem_offset;
3067 
3068                     arg_ts->state = TS_MEM;
3069                 }
3070                 /* Drop outputs that are dead.  */
3071                 if (IS_DEAD_ARG(i)) {
3072                     arg_ts->state = TS_DEAD;
3073                 }
3074             }
3075         }
3076     }
3077 
3078     return changes;
3079 }
3080 
3081 #ifdef CONFIG_DEBUG_TCG
3082 static void dump_regs(TCGContext *s)
3083 {
3084     TCGTemp *ts;
3085     int i;
3086     char buf[64];
3087 
3088     for(i = 0; i < s->nb_temps; i++) {
3089         ts = &s->temps[i];
3090         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3091         switch(ts->val_type) {
3092         case TEMP_VAL_REG:
3093             printf("%s", tcg_target_reg_names[ts->reg]);
3094             break;
3095         case TEMP_VAL_MEM:
3096             printf("%d(%s)", (int)ts->mem_offset,
3097                    tcg_target_reg_names[ts->mem_base->reg]);
3098             break;
3099         case TEMP_VAL_CONST:
3100             printf("$0x%" TCG_PRIlx, ts->val);
3101             break;
3102         case TEMP_VAL_DEAD:
3103             printf("D");
3104             break;
3105         default:
3106             printf("???");
3107             break;
3108         }
3109         printf("\n");
3110     }
3111 
3112     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3113         if (s->reg_to_temp[i] != NULL) {
3114             printf("%s: %s\n",
3115                    tcg_target_reg_names[i],
3116                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3117         }
3118     }
3119 }
3120 
3121 static void check_regs(TCGContext *s)
3122 {
3123     int reg;
3124     int k;
3125     TCGTemp *ts;
3126     char buf[64];
3127 
3128     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3129         ts = s->reg_to_temp[reg];
3130         if (ts != NULL) {
3131             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3132                 printf("Inconsistency for register %s:\n",
3133                        tcg_target_reg_names[reg]);
3134                 goto fail;
3135             }
3136         }
3137     }
3138     for (k = 0; k < s->nb_temps; k++) {
3139         ts = &s->temps[k];
3140         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3141             && s->reg_to_temp[ts->reg] != ts) {
3142             printf("Inconsistency for temp %s:\n",
3143                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3144         fail:
3145             printf("reg state:\n");
3146             dump_regs(s);
3147             tcg_abort();
3148         }
3149     }
3150 }
3151 #endif
3152 
3153 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3154 {
3155 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3156     /* Sparc64 stack is accessed with offset of 2047 */
3157     s->current_frame_offset = (s->current_frame_offset +
3158                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3159         ~(sizeof(tcg_target_long) - 1);
3160 #endif
3161     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3162         s->frame_end) {
3163         tcg_abort();
3164     }
3165     ts->mem_offset = s->current_frame_offset;
3166     ts->mem_base = s->frame_temp;
3167     ts->mem_allocated = 1;
3168     s->current_frame_offset += sizeof(tcg_target_long);
3169 }
3170 
3171 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3172 
3173 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3174    mark it free; otherwise mark it dead.  */
3175 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3176 {
3177     if (ts->fixed_reg) {
3178         return;
3179     }
3180     if (ts->val_type == TEMP_VAL_REG) {
3181         s->reg_to_temp[ts->reg] = NULL;
3182     }
3183     ts->val_type = (free_or_dead < 0
3184                     || ts->temp_local
3185                     || ts->temp_global
3186                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3187 }
3188 
3189 /* Mark a temporary as dead.  */
3190 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3191 {
3192     temp_free_or_dead(s, ts, 1);
3193 }
3194 
3195 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3196    registers needs to be allocated to store a constant.  If 'free_or_dead'
3197    is non-zero, subsequently release the temporary; if it is positive, the
3198    temp is dead; if it is negative, the temp is free.  */
3199 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3200                       TCGRegSet preferred_regs, int free_or_dead)
3201 {
3202     if (ts->fixed_reg) {
3203         return;
3204     }
3205     if (!ts->mem_coherent) {
3206         if (!ts->mem_allocated) {
3207             temp_allocate_frame(s, ts);
3208         }
3209         switch (ts->val_type) {
3210         case TEMP_VAL_CONST:
3211             /* If we're going to free the temp immediately, then we won't
3212                require it later in a register, so attempt to store the
3213                constant to memory directly.  */
3214             if (free_or_dead
3215                 && tcg_out_sti(s, ts->type, ts->val,
3216                                ts->mem_base->reg, ts->mem_offset)) {
3217                 break;
3218             }
3219             temp_load(s, ts, tcg_target_available_regs[ts->type],
3220                       allocated_regs, preferred_regs);
3221             /* fallthrough */
3222 
3223         case TEMP_VAL_REG:
3224             tcg_out_st(s, ts->type, ts->reg,
3225                        ts->mem_base->reg, ts->mem_offset);
3226             break;
3227 
3228         case TEMP_VAL_MEM:
3229             break;
3230 
3231         case TEMP_VAL_DEAD:
3232         default:
3233             tcg_abort();
3234         }
3235         ts->mem_coherent = 1;
3236     }
3237     if (free_or_dead) {
3238         temp_free_or_dead(s, ts, free_or_dead);
3239     }
3240 }
3241 
3242 /* free register 'reg' by spilling the corresponding temporary if necessary */
3243 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3244 {
3245     TCGTemp *ts = s->reg_to_temp[reg];
3246     if (ts != NULL) {
3247         temp_sync(s, ts, allocated_regs, 0, -1);
3248     }
3249 }
3250 
3251 /**
3252  * tcg_reg_alloc:
3253  * @required_regs: Set of registers in which we must allocate.
3254  * @allocated_regs: Set of registers which must be avoided.
3255  * @preferred_regs: Set of registers we should prefer.
3256  * @rev: True if we search the registers in "indirect" order.
3257  *
3258  * The allocated register must be in @required_regs & ~@allocated_regs,
3259  * but if we can put it in @preferred_regs we may save a move later.
3260  */
3261 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3262                             TCGRegSet allocated_regs,
3263                             TCGRegSet preferred_regs, bool rev)
3264 {
3265     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3266     TCGRegSet reg_ct[2];
3267     const int *order;
3268 
3269     reg_ct[1] = required_regs & ~allocated_regs;
3270     tcg_debug_assert(reg_ct[1] != 0);
3271     reg_ct[0] = reg_ct[1] & preferred_regs;
3272 
3273     /* Skip the preferred_regs option if it cannot be satisfied,
3274        or if the preference made no difference.  */
3275     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3276 
3277     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3278 
3279     /* Try free registers, preferences first.  */
3280     for (j = f; j < 2; j++) {
3281         TCGRegSet set = reg_ct[j];
3282 
3283         if (tcg_regset_single(set)) {
3284             /* One register in the set.  */
3285             TCGReg reg = tcg_regset_first(set);
3286             if (s->reg_to_temp[reg] == NULL) {
3287                 return reg;
3288             }
3289         } else {
3290             for (i = 0; i < n; i++) {
3291                 TCGReg reg = order[i];
3292                 if (s->reg_to_temp[reg] == NULL &&
3293                     tcg_regset_test_reg(set, reg)) {
3294                     return reg;
3295                 }
3296             }
3297         }
3298     }
3299 
3300     /* We must spill something.  */
3301     for (j = f; j < 2; j++) {
3302         TCGRegSet set = reg_ct[j];
3303 
3304         if (tcg_regset_single(set)) {
3305             /* One register in the set.  */
3306             TCGReg reg = tcg_regset_first(set);
3307             tcg_reg_free(s, reg, allocated_regs);
3308             return reg;
3309         } else {
3310             for (i = 0; i < n; i++) {
3311                 TCGReg reg = order[i];
3312                 if (tcg_regset_test_reg(set, reg)) {
3313                     tcg_reg_free(s, reg, allocated_regs);
3314                     return reg;
3315                 }
3316             }
3317         }
3318     }
3319 
3320     tcg_abort();
3321 }
3322 
3323 /* Make sure the temporary is in a register.  If needed, allocate the register
3324    from DESIRED while avoiding ALLOCATED.  */
3325 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3326                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3327 {
3328     TCGReg reg;
3329 
3330     switch (ts->val_type) {
3331     case TEMP_VAL_REG:
3332         return;
3333     case TEMP_VAL_CONST:
3334         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3335                             preferred_regs, ts->indirect_base);
3336         tcg_out_movi(s, ts->type, reg, ts->val);
3337         ts->mem_coherent = 0;
3338         break;
3339     case TEMP_VAL_MEM:
3340         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3341                             preferred_regs, ts->indirect_base);
3342         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3343         ts->mem_coherent = 1;
3344         break;
3345     case TEMP_VAL_DEAD:
3346     default:
3347         tcg_abort();
3348     }
3349     ts->reg = reg;
3350     ts->val_type = TEMP_VAL_REG;
3351     s->reg_to_temp[reg] = ts;
3352 }
3353 
3354 /* Save a temporary to memory. 'allocated_regs' is used in case a
3355    temporary registers needs to be allocated to store a constant.  */
3356 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3357 {
3358     /* The liveness analysis already ensures that globals are back
3359        in memory. Keep an tcg_debug_assert for safety. */
3360     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3361 }
3362 
3363 /* save globals to their canonical location and assume they can be
3364    modified be the following code. 'allocated_regs' is used in case a
3365    temporary registers needs to be allocated to store a constant. */
3366 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3367 {
3368     int i, n;
3369 
3370     for (i = 0, n = s->nb_globals; i < n; i++) {
3371         temp_save(s, &s->temps[i], allocated_regs);
3372     }
3373 }
3374 
3375 /* sync globals to their canonical location and assume they can be
3376    read by the following code. 'allocated_regs' is used in case a
3377    temporary registers needs to be allocated to store a constant. */
3378 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3379 {
3380     int i, n;
3381 
3382     for (i = 0, n = s->nb_globals; i < n; i++) {
3383         TCGTemp *ts = &s->temps[i];
3384         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3385                          || ts->fixed_reg
3386                          || ts->mem_coherent);
3387     }
3388 }
3389 
3390 /* at the end of a basic block, we assume all temporaries are dead and
3391    all globals are stored at their canonical location. */
3392 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3393 {
3394     int i;
3395 
3396     for (i = s->nb_globals; i < s->nb_temps; i++) {
3397         TCGTemp *ts = &s->temps[i];
3398         if (ts->temp_local) {
3399             temp_save(s, ts, allocated_regs);
3400         } else {
3401             /* The liveness analysis already ensures that temps are dead.
3402                Keep an tcg_debug_assert for safety. */
3403             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3404         }
3405     }
3406 
3407     save_globals(s, allocated_regs);
3408 }
3409 
3410 /*
3411  * At a conditional branch, we assume all temporaries are dead and
3412  * all globals and local temps are synced to their location.
3413  */
3414 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3415 {
3416     sync_globals(s, allocated_regs);
3417 
3418     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3419         TCGTemp *ts = &s->temps[i];
3420         /*
3421          * The liveness analysis already ensures that temps are dead.
3422          * Keep tcg_debug_asserts for safety.
3423          */
3424         if (ts->temp_local) {
3425             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3426         } else {
3427             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3428         }
3429     }
3430 }
3431 
3432 /*
3433  * Specialized code generation for INDEX_op_movi_*.
3434  */
3435 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3436                                   tcg_target_ulong val, TCGLifeData arg_life,
3437                                   TCGRegSet preferred_regs)
3438 {
3439     /* ENV should not be modified.  */
3440     tcg_debug_assert(!ots->fixed_reg);
3441 
3442     /* The movi is not explicitly generated here.  */
3443     if (ots->val_type == TEMP_VAL_REG) {
3444         s->reg_to_temp[ots->reg] = NULL;
3445     }
3446     ots->val_type = TEMP_VAL_CONST;
3447     ots->val = val;
3448     ots->mem_coherent = 0;
3449     if (NEED_SYNC_ARG(0)) {
3450         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3451     } else if (IS_DEAD_ARG(0)) {
3452         temp_dead(s, ots);
3453     }
3454 }
3455 
3456 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3457 {
3458     TCGTemp *ots = arg_temp(op->args[0]);
3459     tcg_target_ulong val = op->args[1];
3460 
3461     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3462 }
3463 
3464 /*
3465  * Specialized code generation for INDEX_op_mov_*.
3466  */
3467 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3468 {
3469     const TCGLifeData arg_life = op->life;
3470     TCGRegSet allocated_regs, preferred_regs;
3471     TCGTemp *ts, *ots;
3472     TCGType otype, itype;
3473 
3474     allocated_regs = s->reserved_regs;
3475     preferred_regs = op->output_pref[0];
3476     ots = arg_temp(op->args[0]);
3477     ts = arg_temp(op->args[1]);
3478 
3479     /* ENV should not be modified.  */
3480     tcg_debug_assert(!ots->fixed_reg);
3481 
3482     /* Note that otype != itype for no-op truncation.  */
3483     otype = ots->type;
3484     itype = ts->type;
3485 
3486     if (ts->val_type == TEMP_VAL_CONST) {
3487         /* propagate constant or generate sti */
3488         tcg_target_ulong val = ts->val;
3489         if (IS_DEAD_ARG(1)) {
3490             temp_dead(s, ts);
3491         }
3492         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3493         return;
3494     }
3495 
3496     /* If the source value is in memory we're going to be forced
3497        to have it in a register in order to perform the copy.  Copy
3498        the SOURCE value into its own register first, that way we
3499        don't have to reload SOURCE the next time it is used. */
3500     if (ts->val_type == TEMP_VAL_MEM) {
3501         temp_load(s, ts, tcg_target_available_regs[itype],
3502                   allocated_regs, preferred_regs);
3503     }
3504 
3505     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3506     if (IS_DEAD_ARG(0)) {
3507         /* mov to a non-saved dead register makes no sense (even with
3508            liveness analysis disabled). */
3509         tcg_debug_assert(NEED_SYNC_ARG(0));
3510         if (!ots->mem_allocated) {
3511             temp_allocate_frame(s, ots);
3512         }
3513         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3514         if (IS_DEAD_ARG(1)) {
3515             temp_dead(s, ts);
3516         }
3517         temp_dead(s, ots);
3518     } else {
3519         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3520             /* the mov can be suppressed */
3521             if (ots->val_type == TEMP_VAL_REG) {
3522                 s->reg_to_temp[ots->reg] = NULL;
3523             }
3524             ots->reg = ts->reg;
3525             temp_dead(s, ts);
3526         } else {
3527             if (ots->val_type != TEMP_VAL_REG) {
3528                 /* When allocating a new register, make sure to not spill the
3529                    input one. */
3530                 tcg_regset_set_reg(allocated_regs, ts->reg);
3531                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3532                                          allocated_regs, preferred_regs,
3533                                          ots->indirect_base);
3534             }
3535             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3536                 /*
3537                  * Cross register class move not supported.
3538                  * Store the source register into the destination slot
3539                  * and leave the destination temp as TEMP_VAL_MEM.
3540                  */
3541                 assert(!ots->fixed_reg);
3542                 if (!ts->mem_allocated) {
3543                     temp_allocate_frame(s, ots);
3544                 }
3545                 tcg_out_st(s, ts->type, ts->reg,
3546                            ots->mem_base->reg, ots->mem_offset);
3547                 ots->mem_coherent = 1;
3548                 temp_free_or_dead(s, ots, -1);
3549                 return;
3550             }
3551         }
3552         ots->val_type = TEMP_VAL_REG;
3553         ots->mem_coherent = 0;
3554         s->reg_to_temp[ots->reg] = ots;
3555         if (NEED_SYNC_ARG(0)) {
3556             temp_sync(s, ots, allocated_regs, 0, 0);
3557         }
3558     }
3559 }
3560 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * args[0] is the output temp and args[1] the input temp; the element size
 * and the vector type are taken from TCGOP_VECE() and TCGOP_VECL().
 *
 * A constant input is forwarded to tcg_reg_alloc_do_movi().  Otherwise an
 * output register is allocated (unless the output already lives in one)
 * and the value is produced by the first of these that succeeds:
 *   - tcg_out_dup_vec() straight from the input register;
 *   - tcg_out_dupm_vec() from the input's memory slot;
 *   - a plain move/load of the input into the output register, followed
 *     by tcg_out_dup_vec() of the output register onto itself.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    /* Also handed to tcg_reg_alloc_do_movi() on the constant path. */
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!ots->fixed_reg);

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    /* Register constraints of the output (arg 0) and input (arg 1). */
    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                /* Value is now in ots->reg; finish with the final dup. */
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /*
         * Offset of the last (1 << vece)-byte element inside the 4- or
         * 8-byte slot on big-endian hosts; zero otherwise.
         */
#ifdef HOST_WORDS_BIGENDIAN
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* No dup-from-memory: load the scalar, then dup it below. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* Apply the liveness decisions for the input and the output. */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3672 
/*
 * Generic register allocation and code emission for a single TCG op.
 *
 * Steps, in order:
 *   1. copy the op's constant arguments into new_args[];
 *   2. place each input (visited in args_ct[].sort_index order) either as
 *      a constant accepted by the backend or in a register satisfying its
 *      constraint;
 *   3. release the inputs that liveness marked dead;
 *   4. for conditional branches / basic-block ends call the matching
 *      helper; otherwise free call-clobbered registers and sync globals
 *      as the op flags require, then assign the output registers;
 *   5. emit the host instruction;
 *   6. sync or kill each output as liveness requests.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /*
     * NOTE(review): not referenced by name below; presumably consumed by
     * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros -- confirm at their
     * definition.
     */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Per-argument register number or constant value handed to the backend. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* Set to 1 for inputs passed as a constant, 0 for inputs in a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Inputs are visited in the order given by sort_index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->args_ct[nb_oargs + k2].sort_index;
                        if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from being allocated on top of this one. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!ts->fixed_reg);

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Output must not overlap any input register either. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Drop the mapping of the register the temp used to occupy. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3860 
/*
 * STACK_DIR(x): negates @x when TCG_TARGET_STACK_GROWSUP is defined and
 * yields @x unchanged otherwise.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3866 
/*
 * Register allocation and code emission for a call op.
 *
 * The op's argument array holds, in order: TCGOP_CALLO() outputs,
 * TCGOP_CALLI() inputs, the function address and the call flags.
 *
 * Inputs beyond the number of entries in tcg_target_call_iarg_regs[] are
 * stored to the stack first; the rest are then placed in the argument
 * registers.  Dead inputs are released, call-clobbered registers are
 * freed, globals are synced or saved according to the call flags, the
 * call is emitted, and finally each output temp is bound to its entry in
 * tcg_target_call_oarg_regs[].
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /*
     * NOTE(review): not referenced by name below; presumably consumed by
     * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros -- confirm at their
     * definition.
     */
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The two trailing arguments: callee address, then call flags. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers: at most the register count. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* A dummy argument emits no store but still uses up its slot. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        /* A dummy argument leaves its argument register untouched. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever occupies the argument register. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Load directly into the one permitted register. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        reg = tcg_target_call_oarg_regs[i];
        /* The return register must be unoccupied at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4001 
4002 #ifdef CONFIG_PROFILER
4003 
4004 /* avoid copy/paste errors */
4005 #define PROF_ADD(to, from, field)                       \
4006     do {                                                \
4007         (to)->field += qatomic_read(&((from)->field));  \
4008     } while (0)
4009 
4010 #define PROF_MAX(to, from, field)                                       \
4011     do {                                                                \
4012         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4013         if (val__ > (to)->field) {                                      \
4014             (to)->field = val__;                                        \
4015         }                                                               \
4016     } while (0)
4017 
4018 /* Pass in a zero'ed @prof */
4019 static inline
4020 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4021 {
4022     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4023     unsigned int i;
4024 
4025     for (i = 0; i < n_ctxs; i++) {
4026         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4027         const TCGProfile *orig = &s->prof;
4028 
4029         if (counters) {
4030             PROF_ADD(prof, orig, cpu_exec_time);
4031             PROF_ADD(prof, orig, tb_count1);
4032             PROF_ADD(prof, orig, tb_count);
4033             PROF_ADD(prof, orig, op_count);
4034             PROF_MAX(prof, orig, op_count_max);
4035             PROF_ADD(prof, orig, temp_count);
4036             PROF_MAX(prof, orig, temp_count_max);
4037             PROF_ADD(prof, orig, del_op_count);
4038             PROF_ADD(prof, orig, code_in_len);
4039             PROF_ADD(prof, orig, code_out_len);
4040             PROF_ADD(prof, orig, search_out_len);
4041             PROF_ADD(prof, orig, interm_time);
4042             PROF_ADD(prof, orig, code_time);
4043             PROF_ADD(prof, orig, la_time);
4044             PROF_ADD(prof, orig, opt_time);
4045             PROF_ADD(prof, orig, restore_count);
4046             PROF_ADD(prof, orig, restore_time);
4047         }
4048         if (table) {
4049             int i;
4050 
4051             for (i = 0; i < NB_OPS; i++) {
4052                 PROF_ADD(prof, orig, table_op_count[i]);
4053             }
4054         }
4055     }
4056 }
4057 
4058 #undef PROF_ADD
4059 #undef PROF_MAX
4060 
/* Snapshot only the scalar counters/timers into @prof (not the op table). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4065 
/* Snapshot only the per-opcode table into @prof (not the scalar counters). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4070 
4071 void tcg_dump_op_count(void)
4072 {
4073     TCGProfile prof = {};
4074     int i;
4075 
4076     tcg_profile_snapshot_table(&prof);
4077     for (i = 0; i < NB_OPS; i++) {
4078         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4079                     prof.table_op_count[i]);
4080     }
4081 }
4082 
4083 int64_t tcg_cpu_exec_time(void)
4084 {
4085     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4086     unsigned int i;
4087     int64_t ret = 0;
4088 
4089     for (i = 0; i < n_ctxs; i++) {
4090         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4091         const TCGProfile *prof = &s->prof;
4092 
4093         ret += qatomic_read(&prof->cpu_exec_time);
4094     }
4095     return ret;
4096 }
4097 #else
/* Stub used when CONFIG_PROFILER is not defined: just says so. */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4102 
/*
 * Stub used when CONFIG_PROFILER is not defined.  There is no value to
 * return, so this reports an error and terminates the process; it never
 * returns to the caller.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4108 #endif
4109 
4110 
/*
 * Translate the op list held in @s into host code for @tb.
 *
 * Runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, then walks every op and dispatches it
 * to the matching register allocator / emitter.  Code is written starting
 * at tb->tc.ptr.
 *
 * Returns the size of the generated code on success, or a negative value:
 *   -1  the code pointer went past s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs() failed;
 *   <0  the value returned by tcg_out_ldst_finalize() or
 *       tcg_out_pool_finalize(), when those are compiled in.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops of this TB and update the total and the maximum. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        /* Same bookkeeping for the number of temps. */
        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /*
     * Timers are accumulated as (end - start): subtract the clock before
     * the phase and add it back afterwards.
     */
#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /* Host code for this TB is emitted starting at tb->tc.ptr. */
    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Stays at -1 until the first insn_start op has been seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn by recording where it ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Record the insn_start words of the new guest insn. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is split over two args: low half, high half. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
4327 
4328 #ifdef CONFIG_PROFILER
4329 void tcg_dump_info(void)
4330 {
4331     TCGProfile prof = {};
4332     const TCGProfile *s;
4333     int64_t tb_count;
4334     int64_t tb_div_count;
4335     int64_t tot;
4336 
4337     tcg_profile_snapshot_counters(&prof);
4338     s = &prof;
4339     tb_count = s->tb_count;
4340     tb_div_count = tb_count ? tb_count : 1;
4341     tot = s->interm_time + s->code_time;
4342 
4343     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4344                 tot, tot / 2.4e9);
4345     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4346                 " %0.1f%%)\n",
4347                 tb_count, s->tb_count1 - tb_count,
4348                 (double)(s->tb_count1 - s->tb_count)
4349                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4350     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4351                 (double)s->op_count / tb_div_count, s->op_count_max);
4352     qemu_printf("deleted ops/TB      %0.2f\n",
4353                 (double)s->del_op_count / tb_div_count);
4354     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4355                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4356     qemu_printf("avg host code/TB    %0.1f\n",
4357                 (double)s->code_out_len / tb_div_count);
4358     qemu_printf("avg search data/TB  %0.1f\n",
4359                 (double)s->search_out_len / tb_div_count);
4360 
4361     qemu_printf("cycles/op           %0.1f\n",
4362                 s->op_count ? (double)tot / s->op_count : 0);
4363     qemu_printf("cycles/in byte      %0.1f\n",
4364                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4365     qemu_printf("cycles/out byte     %0.1f\n",
4366                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4367     qemu_printf("cycles/search byte     %0.1f\n",
4368                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4369     if (tot == 0) {
4370         tot = 1;
4371     }
4372     qemu_printf("  gen_interm time   %0.1f%%\n",
4373                 (double)s->interm_time / tot * 100.0);
4374     qemu_printf("  gen_code time     %0.1f%%\n",
4375                 (double)s->code_time / tot * 100.0);
4376     qemu_printf("optim./code time    %0.1f%%\n",
4377                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4378                 * 100.0);
4379     qemu_printf("liveness/code time  %0.1f%%\n",
4380                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4381     qemu_printf("cpu_restore count   %" PRId64 "\n",
4382                 s->restore_count);
4383     qemu_printf("  avg cycles        %0.1f\n",
4384                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4385 }
4386 #else
/* Stub used when CONFIG_PROFILER is not defined: just says so. */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4391 #endif
4392 
4393 #ifdef ELF_HOST_MACHINE
4394 /* In order to use this feature, the backend needs to do three things:
4395 
4396    (1) Define ELF_HOST_MACHINE to indicate both what value to
4397        put into the ELF image and to indicate support for the feature.
4398 
4399    (2) Define tcg_register_jit.  This should create a buffer containing
4400        the contents of a .debug_frame section that describes the post-
4401        prologue unwind info for the tcg machine.
4402 
4403    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4404 */
4405 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes; the numeric values (0, 1, 2) are part of the interface and
 * must not be reordered.
 * NOTE(review): presumably the values stored in jit_descriptor.action_flag
 * -- confirm against the GDB JIT interface documentation.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
4412 
/*
 * One node of a doubly linked list; each node refers to an in-memory
 * symbol file by address and size.  Part of the GDB interface: do not
 * reorder or resize the fields.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
4419 
/*
 * Descriptor with a version, an action flag and two jit_code_entry
 * pointers.  Part of the GDB interface: do not reorder or resize the
 * fields.
 * NOTE(review): field semantics are defined by the GDB JIT interface
 * documentation, not by this file.
 */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4426 
/*
 * Deliberately does nothing: noinline plus an empty asm statement.
 * NOTE(review): presumably this keeps a real, callable function body for
 * the debugger to hook -- confirm against the GDB JIT interface
 * documentation.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4432 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  Initial state: version 1, action
   flag 0, and both entry pointers null.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4436 
4437 /* End GDB interface.  */
4438 
/*
 * Return the byte offset of @str inside @strtab, a sequence of
 * NUL-terminated strings laid out back to back.
 *
 * The scan starts at offset 1, skipping the table's first byte.  There is
 * no bound on the search: the loop only ends on a match, so @str must be
 * present in @strtab.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        /* No match: hop over this entry and its terminating NUL. */
    }
    return cur - strtab;
}
4450 
4451 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4452                                  const void *debug_frame,
4453                                  size_t debug_frame_size)
4454 {
4455     struct __attribute__((packed)) DebugInfo {
4456         uint32_t  len;
4457         uint16_t  version;
4458         uint32_t  abbrev;
4459         uint8_t   ptr_size;
4460         uint8_t   cu_die;
4461         uint16_t  cu_lang;
4462         uintptr_t cu_low_pc;
4463         uintptr_t cu_high_pc;
4464         uint8_t   fn_die;
4465         char      fn_name[16];
4466         uintptr_t fn_low_pc;
4467         uintptr_t fn_high_pc;
4468         uint8_t   cu_eoc;
4469     };
4470 
4471     struct ElfImage {
4472         ElfW(Ehdr) ehdr;
4473         ElfW(Phdr) phdr;
4474         ElfW(Shdr) shdr[7];
4475         ElfW(Sym)  sym[2];
4476         struct DebugInfo di;
4477         uint8_t    da[24];
4478         char       str[80];
4479     };
4480 
4481     struct ElfImage *img;
4482 
4483     static const struct ElfImage img_template = {
4484         .ehdr = {
4485             .e_ident[EI_MAG0] = ELFMAG0,
4486             .e_ident[EI_MAG1] = ELFMAG1,
4487             .e_ident[EI_MAG2] = ELFMAG2,
4488             .e_ident[EI_MAG3] = ELFMAG3,
4489             .e_ident[EI_CLASS] = ELF_CLASS,
4490             .e_ident[EI_DATA] = ELF_DATA,
4491             .e_ident[EI_VERSION] = EV_CURRENT,
4492             .e_type = ET_EXEC,
4493             .e_machine = ELF_HOST_MACHINE,
4494             .e_version = EV_CURRENT,
4495             .e_phoff = offsetof(struct ElfImage, phdr),
4496             .e_shoff = offsetof(struct ElfImage, shdr),
4497             .e_ehsize = sizeof(ElfW(Shdr)),
4498             .e_phentsize = sizeof(ElfW(Phdr)),
4499             .e_phnum = 1,
4500             .e_shentsize = sizeof(ElfW(Shdr)),
4501             .e_shnum = ARRAY_SIZE(img->shdr),
4502             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4503 #ifdef ELF_HOST_FLAGS
4504             .e_flags = ELF_HOST_FLAGS,
4505 #endif
4506 #ifdef ELF_OSABI
4507             .e_ident[EI_OSABI] = ELF_OSABI,
4508 #endif
4509         },
4510         .phdr = {
4511             .p_type = PT_LOAD,
4512             .p_flags = PF_X,
4513         },
4514         .shdr = {
4515             [0] = { .sh_type = SHT_NULL },
4516             /* Trick: The contents of code_gen_buffer are not present in
4517                this fake ELF file; that got allocated elsewhere.  Therefore
4518                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4519                will not look for contents.  We can record any address.  */
4520             [1] = { /* .text */
4521                 .sh_type = SHT_NOBITS,
4522                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4523             },
4524             [2] = { /* .debug_info */
4525                 .sh_type = SHT_PROGBITS,
4526                 .sh_offset = offsetof(struct ElfImage, di),
4527                 .sh_size = sizeof(struct DebugInfo),
4528             },
4529             [3] = { /* .debug_abbrev */
4530                 .sh_type = SHT_PROGBITS,
4531                 .sh_offset = offsetof(struct ElfImage, da),
4532                 .sh_size = sizeof(img->da),
4533             },
4534             [4] = { /* .debug_frame */
4535                 .sh_type = SHT_PROGBITS,
4536                 .sh_offset = sizeof(struct ElfImage),
4537             },
4538             [5] = { /* .symtab */
4539                 .sh_type = SHT_SYMTAB,
4540                 .sh_offset = offsetof(struct ElfImage, sym),
4541                 .sh_size = sizeof(img->sym),
4542                 .sh_info = 1,
4543                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4544                 .sh_entsize = sizeof(ElfW(Sym)),
4545             },
4546             [6] = { /* .strtab */
4547                 .sh_type = SHT_STRTAB,
4548                 .sh_offset = offsetof(struct ElfImage, str),
4549                 .sh_size = sizeof(img->str),
4550             }
4551         },
4552         .sym = {
4553             [1] = { /* code_gen_buffer */
4554                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4555                 .st_shndx = 1,
4556             }
4557         },
4558         .di = {
4559             .len = sizeof(struct DebugInfo) - 4,
4560             .version = 2,
4561             .ptr_size = sizeof(void *),
4562             .cu_die = 1,
4563             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4564             .fn_die = 2,
4565             .fn_name = "code_gen_buffer"
4566         },
4567         .da = {
4568             1,          /* abbrev number (the cu) */
4569             0x11, 1,    /* DW_TAG_compile_unit, has children */
4570             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4571             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4572             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4573             0, 0,       /* end of abbrev */
4574             2,          /* abbrev number (the fn) */
4575             0x2e, 0,    /* DW_TAG_subprogram, no children */
4576             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4577             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4578             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4579             0, 0,       /* end of abbrev */
4580             0           /* no more abbrev */
4581         },
4582         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4583                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4584     };
4585 
4586     /* We only need a single jit entry; statically allocate it.  */
4587     static struct jit_code_entry one_entry;
4588 
4589     uintptr_t buf = (uintptr_t)buf_ptr;
4590     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4591     DebugFrameHeader *dfh;
4592 
4593     img = g_malloc(img_size);
4594     *img = img_template;
4595 
4596     img->phdr.p_vaddr = buf;
4597     img->phdr.p_paddr = buf;
4598     img->phdr.p_memsz = buf_size;
4599 
4600     img->shdr[1].sh_name = find_string(img->str, ".text");
4601     img->shdr[1].sh_addr = buf;
4602     img->shdr[1].sh_size = buf_size;
4603 
4604     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4605     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4606 
4607     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4608     img->shdr[4].sh_size = debug_frame_size;
4609 
4610     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4611     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4612 
4613     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4614     img->sym[1].st_value = buf;
4615     img->sym[1].st_size = buf_size;
4616 
4617     img->di.cu_low_pc = buf;
4618     img->di.cu_high_pc = buf + buf_size;
4619     img->di.fn_low_pc = buf;
4620     img->di.fn_high_pc = buf + buf_size;
4621 
4622     dfh = (DebugFrameHeader *)(img + 1);
4623     memcpy(dfh, debug_frame, debug_frame_size);
4624     dfh->fde.func_start = buf;
4625     dfh->fde.func_len = buf_size;
4626 
4627 #ifdef DEBUG_JIT
4628     /* Enable this block to be able to debug the ELF image file creation.
4629        One can use readelf, objdump, or other inspection utilities.  */
4630     {
4631         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4632         if (f) {
4633             if (fwrite(img, img_size, 1, f) != img_size) {
4634                 /* Avoid stupid unused return value warning for fwrite.  */
4635             }
4636             fclose(f);
4637         }
4638     }
4639 #endif
4640 
4641     one_entry.symfile_addr = img;
4642     one_entry.symfile_size = img_size;
4643 
4644     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4645     __jit_debug_descriptor.relevant_entry = &one_entry;
4646     __jit_debug_descriptor.first_entry = &one_entry;
4647     __jit_debug_register_code();
4648 }
4649 #else
4650 /* No support for the feature.  Provide the entry point expected by exec.c,
4651    and implement the internal function we declared earlier.  */
4652 
/*
 * Variant compiled when ELF_HOST_MACHINE is not defined: every argument
 * is ignored and nothing is registered.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty.  */
}
4658 
/*
 * Variant compiled when ELF_HOST_MACHINE is not defined: the entry point
 * exists so callers still link, but it does nothing.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty.  */
}
4662 #endif /* ELF_HOST_MACHINE */
4663 
4664 #if !TCG_TARGET_MAYBE_vec
4665 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4666 {
4667     g_assert_not_reached();
4668 }
4669 #endif
4670